Skip to content

Code improvements to time processing routines #1010

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Nov 9, 2022
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ scipy==1.6.2
tenacity==7.0.0
newrelic
epiweeks==2.1.2
typing-extensions
47 changes: 34 additions & 13 deletions src/server/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


from ._exceptions import ValidationFailedException
from .utils import days_in_range, weeks_in_range, guess_time_value_is_day
from .utils import days_in_range, weeks_in_range, guess_time_value_is_day, guess_time_value_is_week, TimeValues, time_values_to_ranges


def _parse_common_multi_arg(key: str) -> List[Tuple[str, Union[bool, Sequence[str]]]]:
Expand Down Expand Up @@ -108,7 +108,7 @@ def parse_single_source_signal_arg(key: str) -> SourceSignalPair:
@dataclass
class TimePair:
time_type: str
time_values: Union[bool, Sequence[Union[int, Tuple[int, int]]]]
time_values: Union[bool, TimeValues]

@property
def is_week(self) -> bool:
Expand Down Expand Up @@ -205,8 +205,28 @@ def _parse_time_pair(time_type: str, time_values: Union[bool, Sequence[str]]) ->
raise ValidationFailedException(f'time param: {time_type} is not one of "day" or "week"')


def parse_time_arg(key: str = "time") -> List[TimePair]:
return [_parse_time_pair(time_type, time_values) for [time_type, time_values] in _parse_common_multi_arg(key)]
def parse_time_arg(key: str = "time") -> Optional[TimePair]:
time_pairs = [_parse_time_pair(time_type, time_values) for [time_type, time_values] in _parse_common_multi_arg(key)]

# single value
if len(time_pairs) == 0:
return None
if len(time_pairs) == 1:
return time_pairs[0]

# make sure 'day' and 'week' aren't mixed
time_types = set(time_pair.time_type for time_pair in time_pairs)
if len(time_types) >= 2:
raise ValidationFailedException(f'{key}: {time_pairs} mixes "day" and "week" time types')

# merge all time pairs into one
merged = []
for time_pair in time_pairs:
if time_pair.time_values == True:
return time_pair
else:
merged.extend(time_pair.time_values)
return TimePair(time_pairs[0].time_type, time_values_to_ranges(merged))


def parse_single_time_arg(key: str) -> TimePair:
Expand Down Expand Up @@ -255,25 +275,26 @@ def parse_week_range_arg(key: str) -> Tuple[int, int]:
raise ValidationFailedException(f"{key} must match YYYYWW-YYYYWW")
return r

def parse_day_or_week_arg(key: str, default_value: Optional[int] = None) -> Tuple[int, bool]:
def parse_day_or_week_arg(key: str, default_value: Optional[int] = None) -> TimePair:
v = request.values.get(key)
if not v:
if default_value is not None:
return default_value, guess_time_value_is_day(default_value)
time_type = "day" if guess_time_value_is_day(default_value) else "week"
return TimePair(time_type, [default_value])
raise ValidationFailedException(f"{key} param is required")
# format is either YYYY-MM-DD or YYYYMMDD or YYYYMM
is_week = len(v) == 6
is_week = guess_time_value_is_week(v)
if is_week:
return parse_week_arg(key), False
return parse_day_arg(key), True
return TimePair("week", [parse_week_arg(key)])
return TimePair("day", [parse_day_arg(key)])

def parse_day_or_week_range_arg(key: str) -> Tuple[Tuple[int, int], bool]:
def parse_day_or_week_range_arg(key: str) -> TimePair:
v = request.values.get(key)
if not v:
raise ValidationFailedException(f"{key} param is required")
# format is either YYYY-MM-DD--YYYY-MM-DD or YYYYMMDD-YYYYMMDD or YYYYMM-YYYYMM
# so if the first before the - has length 6, it must be a week
is_week = len(v.split('-', 2)[0]) == 6
is_week = guess_time_value_is_week(v.split('-', 2)[0])
if is_week:
return parse_week_range_arg(key), False
return parse_day_range_arg(key), True
return TimePair("week", [parse_week_range_arg(key)])
return TimePair("day", [parse_day_range_arg(key)])
58 changes: 22 additions & 36 deletions src/server/_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
from ._db import metadata
from ._printer import create_printer, APrinter
from ._exceptions import DatabaseErrorException
from ._validate import DateRange, extract_strings
from ._validate import extract_strings
from ._params import GeoPair, SourceSignalPair, TimePair
from .utils import time_values_to_ranges, days_to_ranges, weeks_to_ranges
from .utils import time_values_to_ranges, days_to_ranges, weeks_to_ranges, TimeValues


def date_string(value: int) -> str:
Expand All @@ -36,7 +36,7 @@ def date_string(value: int) -> str:

def to_condition(
field: str,
value: Union[Tuple[str, str], str, Tuple[int, int], int],
value: Union[str, Tuple[int, int], int],
param_key: str,
params: Dict[str, Any],
formatter=lambda x: x,
Expand All @@ -52,7 +52,7 @@ def to_condition(

def filter_values(
field: str,
values: Optional[Sequence[Union[Tuple[str, str], str, Tuple[int, int], int]]],
values: Optional[Sequence[Union[str, Tuple[int, int], int]]],
param_key: str,
params: Dict[str, Any],
formatter=lambda x: x,
Expand All @@ -68,7 +68,7 @@ def filter_values(

def filter_strings(
field: str,
values: Optional[Sequence[Union[Tuple[str, str], str]]],
values: Optional[Sequence[str]],
param_key: str,
params: Dict[str, Any],
):
Expand All @@ -86,7 +86,7 @@ def filter_integers(

def filter_dates(
field: str,
values: Optional[Sequence[Union[Tuple[int, int], int]]],
values: Optional[TimeValues],
param_key: str,
params: Dict[str, Any],
):
Expand Down Expand Up @@ -171,32 +171,28 @@ def filter_pair(pair: SourceSignalPair, i) -> str:
return f"({' OR '.join(parts)})"


def filter_time_pairs(
def filter_time_pair(
type_field: str,
time_field: str,
values: Sequence[TimePair],
pair: Optional[TimePair],
param_key: str,
params: Dict[str, Any],
) -> str:
"""
returns the SQL sub query to filter by the given time pairs
returns the SQL sub query to filter by the given time pair
"""
if not pair:
return "FALSE"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: is this a good code path to have? AFAICT there's an exception raised in covidcast.py:parse_time_pairs if you don't have either: a) time=... or b) time_type=... and time_values=.... So this will technically never happen in the API. So:

  • Pro: function safety, can be reused in the future in other places that didn't have the same validation
  • Con: confusing now, because it makes you think this is a possible code path

not super blocking, i can go either way, but curious what everyone thinks here, cc @melange396 @krivard

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like you've mentioned, I'd rather have the extra failsafe for function safety so I kept this for now (but it should be trivial to remove if needed)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

im not actually sure what would be most correct here... it could arguably even be reasonable to return "TRUE" -- if we are not filtering on any time, then all time values are valid.

since it is ambiguous, i think its good to leave that condition there but but also add a comment to the effect of "time values are required by the API, so this case should never be reached, but this check further ensures the desired behavior"


def filter_pair(pair: TimePair, i) -> str:
type_param = f"{param_key}_{i}t"
params[type_param] = pair.time_type
if isinstance(pair.time_values, bool) and pair.time_values:
return f"{type_field} = :{type_param}"
type_param = f"{param_key}_0t"
params[type_param] = pair.time_type
if isinstance(pair.time_values, bool) and pair.time_values:
parts = f"{type_field} = :{type_param}"
else:
ranges = weeks_to_ranges(pair.time_values) if pair.is_week else days_to_ranges(pair.time_values)
return f"({type_field} = :{type_param} AND {filter_integers(time_field, cast(Sequence[Union[int, Tuple[int,int]]], ranges), type_param, params)})"

parts = [filter_pair(p, i) for i, p in enumerate(values)]

if not parts:
# something has to be selected
return "FALSE"
parts = f"({type_field} = :{type_param} AND {filter_integers(time_field, ranges, type_param, params)})"

return f"({' OR '.join(parts)})"
return f"({parts})"


def parse_row(
Expand Down Expand Up @@ -393,7 +389,7 @@ def where(self, **kvargs: Dict[str, Any]) -> "QueryBuilder":
def where_strings(
self,
field: str,
values: Optional[Sequence[Union[Tuple[str, str], str]]],
values: Optional[Sequence[str]],
param_key: Optional[str] = None,
) -> "QueryBuilder":
fq_field = f"{self.alias}.{field}" if "." not in field else field
Expand All @@ -413,16 +409,6 @@ def where_integers(
self.conditions.append(filter_integers(fq_field, values, param_key or field, self.params))
return self

def where_dates(
self,
field: str,
values: Optional[Sequence[Union[Tuple[int, int], int]]],
param_key: Optional[str] = None,
) -> "QueryBuilder":
fq_field = self._fq_field(field)
self.conditions.append(filter_dates(fq_field, values, param_key or field, self.params))
return self

def where_geo_pairs(
self,
type_field: str,
Expand Down Expand Up @@ -463,17 +449,17 @@ def where_source_signal_pairs(
)
return self

def where_time_pairs(
def where_time_pair(
self,
type_field: str,
value_field: str,
values: Sequence[TimePair],
values: Optional[TimePair],
param_key: Optional[str] = None,
) -> "QueryBuilder":
fq_type_field = self._fq_field(type_field)
fq_value_field = self._fq_field(value_field)
self.conditions.append(
filter_time_pairs(
filter_time_pair(
fq_type_field,
fq_value_field,
values,
Expand Down
8 changes: 3 additions & 5 deletions src/server/_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from flask import request

from ._exceptions import UnAuthenticatedException, ValidationFailedException
from .utils import TimeValues


def resolve_auth_token() -> Optional[str]:
Expand Down Expand Up @@ -141,14 +142,11 @@ def extract_date(key: Union[str, Sequence[str]]) -> Optional[int]:
return parse_date(s)


DateRange = Union[Tuple[int, int], int]


def extract_dates(key: Union[str, Sequence[str]]) -> Optional[List[DateRange]]:
def extract_dates(key: Union[str, Sequence[str]]) -> Optional[TimeValues]:
parts = extract_strings(key)
if not parts:
return None
values: List[Union[Tuple[int, int], int]] = []
values: TimeValues = []

def push_range(first: str, last: str):
first_d = parse_date(first)
Expand Down
Loading