From 7b0addbbca69aa9a2893cc339e16861a01a331a9 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 8 Dec 2022 14:29:59 -0800 Subject: [PATCH 1/2] JIT: Small name improvement in model.py --- src/server/endpoints/covidcast_utils/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/endpoints/covidcast_utils/model.py b/src/server/endpoints/covidcast_utils/model.py index 3d4cb0cb5..530560cf4 100644 --- a/src/server/endpoints/covidcast_utils/model.py +++ b/src/server/endpoints/covidcast_utils/model.py @@ -543,9 +543,9 @@ def _generate_transformed_rows( for key, source_signal_geo_rows in groupby(parsed_rows, group_keyfunc): base_source_name, base_signal_name, _, _ = key # Extract the list of derived signals; if a signal is not in the dictionary, then use the identity map. - derived_signal_transform_map: SourceSignalPair = transform_dict.get(SourceSignalPair(base_source_name, [base_signal_name]), SourceSignalPair(base_source_name, [base_signal_name])) + derived_signals: SourceSignalPair = transform_dict.get(SourceSignalPair(base_source_name, [base_signal_name]), SourceSignalPair(base_source_name, [base_signal_name])) # Create a list of source-signal pairs along with the transformation required for the signal. - signal_names_and_transforms: List[Tuple[Tuple[str, str], Callable]] = [(derived_signal, _get_base_signal_transform((base_source_name, derived_signal))) for derived_signal in derived_signal_transform_map.signal] + signal_names_and_transforms: List[Tuple[Tuple[str, str], Callable]] = [(derived_signal, _get_base_signal_transform((base_source_name, derived_signal))) for derived_signal in derived_signals.signal] # Put the current time series on a contiguous time index. source_signal_geo_rows = _reindex_iterable(source_signal_geo_rows, fill_value=transform_args.get("pad_fill_value")) # Create copies of the iterable, with smart memory usage. From e2a2585e3049036404e7d85463c32227a13bda12 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 8 Dec 2022 14:41:40 -0800 Subject: [PATCH 2/2] JIT: reduce the iterator stack on non-derived signals --- src/server/endpoints/covidcast_utils/model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/server/endpoints/covidcast_utils/model.py b/src/server/endpoints/covidcast_utils/model.py index 530560cf4..c98244b75 100644 --- a/src/server/endpoints/covidcast_utils/model.py +++ b/src/server/endpoints/covidcast_utils/model.py @@ -544,6 +544,10 @@ def _generate_transformed_rows( base_source_name, base_signal_name, _, _ = key # Extract the list of derived signals; if a signal is not in the dictionary, then use the identity map. derived_signals: SourceSignalPair = transform_dict.get(SourceSignalPair(base_source_name, [base_signal_name]), SourceSignalPair(base_source_name, [base_signal_name])) + # Speed up base signals by not transforming them. + if derived_signals.signal == [base_signal_name]: + yield from source_signal_geo_rows + continue # Create a list of source-signal pairs along with the transformation required for the signal. signal_names_and_transforms: List[Tuple[Tuple[str, str], Callable]] = [(derived_signal, _get_base_signal_transform((base_source_name, derived_signal))) for derived_signal in derived_signals.signal] # Put the current time series on a contiguous time index.