diff --git a/changehc/delphi_changehc/backfill.py b/changehc/delphi_changehc/backfill.py index 8b9ea8491..7338c4c40 100644 --- a/changehc/delphi_changehc/backfill.py +++ b/changehc/delphi_changehc/backfill.py @@ -46,6 +46,18 @@ def store_backfill_file(df, _end_date, backfill_dir, numtype, geo, weekday): 'num', 'den'] backfilldata = backfilldata.loc[backfilldata["time_value"] >= _start_date, selected_columns] + + backfilldata["lag"] = [(_end_date - x).days for x in backfilldata["time_value"]] + backfilldata["time_value"] = backfilldata.time_value.dt.strftime("%Y-%m-%d") + backfilldata["issue_date"] = datetime.strftime(_end_date, "%Y-%m-%d") + + backfilldata = backfilldata.astype({ + "time_value": "string", + "issue_date": "string", + "fips": "string", + "state_id": "string" + }) + path = backfill_dir + \ "/changehc_%s_as_of_%s.parquet"%(numtype, datetime.strftime(_end_date, "%Y%m%d")) # Store intermediate file into the backfill folder @@ -109,9 +121,6 @@ def get_date(file_link): pdList = [] for fn in new_files: df = pd.read_parquet(fn, engine='pyarrow') - issue_date = get_date(fn) - df["issue_date"] = issue_date - df["lag"] = [(issue_date - x).days for x in df["time_value"]] pdList.append(df) merged_file = pd.concat(pdList).sort_values(["time_value", "fips"]) path = backfill_dir + "/changehc_%s_from_%s_to_%s.parquet"%( diff --git a/changehc/tests/test_backfill.py b/changehc/tests/test_backfill.py index 58e76b9db..c7c8796d4 100644 --- a/changehc/tests/test_backfill.py +++ b/changehc/tests/test_backfill.py @@ -40,7 +40,7 @@ class TestBackfill: def test_store_backfill_file(self): - + fn = "changehc_covid_as_of_20200101.parquet" dropdate = datetime(2020, 1, 1) numtype = "covid" @@ -69,7 +69,7 @@ def test_store_backfill_file(self): backfill_df = pd.read_parquet(backfill_dir + "/"+ fn, engine='pyarrow') selected_columns = ['time_value', 'fips', 'state_id', - 'num', 'den'] + 'num', 'den', 'lag', 'issue_date'] assert set(selected_columns) == set(backfill_df.columns) os.remove(backfill_dir + "/" + fn) @@ -114,9 +114,6 @@ def test_merge_backfill_file(self): if "from" in file: continue df = pd.read_parquet(file, engine='pyarrow') - issue_date = datetime.strptime(file[-16:-8], "%Y%m%d") - df["issue_date"] = issue_date - df["lag"] = [(issue_date - x).days for x in df["time_value"]] pdList.append(df) os.remove(file) new_files = glob.glob(backfill_dir + "/changehc_%s*.parquet"%numtype) diff --git a/quidel_covidtest/delphi_quidel_covidtest/backfill.py b/quidel_covidtest/delphi_quidel_covidtest/backfill.py index 7e8482551..1c83622ea 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/backfill.py +++ b/quidel_covidtest/delphi_quidel_covidtest/backfill.py @@ -56,6 +56,17 @@ def store_backfill_file(df, _end_date, backfill_dir): 'num_age_0_17', 'den_age_0_17'] backfilldata = backfilldata.loc[backfilldata["time_value"] >= _start_date, selected_columns] + backfilldata["lag"] = [(_end_date - x).days for x in backfilldata["time_value"]] + backfilldata["time_value"] = backfilldata.time_value.dt.strftime("%Y-%m-%d") + backfilldata["issue_date"] = datetime.strftime(_end_date, "%Y-%m-%d") + + backfilldata = backfilldata.astype({ + "time_value": "string", + "issue_date": "string", + "fips": "string", + "state_id": "string" + }) + path = backfill_dir + \ "/quidel_covidtest_as_of_%s.parquet"%datetime.strftime(_end_date, "%Y%m%d") # Store intermediate file into the backfill folder @@ -108,9 +119,6 @@ def get_date(file_link): pdList = [] for fn in new_files: df = pd.read_parquet(fn, engine='pyarrow') - issue_date = get_date(fn) - df["issue_date"] = issue_date - df["lag"] = [(issue_date - x).days for x in df["time_value"]] pdList.append(df) merged_file = pd.concat(pdList).sort_values(["time_value", "fips"]) path = backfill_dir + "/quidel_covidtest_from_%s_to_%s.parquet"%( diff --git a/quidel_covidtest/tests/test_backfill.py b/quidel_covidtest/tests/test_backfill.py index 7a033fb47..27e0d01bc 100644 --- a/quidel_covidtest/tests/test_backfill.py +++ b/quidel_covidtest/tests/test_backfill.py @@ -49,7 +49,8 @@ def test_store_backfill_file(self): 'num_age_18_49', 'den_age_18_49', 'num_age_50_64', 'den_age_50_64', 'num_age_65plus', 'den_age_65plus', - 'num_age_0_17', 'den_age_0_17'] + 'num_age_0_17', 'den_age_0_17', + 'lag', 'issue_date'] assert set(selected_columns) == set(backfill_df.columns) os.remove(backfill_dir + "/" + fn) @@ -86,9 +87,6 @@ def test_merge_backfill_file(self): if "from" in file: continue df = pd.read_parquet(file, engine='pyarrow') - issue_date = datetime.strptime(file[-16:-8], "%Y%m%d") - df["issue_date"] = issue_date - df["lag"] = [(issue_date - x).days for x in df["time_value"]] pdList.append(df) os.remove(file) new_files = glob.glob(backfill_dir + "/quidel_covidtest*.parquet")