Skip to content

Commit 530b687

Browse files
committed
move lag and issue_date setting to daily files
1 parent 2f48fed commit 530b687

File tree

2 files changed, with 13 additions and 7 deletions.

2 files changed

+13
-7
lines changed

claims_hosp/delphi_claims_hosp/backfill.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,18 @@ def store_backfill_file(claims_filepath, _end_date, backfill_dir):
5050
backfilldata = backfilldata.loc[(backfilldata["time_value"] >= _start_date)
5151
& (~backfilldata["fips"].isnull()),
5252
selected_columns]
53+
54+
backfilldata["lag"] = [(_end_date - x).days for x in backfilldata["time_value"]]
55+
backfilldata["time_value"] = backfilldata.time_value.dt.strftime("%Y-%m-%d")
56+
backfilldata["issue_date"] = datetime.strftime(_end_date, "%Y-%m-%d")
57+
58+
backfilldata = backfilldata.astype({
59+
"time_value": "string",
60+
"issue_date": "string",
61+
"fips": "string",
62+
"state_id": "string"
63+
})
64+
5365
path = backfill_dir + \
5466
"/claims_hosp_as_of_%s.parquet"%datetime.strftime(_end_date, "%Y%m%d")
5567
# Store intermediate file into the backfill folder
@@ -101,9 +113,6 @@ def get_date(file_link):
101113
pdList = []
102114
for fn in new_files:
103115
df = pd.read_parquet(fn, engine='pyarrow')
104-
issue_date = get_date(fn)
105-
df["issue_date"] = issue_date
106-
df["lag"] = [(issue_date - x).days for x in df["time_value"]]
107116
pdList.append(df)
108117
merged_file = pd.concat(pdList).sort_values(["time_value", "fips"])
109118
path = backfill_dir + "/claims_hosp_from_%s_to_%s.parquet"%(

claims_hosp/tests/test_backfill.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def test_store_backfill_file(self):
3737
backfill_df = pd.read_parquet(backfill_dir + "/"+ fn, engine='pyarrow')
3838

3939
selected_columns = ['time_value', 'fips', 'state_id',
40-
'num', 'den']
40+
'num', 'den', 'lag', 'issue_date']
4141
assert set(selected_columns) == set(backfill_df.columns)
4242

4343
os.remove(backfill_dir + "/" + fn)
@@ -79,9 +79,6 @@ def test_merge_backfill_file(self):
7979
if "from" in file:
8080
continue
8181
df = pd.read_parquet(file, engine='pyarrow')
82-
issue_date = datetime.strptime(file[-16:-8], "%Y%m%d")
83-
df["issue_date"] = issue_date
84-
df["lag"] = [(issue_date - x).days for x in df["time_value"]]
8582
pdList.append(df)
8683
os.remove(file)
8784
new_files = glob.glob(backfill_dir + "/claims_hosp*.parquet")

0 commit comments

Comments
 (0)