@@ -47,10 +47,20 @@ def test_merge_backfill_file(self):
47
47
48
48
today = datetime .today ()
49
49
50
- new_files = glob .glob (backfill_dir + "/claims_hosp*.parquet" )
51
50
fn = "claims_hosp_from_20200611_to_20200614.parquet"
52
51
assert fn not in os .listdir (backfill_dir )
53
52
53
+ # Check when there is no daily file to merge.
54
+ today = datetime (2020 , 6 , 14 )
55
+ merge_backfill_file (backfill_dir , today .weekday (), today ,
56
+ test_mode = True , check_nd = 8 )
57
+ assert fn not in os .listdir (backfill_dir )
58
+
59
+ # Generate backfill daily files
60
+ for d in range (11 , 15 ):
61
+ dropdate = datetime (2020 , 6 , d )
62
+ store_backfill_file (DATA_FILEPATH , dropdate , backfill_dir )
63
+
54
64
# Check when the merged file is not generated
55
65
today = datetime (2020 , 6 , 14 )
56
66
merge_backfill_file (backfill_dir , today .weekday (), today ,
@@ -63,13 +73,18 @@ def test_merge_backfill_file(self):
63
73
assert fn in os .listdir (backfill_dir )
64
74
65
75
# Read daily file
76
+ new_files = glob .glob (backfill_dir + "/claims_hosp*.parquet" )
66
77
pdList = []
67
78
for file in new_files :
68
79
df = pd .read_parquet (file , engine = 'pyarrow' )
69
80
issue_date = datetime .strptime (file [- 16 :- 8 ], "%Y%m%d" )
70
81
df ["issue_date" ] = issue_date
71
82
df ["lag" ] = [(issue_date - x ).days for x in df ["time_value" ]]
72
83
pdList .append (df )
84
+ os .remove (file )
85
+ new_files = glob .glob (backfill_dir + "/claims_hosp*.parquet" )
86
+ assert len (new_files ) == 1
87
+
73
88
expected = pd .concat (pdList ).sort_values (["time_value" , "fips" ])
74
89
75
90
# Read the merged file
0 commit comments