Skip to content

Commit 513a39b

Browse files
committed
concat outside of loops
1 parent 4f28fba commit 513a39b

File tree

4 files changed: 35 additions (+35) and 28 deletions (-28).

doctor_visits/delphi_doctor_visits/sensor.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,17 @@ def fill_dates(y_data, dates):
6060
last_date = dates[-1]
6161
cols = y_data.columns
6262

63+
df_list = [y_data]
6364
if first_date not in y_data.index:
64-
y_data = pd.concat([
65-
y_data,
65+
df_list.append(
6666
pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[first_date])
67-
])
67+
)
6868
if last_date not in y_data.index:
69-
y_data = pd.concat([
70-
y_data,
69+
df_list.append(
7170
pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[last_date])
72-
])
71+
)
7372

74-
y_data.sort_index(inplace=True)
73+
y_data = pd.concat(df_list).sort_index()
7574
y_data = y_data.asfreq("D", fill_value=0)
7675
return y_data
7776

quidel_covidtest/delphi_quidel_covidtest/data_tools.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,18 @@ def fill_dates(y_data, first_date, last_date):
3030
Returns: dataframe containing all dates given
3131
"""
3232
cols = y_data.columns
33+
34+
df_list = [y_data]
3335
if first_date not in y_data.index:
34-
y_data = pd.concat([y_data, pd.DataFrame(dict.fromkeys(cols, 0.),
35-
columns=cols, index=[first_date])])
36+
df_list.append(
37+
pd.DataFrame(dict.fromkeys(cols, 0.), columns=cols, index=[first_date])
38+
)
3639
if last_date not in y_data.index:
37-
y_data = pd.concat([y_data, pd.DataFrame(dict.fromkeys(cols, 0.),
38-
columns=cols, index=[last_date])])
40+
df_list.append(
41+
pd.DataFrame(dict.fromkeys(cols, 0.), columns=cols, index=[last_date])
42+
)
3943

40-
y_data.sort_index(inplace=True)
44+
y_data = pd.concat(df_list).sort_index()
4145
y_data = y_data.asfreq('D', fill_value=0)
4246
y_data.fillna(0, inplace=True)
4347
return y_data

quidel_covidtest/delphi_quidel_covidtest/generate_sensor.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ def generate_sensor_for_nonparent_geo(state_groups, res_key, smooth, device,
2727
Returns:
2828
df: pd.DataFrame
2929
"""
30-
state_df = pd.DataFrame(columns=["geo_id", "val", "se", "sample_size", "timestamp"])
3130
state_list = list(state_groups.groups.keys())
31+
df_list = []
3232
for state in state_list:
3333
state_group = state_groups.get_group(state)
3434
state_group = state_group.drop(columns=[res_key])
@@ -63,15 +63,15 @@ def generate_sensor_for_nonparent_geo(state_groups, res_key, smooth, device,
6363
stat = stat * 100
6464

6565
se = se * 100
66-
state_df = pd.concat([
67-
state_df,
66+
df_list.append(
6867
pd.DataFrame({"geo_id": state,
6968
"timestamp": state_group.index,
7069
"val": stat,
7170
"se": se,
7271
"sample_size": sample_size})
73-
])
74-
return remove_null_samples(state_df)
72+
)
73+
74+
return remove_null_samples(pd.concat(df_list))
7575

7676
def generate_sensor_for_parent_geo(state_groups, data, res_key, smooth,
7777
device, first_date, last_date, suffix):
@@ -91,9 +91,9 @@ def generate_sensor_for_parent_geo(state_groups, data, res_key, smooth,
9191
df: pd.DataFrame
9292
"""
9393
has_parent = True
94-
res_df = pd.DataFrame(columns=["geo_id", "val", "se", "sample_size"])
9594
if res_key == "fips": # Add rest-of-state report for county level
9695
data = add_megacounties(data, smooth)
96+
df_list = []
9797
for loc, res_group in data.groupby(res_key):
9898
parent_state = res_group['state_id'].values[0]
9999
try:
@@ -150,12 +150,12 @@ def generate_sensor_for_parent_geo(state_groups, data, res_key, smooth,
150150
stat = stat * 100
151151

152152
se = se * 100
153-
res_df = pd.concat([
154-
res_df,
153+
df_list.append(
155154
pd.DataFrame({"geo_id": loc,
156155
"timestamp": res_group.index,
157156
"val": stat,
158157
"se": se,
159158
"sample_size": sample_size})
160-
])
161-
return remove_null_samples(res_df)
159+
)
160+
161+
return remove_null_samples(pd.concat(df_list))

quidel_covidtest/delphi_quidel_covidtest/pull.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ def get_from_s3(start_date, end_date, bucket, logger):
3434
'State', 'Zip', 'PatientAge', 'Result1',
3535
'Result2', 'OverallResult', 'StorageDate',
3636
'fname']
37-
df = pd.DataFrame(columns=selected_columns)
3837
s3_files = {}
3938
for obj in bucket.objects.all():
4039
if "-sars" in obj.key:
@@ -52,25 +51,30 @@ def get_from_s3(start_date, end_date, bucket, logger):
5251
s3_files[received_date].append(obj.key)
5352

5453
n_days = (end_date - start_date).days + 1
54+
df_list = []
55+
seen_files = set()
5556
for search_date in [start_date + timedelta(days=x) for x in range(n_days)]:
5657
if search_date in s3_files.keys():
57-
# Avoid appending duplicate datasets
5858
logger.info(f"Pulling data received on {search_date.date()}")
5959

6060
# Fetch data received on the same day
6161
for fn in s3_files[search_date]:
62+
# Skip non-CSV files, such as directories
6263
if ".csv" not in fn:
63-
continue #Add to avoid that the folder name was readed as a fn.
64-
if fn in set(df["fname"].values):
64+
continue
65+
# Avoid appending duplicate datasets
66+
if fn in seen_files:
6567
continue
6668
obj = bucket.Object(key=fn)
6769
newdf = pd.read_csv(obj.get()["Body"],
6870
parse_dates=["StorageDate", "TestDate"],
6971
low_memory=False)
72+
seen_files.add(fn)
7073
newdf["fname"] = fn
71-
df = pd.concat([df, newdf[selected_columns]])
74+
df_list.append(newdf[selected_columns])
7275
time_flag = search_date
73-
return df, time_flag
76+
77+
return pd.concat(df_list), time_flag
7478

7579
def fix_zipcode(df):
7680
"""Fix zipcode that is 9 digit instead of 5 digit."""

0 commit comments

Comments
 (0)