Skip to content

Commit 1367e26

Browse files
authored
Merge pull request #501 from sgsmob/export
Expand functionality of `delphi_utils.create_export_csv()` for use in more indicators
2 parents f66601f + a3eff48 commit 1367e26

File tree

6 files changed

+156
-36
lines changed

6 files changed

+156
-36
lines changed

_delphi_utils_python/delphi_utils/export.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,19 @@
22
# -*- coding: utf-8 -*-
33
from datetime import datetime
44
from os.path import join
5+
from typing import Optional
56

7+
import numpy as np
68
import pandas as pd
79

810
def create_export_csv(
    df: pd.DataFrame,
    export_dir: str,
    geo_res: str,
    sensor: str,
    metric: Optional[str] = None,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
):
    """Export data in the format expected by the Delphi API.

    One CSV file is written per distinct timestamp that falls inside the
    requested date window.  Files are named
    ``{YYYYMMDD}_{geo_res}_{metric}_{sensor}.csv``, or
    ``{YYYYMMDD}_{geo_res}_{sensor}.csv`` when `metric` is None, and contain
    the columns geo_id, val, se, sample_size with missing values written as
    ``NA``.

    Parameters
    ----------
    df: pd.DataFrame
        Columns: geo_id, timestamp, val, se, sample_size
    export_dir: str
        Export directory
    geo_res: str
        Geographic resolution to which the data has been aggregated
    sensor: str
        Sensor that has been calculated (cumulative_counts vs new_counts)
    metric: Optional[str]
        Metric we are considering, if any.
    start_date: Optional[datetime]
        Earliest date to export (inclusive) or None if no minimum date
        restrictions should be applied.
    end_date: Optional[datetime]
        Latest date to export (inclusive) or None if no maximum date
        restrictions should be applied.
    """
    # Work on a copy so the caller's frame is not modified by the conversion.
    df = df.copy()
    df["timestamp"] = pd.to_datetime(df["timestamp"])

    # Build the row filter directly from the bounds that were supplied.  This
    # avoids calling min()/max() on the column, which raises on an empty frame
    # and is unreliable when the column contains NaT.
    in_window = df["timestamp"].notna()
    if start_date is not None:
        in_window = in_window & (df["timestamp"] >= start_date)
    if end_date is not None:
        in_window = in_window & (df["timestamp"] <= end_date)

    name_parts = [geo_res, sensor] if metric is None else [geo_res, metric, sensor]
    suffix = "_".join(name_parts)

    # groupby visits the dates in ascending order and keeps the original row
    # order inside each date, without rescanning the frame once per date.
    for date, day_df in df[in_window].groupby("timestamp", sort=True):
        export_filename = f"{date.strftime('%Y%m%d')}_{suffix}.csv"
        export_file = join(export_dir, export_filename)
        day_df[["geo_id", "val", "se", "sample_size"]].to_csv(
            export_file, index=False, na_rep="NA"
        )
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.csv

_delphi_utils_python/tests/test_dir/20200215_county_deaths_test.csv

Lines changed: 0 additions & 3 deletions
This file was deleted.

_delphi_utils_python/tests/test_dir/20200301_county_deaths_test.csv

Lines changed: 0 additions & 2 deletions
This file was deleted.

_delphi_utils_python/tests/test_dir/20200315_county_deaths_test.csv

Lines changed: 0 additions & 2 deletions
This file was deleted.
Lines changed: 130 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,155 @@
1-
import pytest
2-
1+
"""Tests for exporting CSV files."""
32
from datetime import datetime
43
from os import listdir, remove
54
from os.path import join
65

76
import pandas as pd
87
from delphi_utils import create_export_csv
98

9+
def _clean_directory(directory):
10+
"""Clean files out of a directory."""
11+
for fname in listdir(directory):
12+
if fname.startswith("."):
13+
continue
14+
remove(join(directory, fname))
15+
16+
17+
def _non_ignored_files_set(directory):
18+
"""List all files in a directory not preceded by a '.' and store them in a set."""
19+
out = set()
20+
for fname in listdir(directory):
21+
if fname.startswith("."):
22+
continue
23+
out.add(fname)
24+
return out
25+
1026

1127
class TestExport:
    """Tests for exporting CSVs."""

    # List of times for data points.
    TIMES = [
        datetime.strptime(x, "%Y-%m-%d")
        for x in ["2020-02-15", "2020-02-15", "2020-03-01", "2020-03-15"]
    ]

    # A sample data frame.
    DF = pd.DataFrame(
        {
            "geo_id": ["51093", "51175", "51175", "51620"],
            "timestamp": TIMES,
            "val": [3.6, 2.1, 2.2, 2.6],
            "se": [0.15, 0.22, 0.20, 0.34],
            "sample_size": [100, 100, 101, 100],
        }
    )

    # Directory into which the tests export their CSV files.
    TEST_DIR = "test_dir"

    def setup_method(self):
        """Clean the receiving directory before every test."""
        _clean_directory(self.TEST_DIR)

    def test_export_with_metric(self):
        """Test that exporting CSVs with the `metric` argument yields the correct files."""
        create_export_csv(
            df=self.DF,
            start_date=datetime.strptime("2020-02-15", "%Y-%m-%d"),
            export_dir=self.TEST_DIR,
            metric="deaths",
            geo_res="county",
            sensor="test",
        )

        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200215_county_deaths_test.csv",
            "20200301_county_deaths_test.csv",
            "20200315_county_deaths_test.csv",
        }

    def test_export_without_metric(self):
        """Test that exporting CSVs without the `metric` argument yields the correct files."""
        create_export_csv(
            df=self.DF,
            start_date=datetime.strptime("2020-02-15", "%Y-%m-%d"),
            export_dir=self.TEST_DIR,
            geo_res="county",
            sensor="test",
        )

        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200215_county_test.csv",
            "20200301_county_test.csv",
            "20200315_county_test.csv",
        }

    def test_export_with_limiting_start_date(self):
        """Test that the `start_date` prevents earlier dates from being exported."""
        create_export_csv(
            df=self.DF,
            start_date=datetime.strptime("2020-02-20", "%Y-%m-%d"),
            export_dir=self.TEST_DIR,
            geo_res="county",
            sensor="test",
        )

        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200301_county_test.csv",
            "20200315_county_test.csv",
        }

    def test_export_with_limiting_end_date(self):
        """Test that the `end_date` prevents later dates from being exported."""
        create_export_csv(
            df=self.DF,
            end_date=datetime.strptime("2020-03-07", "%Y-%m-%d"),
            export_dir=self.TEST_DIR,
            geo_res="county",
            sensor="test",
        )

        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200215_county_test.csv",
            "20200301_county_test.csv",
        }

    def test_export_with_no_dates(self):
        """Test that omitting the `start_date` and `end_date` exports all dates."""
        create_export_csv(
            df=self.DF,
            export_dir=self.TEST_DIR,
            geo_res="state",
            sensor="test",
        )

        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200215_state_test.csv",
            "20200301_state_test.csv",
            "20200315_state_test.csv",
        }

0 commit comments

Comments
 (0)