Skip to content

Commit f45cb9c

Browse files
committed
Update utilities for NAN codes:
* update the export utility to export the missing columns, and add a test
* add a nancodes utility, which is just a dict of code constants
1 parent 95cc7a8 commit f45cb9c

File tree

4 files changed

+54
-1
lines changed

4 files changed

+54
-1
lines changed

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@
1212
from .geomap import GeoMapper
1313
from .smooth import Smoother
1414
from .signal import add_prefix
15+
from .nancodes import NAN_CODES
1516

1617
__version__ = "0.1.0"

_delphi_utils_python/delphi_utils/export.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,16 @@ def create_export_csv(
6464
else:
6565
export_filename = f"{date.strftime('%Y%m%d')}_{geo_res}_{metric}_{sensor}.csv"
6666
export_file = join(export_dir, export_filename)
67-
export_df = df[df["timestamp"] == date][["geo_id", "val", "se", "sample_size",]]
67+
EXPECTED_COLUMNS = [
68+
"geo_id",
69+
"val",
70+
"se",
71+
"sample_size",
72+
"missing_val",
73+
"missing_se",
74+
"missing_sample_size"
75+
]
76+
export_df = df[df["timestamp"] == date].filter(items=EXPECTED_COLUMNS)
6877
if remove_null_samples:
6978
export_df = export_df[export_df["sample_size"].notnull()]
7079
export_df = export_df.round({"val": 7, "se": 7})
_delphi_utils_python/delphi_utils/nancodes.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
# Label -> integer code constants. Codes are assigned in label order starting
# at 0, so the position of a label in the tuple below IS its code; append new
# labels at the end to keep existing codes stable.
# NOTE(review): presumably these are the values stored in the missing_*
# columns of exported data -- confirm against the export utility.
NAN_CODES = {
    label: code
    for code, label in enumerate(
        (
            "Not Missing",
            "Not Applicable",
            "Region Exception",
            "Data Insufficient",
            "Unknown",
        )
    )
}

_delphi_utils_python/tests/test_export.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from os.path import join
55

66
import pandas as pd
7+
import numpy as np
78
from delphi_utils import create_export_csv
89

910
def _clean_directory(directory):
@@ -43,6 +44,20 @@ class TestExport:
4344
}
4445
)
4546

47+
# Sample frame with missingness: each of val, se and sample_size has exactly
# one NaN/None entry, and the matching missing_* column holds 1 in that row
# (0 everywhere else).
DF2 = pd.DataFrame(
    dict(
        geo_id=["51093", "51175", "51175", "51620"],
        timestamp=TIMES,
        val=[3.12345678910, np.nan, 2.2, 2.6],
        se=[0.15, 0.22, np.nan, 0.34],
        sample_size=[100, 100, 101, None],
        missing_val=[0, 1, 0, 0],
        missing_se=[0, 0, 1, 0],
        missing_sample_size=[0, 0, 0, 1],
    )
)
60+
4661
# Directory in which to store tests.
4762
TEST_DIR = "test_dir"
4863

@@ -235,3 +250,23 @@ def test_export_without_null_removal(self):
235250
]
236251
)
237252
assert pd.read_csv(join(self.TEST_DIR, "20200606_state_test.csv")).size > 0
253+
254+
def test_export_df_with_missingness(self):
    """Export the frame that carries missing_* columns and check the output.

    Runs ``create_export_csv`` on a copy of ``DF2`` with
    ``remove_null_samples=False``, then verifies that exactly the three
    expected per-date CSV files were written to ``TEST_DIR`` and that the
    2020-03-15 file is non-empty when read back.
    """
    # Start from an empty directory so the file-set comparison is exact.
    _clean_directory(self.TEST_DIR)

    create_export_csv(
        df=self.DF2.copy(),
        export_dir=self.TEST_DIR,
        geo_res="state",
        sensor="test",
        remove_null_samples=False,
    )
    assert _non_ignored_files_set(self.TEST_DIR) == {
        "20200215_state_test.csv",
        "20200301_state_test.csv",
        "20200315_state_test.csv",
    }
    # No debugger hook here: a stray breakpoint() would stall or fail
    # non-interactive test runs.
    assert pd.read_csv(join(self.TEST_DIR, "20200315_state_test.csv")).size > 0

0 commit comments

Comments
 (0)