|
1 |
| -import pytest |
2 |
| - |
| 1 | +"""Tests for exporting CSV files.""" |
3 | 2 | from datetime import datetime
|
4 | 3 | from os import listdir, remove
|
5 | 4 | from os.path import join
|
6 | 5 |
|
7 | 6 | import pandas as pd
|
8 | 7 | from delphi_utils import create_export_csv
|
9 | 8 |
|
def _clean_directory(directory):
    """Delete every entry in `directory` whose name does not start with '.'.

    Dot-prefixed names are left in place. Only the immediate contents of
    `directory` are listed; nothing is done recursively.
    """
    visible_names = [
        name for name in listdir(directory) if not name.startswith(".")
    ]
    for name in visible_names:
        remove(join(directory, name))
| 15 | + |
| 16 | + |
def _non_ignored_files_set(directory):
    """Return the set of entry names in `directory` that do not start with '.'.

    Names are returned exactly as `listdir` reports them (no directory
    prefix). An empty directory, or one holding only dot-prefixed entries,
    yields an empty set.
    """
    return {fname for fname in listdir(directory) if not fname.startswith(".")}
| 25 | + |
10 | 26 |
|
class TestExport:
    """Tests for the CSV files written by `create_export_csv`."""

    # Timestamps for the sample rows. 2020-02-15 appears twice, so the four
    # rows cover three distinct dates.
    TIMES = [
        datetime.strptime(x, "%Y-%m-%d")
        for x in ["2020-02-15", "2020-02-15", "2020-03-01", "2020-03-15"]
    ]

    # A sample data frame with one row per entry in TIMES.
    DF = pd.DataFrame(
        {
            "geo_id": ["51093", "51175", "51175", "51620"],
            "timestamp": TIMES,
            "val": [3.6, 2.1, 2.2, 2.6],
            "se": [0.15, 0.22, 0.20, 0.34],
            "sample_size": [100, 100, 101, 100],
        }
    )

    # Directory that receives the exported files. The path is relative, so it
    # is resolved against the working directory of the test run, and it is
    # listed before anything is written, so it is expected to exist already.
    TEST_DIR = "test_dir"

    def test_export_with_metric(self):
        """Test that exporting CSVs with the `metric` argument yields the correct files."""

        # Clean receiving directory
        _clean_directory(self.TEST_DIR)

        create_export_csv(
            df=self.DF,
            start_date=datetime.strptime("2020-02-15", "%Y-%m-%d"),
            export_dir=self.TEST_DIR,
            metric="deaths",
            geo_res="county",
            sensor="test",
        )

        # The metric name is expected between the geo resolution and the sensor.
        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200215_county_deaths_test.csv",
            "20200301_county_deaths_test.csv",
            "20200315_county_deaths_test.csv",
        }

    def test_export_without_metric(self):
        """Test that exporting CSVs without the `metric` argument yields the correct files."""

        # Clean receiving directory
        _clean_directory(self.TEST_DIR)

        create_export_csv(
            df=self.DF,
            start_date=datetime.strptime("2020-02-15", "%Y-%m-%d"),
            export_dir=self.TEST_DIR,
            geo_res="county",
            sensor="test",
        )

        # With no metric, the file names carry only date, geo resolution and sensor.
        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200215_county_test.csv",
            "20200301_county_test.csv",
            "20200315_county_test.csv",
        }

    def test_export_with_limiting_start_date(self):
        """Test that the `start_date` prevents earlier dates from being exported."""

        # Clean receiving directory
        _clean_directory(self.TEST_DIR)

        create_export_csv(
            df=self.DF,
            start_date=datetime.strptime("2020-02-20", "%Y-%m-%d"),
            export_dir=self.TEST_DIR,
            geo_res="county",
            sensor="test",
        )

        # 2020-02-15 precedes the start date, so no file is expected for it.
        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200301_county_test.csv",
            "20200315_county_test.csv",
        }

    def test_export_with_limiting_end_date(self):
        """Test that the `end_date` prevents later dates from being exported."""

        # Clean receiving directory
        _clean_directory(self.TEST_DIR)

        create_export_csv(
            df=self.DF,
            end_date=datetime.strptime("2020-03-07", "%Y-%m-%d"),
            export_dir=self.TEST_DIR,
            geo_res="county",
            sensor="test",
        )

        # 2020-03-15 follows the end date, so no file is expected for it.
        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200215_county_test.csv",
            "20200301_county_test.csv",
        }

    def test_export_with_no_dates(self):
        """Test that omitting the `start_date` and `end_date` exports all dates."""

        # Clean receiving directory
        _clean_directory(self.TEST_DIR)

        create_export_csv(
            df=self.DF,
            export_dir=self.TEST_DIR,
            geo_res="state",
            sensor="test",
        )

        assert _non_ignored_files_set(self.TEST_DIR) == {
            "20200215_state_test.csv",
            "20200301_state_test.csv",
            "20200315_state_test.csv",
        }
0 commit comments