
Commit 9f67b79

Merge pull request #1221 from cmu-delphi/release/indicators_v0.1.12_utils_v0.1.10
Release covidcast-indicators 0.1.12
2 parents 9f6002d + 0f48b74

File tree

8 files changed: +160 −5 lines


.bumpversion.cfg

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.11
+current_version = 0.1.12
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False

ansible/templates/usafacts-params-prod.json.j2

Lines changed: 1 addition & 0 deletions

@@ -1,6 +1,7 @@
 {
     "common": {
         "export_dir": "/common/covidcast/receiving/usa-facts",
+        "input_dir": "./input-cache",
         "log_filename": "/var/log/indicators/usafacts.log"
     },
     "indicator": {

usafacts/delphi_usafacts/pull.py

Lines changed: 24 additions & 2 deletions

@@ -1,10 +1,13 @@
 # -*- coding: utf-8 -*-
 """Functions for pulling data from the USAFacts website."""
+from datetime import date
 import hashlib
 from logging import Logger
+import os
 
 import numpy as np
 import pandas as pd
+import requests
 
 # Columns to drop the the data frame.
 DROP_COLUMNS = [
@@ -14,8 +17,24 @@
     "statefips"
 ]
 
+def fetch(url: str, cache: str) -> pd.DataFrame:
+    """Handle network I/O for fetching raw input data file.
 
-def pull_usafacts_data(base_url: str, metric: str, logger: Logger) -> pd.DataFrame:
+    This is necessary because for some reason pd.read_csv is generating
+    403:Forbidden on the new URLs.
+    """
+    r = requests.get(url)
+    r.raise_for_status()
+    datestamp = date.today().strftime('%Y%m%d')
+    name = url.split('/')[-1].replace('.csv','')
+    os.makedirs(cache, exist_ok=True)
+    filename = os.path.join(cache, f"{datestamp}_{name}.csv")
+    with open(filename, "w") as f:
+        f.write(r.text)
+    return pd.read_csv(filename)
+
+
+def pull_usafacts_data(base_url: str, metric: str, logger: Logger, cache: str=None) -> pd.DataFrame:
     """Pull the latest USA Facts data, and conform it into a dataset.
 
     The output dataset has:
@@ -47,14 +66,17 @@ def pull_usafacts_data(base_url: str, metric: str, logger: Logger) -> pd.DataFrame
         Base URL for pulling the USA Facts data
     metric: str
         One of 'confirmed' or 'deaths'. The keys of base_url.
+    logger: Logger
+    cache: str
+        Directory where downloaded csvs should be stashed.
 
     Returns
     -------
     pd.DataFrame
         Dataframe as described above.
     """
     # Read data
-    df = pd.read_csv(base_url.format(metric=metric))
+    df = fetch(base_url.format(metric=metric), cache)
     date_cols = [i for i in df.columns if i.startswith("2")]
     logger.info("data retrieved from source",
                 metric=metric,
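
The new fetch() helper downloads with requests (which succeeds where pd.read_csv was getting 403: Forbidden), stashes a datestamped raw copy under the cache directory, and only then parses that copy. A minimal usage sketch; the URL below is hypothetical, since the real one arrives via params["indicator"]["base_url"]:

    from delphi_usafacts.pull import fetch

    # Hypothetical URL, for illustration only.
    url = "https://example.com/covid_confirmed_usafacts.csv"
    df = fetch(url, "./input-cache")
    # Side effect: the raw download is kept on disk, e.g. as
    # ./input-cache/20210805_covid_confirmed_usafacts.csv (datestamp varies),
    # so each day's input can be audited or replayed later.
    print(df.shape)

One caveat worth noting: cache defaults to None, but os.makedirs(None) raises a TypeError, so callers are expected to always pass a real directory, as run.py now does with input_dir.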

usafacts/delphi_usafacts/run.py

Lines changed: 2 additions & 1 deletion

@@ -90,9 +90,10 @@ def run_module(params: Dict[str, Dict[str, Any]]):
     else:
         export_start_date = datetime.strptime(export_start_date, "%Y-%m-%d")
     export_dir = params["common"]["export_dir"]
+    input_dir = params["common"]["input_dir"]
     base_url = params["indicator"]["base_url"]
 
-    dfs = {metric: pull_usafacts_data(base_url, metric, logger) for metric in METRICS}
+    dfs = {metric: pull_usafacts_data(base_url, metric, logger, input_dir) for metric in METRICS}
     for metric, geo_res, sensor, smoother in product(
             METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTHERS):
         if "cumulative" in sensor and "seven_day_average" in smoother:

usafacts/input-cache/.gitignore

Lines changed: 120 additions & 0 deletions

@@ -0,0 +1,120 @@
+# You should hard commit a prototype for this file, but we
+# want to avoid accidental adding of API tokens and other
+# private data parameters
+params.json
+
+# Do not commit output files
+receiving/*.csv
+
+# Remove macOS files
+.DS_Store
+
+# virtual environment
+dview/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+coverage.xml
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+.static_storage/
+.media/
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/

usafacts/params.json.template

Lines changed: 1 addition & 0 deletions

@@ -1,6 +1,7 @@
 {
   "common": {
     "export_dir": "./receiving",
+    "input_dir": "./input-cache",
    "log_exceptions": false,
    "log_filename": "./usa-facts.log"
  },

usafacts/tests/test_pull.py

Lines changed: 4 additions & 0 deletions

@@ -1,10 +1,13 @@
 import pytest
 import logging
+from unittest.mock import patch
 
 import pandas as pd
 
 from delphi_usafacts.pull import pull_usafacts_data
 
+from test_run import local_fetch
+
 BASE_URL_GOOD = "test_data/small_{metric}_pull.csv"
 
 BASE_URL_BAD = {
@@ -15,6 +18,7 @@
 
 TEST_LOGGER = logging.getLogger()
 
+@patch("delphi_usafacts.pull.fetch", local_fetch)
 class TestPullUSAFacts:
     def test_good_file(self):
         metric = "deaths"
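
Note on the patch target: the fake must replace fetch where it is looked up, in the delphi_usafacts.pull namespace, not in the test module that defines local_fetch. Applied at class level, @patch wraps every method whose name starts with "test", so the whole class reads local CSVs instead of hitting the network; see the self-contained sketch after test_run.py below.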

usafacts/tests/test_run.py

Lines changed: 7 additions & 1 deletion

@@ -2,16 +2,22 @@
 from itertools import product
 from os import listdir
 from os.path import join
+from unittest.mock import patch
 
 import pandas as pd
 
 from delphi_usafacts.run import run_module
 
+def local_fetch(url, cache):
+    return pd.read_csv(url)
+
+@patch("delphi_usafacts.pull.fetch", local_fetch)
 class TestRun:
     """Tests for the `run_module()` function."""
     PARAMS = {
         "common": {
-            "export_dir": "./receiving"
+            "export_dir": "./receiving",
+            "input_dir": "./input_cache"
         },
         "indicator": {
             "base_url": "./test_data/small_{metric}.csv",
