Skip to content

Commit 519001f

Browse files
authored
Merge pull request #531 from sgsmob/google_health
Migrate google_health to use `delphi_utils.create_export_csv()`
2 parents abc9cc8 + 122facc commit 519001f

File tree

5 files changed

+52
-147
lines changed

5 files changed

+52
-147
lines changed

google_health/delphi_google_health/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from __future__ import absolute_import
1010

11-
from . import export
11+
from . import data_tools
1212
from . import map_values
1313
from . import pull_api
1414
from . import run
google_health/delphi_google_health/data_tools.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# -*- coding: utf-8 -*-
2+
"""Functions to reformat the data."""
3+
4+
import numpy as np
5+
import pandas as pd
6+
7+
from .smooth import smoothed_values_by_geo_id
8+
9+
# Divisor applied to the "val" column before export (evaluates to 40.0).
RESCALE_VAL = 4000 / 100


def format_for_export(df: pd.DataFrame, smooth: bool) -> pd.DataFrame:
    """Transform data columns of df to match `delphi_utils.create_export_csv()`.

    Parameters
    ----------
    df: pd.DataFrame
        data frame with columns "geo_id", "timestamp", and "val"
    smooth: bool
        should the signal in "val" be smoothed?

    Returns
    -------
    pd.DataFrame
        A copy of `df` in which "val" has been optionally smoothed and then
        divided by `RESCALE_VAL`, and to which "se" and "sample_size" columns
        filled with NaN have been added. All other input columns (e.g.
        "geo_id" and "timestamp") are carried over unchanged.
    """
    # Work on a copy so the caller's frame can be reused for other signals.
    df = df.copy()
    if smooth:
        df["val"] = smoothed_values_by_geo_id(df)

    df["val"] /= RESCALE_VAL
    # No standard error or sample size is computed here; both are exported as missing.
    df["se"] = np.nan
    df["sample_size"] = np.nan
    return df

google_health/delphi_google_health/export.py

Lines changed: 0 additions & 59 deletions
This file was deleted.

google_health/delphi_google_health/run.py

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
from delphi_utils import (
1414
read_params,
1515
S3ArchiveDiffer,
16-
add_prefix
16+
add_prefix,
17+
create_export_csv
1718
)
1819

20+
from .data_tools import format_for_export
1921
from .pull_api import GoogleHealthTrends, get_counts_states, get_counts_dma
2022
from .map_values import derived_counts_from_dma
21-
from .export import export_csv
2223
from .constants import (SIGNALS, RAW, SMOOTHED,
2324
MSA, HRR, STATE, DMA,
2425
PULL_START_DATE)
@@ -68,45 +69,37 @@ def run_module():
6869
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
6970
logging.info("Creating data from %s through %s.", start_date, end_date)
7071

72+
# Dictionary mapping geo resolution to the data corresponding to that resolution.
73+
df_by_geo_res = {}
74+
7175
if not params["test"]:
7276
# setup class to handle API calls
7377
ght = GoogleHealthTrends(ght_key=ght_key)
7478

7579
# read data frame version of the data
76-
df_state = get_counts_states(
80+
df_by_geo_res[STATE] = get_counts_states(
7781
ght, PULL_START_DATE, end_date, static_dir=static_dir, data_dir=data_dir
7882
)
79-
df_dma = get_counts_dma(
83+
df_by_geo_res[DMA] = get_counts_dma(
8084
ght, PULL_START_DATE, end_date, static_dir=static_dir, data_dir=data_dir
8185
)
8286
else:
83-
df_state = pd.read_csv(params["test_data_dir"].format(geo_res="state"))
84-
df_dma = pd.read_csv(params["test_data_dir"].format(geo_res="dma"))
87+
df_by_geo_res[STATE] = pd.read_csv(params["test_data_dir"].format(geo_res="state"))
88+
df_by_geo_res[DMA] = pd.read_csv(params["test_data_dir"].format(geo_res="dma"))
8589

86-
df_hrr, df_msa = derived_counts_from_dma(df_dma, static_dir=static_dir)
90+
df_by_geo_res[HRR], df_by_geo_res[MSA] = derived_counts_from_dma(df_by_geo_res[DMA],
91+
static_dir=static_dir)
8792

8893
signal_names = add_prefix(SIGNALS, wip_signal, prefix="wip_")
8994

9095
for signal in signal_names:
91-
if signal.endswith(SMOOTHED):
92-
# export each geographic region, with both smoothed and unsmoothed data
93-
export_csv(df_state, STATE, signal, smooth=True,
94-
start_date=start_date, receiving_dir=export_dir)
95-
export_csv(df_dma, DMA, signal, smooth=True,
96-
start_date=start_date, receiving_dir=export_dir)
97-
export_csv(df_hrr, HRR, signal, smooth=True,
98-
start_date=start_date, receiving_dir=export_dir)
99-
export_csv(df_msa, MSA, signal, smooth=True,
100-
start_date = start_date, receiving_dir=export_dir)
101-
elif signal.endswith(RAW):
102-
export_csv(df_state, STATE, signal, smooth=False,
103-
start_date=start_date, receiving_dir=export_dir)
104-
export_csv(df_dma, DMA, signal, smooth=False,
105-
start_date=start_date, receiving_dir=export_dir)
106-
export_csv(df_hrr, HRR, signal, smooth=False,
107-
start_date=start_date, receiving_dir=export_dir)
108-
export_csv(df_msa, MSA, signal, smooth=False,
109-
start_date=start_date, receiving_dir=export_dir)
96+
is_smoothed = signal.endswith(SMOOTHED)
97+
for geo_res, df in df_by_geo_res.items():
98+
create_export_csv(format_for_export(df, is_smoothed),
99+
geo_res=geo_res,
100+
sensor=signal,
101+
start_date=start_date,
102+
export_dir=export_dir)
110103

111104
if not params["test"]:
112105
# Diff exports, and make incremental versions

google_health/tests/test_export.py

Lines changed: 0 additions & 61 deletions
This file was deleted.

0 commit comments

Comments
 (0)