Skip to content

Migrate google_health to use delphi_utils.create_export_csv() #531

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion google_health/delphi_google_health/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from __future__ import absolute_import

from . import export
from . import data_tools
from . import map_values
from . import pull_api
from . import run
Expand Down
32 changes: 32 additions & 0 deletions google_health/delphi_google_health/data_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
"""Functions to reformat the data."""

import numpy as np
import pandas as pd

from .smooth import smoothed_values_by_geo_id

# Divisor applied to the "val" column before export (evaluates to 40.0).
RESCALE_VAL = 4000 / 100


def format_for_export(df: pd.DataFrame, smooth: bool) -> pd.DataFrame:
    """Transform data columns of df to match those expected by `delphi_utils.create_export_csv()`.

    The input data frame is left untouched; all changes are made on a copy.

    Parameters
    ----------
    df: pd.DataFrame
        data frame with columns "geo_id", "timestamp", and "val"
    smooth: bool
        should the signal in "val" be smoothed?

    Returns
    -------
    pd.DataFrame
        A copy of `df` in which "val" has been (optionally smoothed and then)
        divided by `RESCALE_VAL`, and in which the columns "se" and
        "sample_size" are filled with NaN. Every other column of `df` is
        carried over unchanged.
    """
    # Work on a copy so the caller's frame can be reused for other signals.
    df = df.copy()
    if smooth:
        df["val"] = smoothed_values_by_geo_id(df)

    df["val"] /= RESCALE_VAL
    # This function has no standard error or sample size to report, so both
    # columns are filled with NaN.
    df["se"] = np.nan
    df["sample_size"] = np.nan
    return df
59 changes: 0 additions & 59 deletions google_health/delphi_google_health/export.py

This file was deleted.

45 changes: 19 additions & 26 deletions google_health/delphi_google_health/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
from delphi_utils import (
read_params,
S3ArchiveDiffer,
add_prefix
add_prefix,
create_export_csv
)

from .data_tools import format_for_export
from .pull_api import GoogleHealthTrends, get_counts_states, get_counts_dma
from .map_values import derived_counts_from_dma
from .export import export_csv
from .constants import (SIGNALS, RAW, SMOOTHED,
MSA, HRR, STATE, DMA,
PULL_START_DATE)
Expand Down Expand Up @@ -68,45 +69,37 @@ def run_module():
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
logging.info("Creating data from %s through %s.", start_date, end_date)

# Dictionary mapping geo resolution to the data corresponding to that resolution.
df_by_geo_res = {}

if not params["test"]:
# setup class to handle API calls
ght = GoogleHealthTrends(ght_key=ght_key)

# read data frame version of the data
df_state = get_counts_states(
df_by_geo_res[STATE] = get_counts_states(
ght, PULL_START_DATE, end_date, static_dir=static_dir, data_dir=data_dir
)
df_dma = get_counts_dma(
df_by_geo_res[DMA] = get_counts_dma(
ght, PULL_START_DATE, end_date, static_dir=static_dir, data_dir=data_dir
)
else:
df_state = pd.read_csv(params["test_data_dir"].format(geo_res="state"))
df_dma = pd.read_csv(params["test_data_dir"].format(geo_res="dma"))
df_by_geo_res[STATE] = pd.read_csv(params["test_data_dir"].format(geo_res="state"))
df_by_geo_res[DMA] = pd.read_csv(params["test_data_dir"].format(geo_res="dma"))

df_hrr, df_msa = derived_counts_from_dma(df_dma, static_dir=static_dir)
df_by_geo_res[HRR], df_by_geo_res[MSA] = derived_counts_from_dma(df_by_geo_res[DMA],
static_dir=static_dir)

signal_names = add_prefix(SIGNALS, wip_signal, prefix="wip_")

for signal in signal_names:
if signal.endswith(SMOOTHED):
# export each geographic region, with both smoothed and unsmoothed data
export_csv(df_state, STATE, signal, smooth=True,
start_date=start_date, receiving_dir=export_dir)
export_csv(df_dma, DMA, signal, smooth=True,
start_date=start_date, receiving_dir=export_dir)
export_csv(df_hrr, HRR, signal, smooth=True,
start_date=start_date, receiving_dir=export_dir)
export_csv(df_msa, MSA, signal, smooth=True,
start_date = start_date, receiving_dir=export_dir)
elif signal.endswith(RAW):
export_csv(df_state, STATE, signal, smooth=False,
start_date=start_date, receiving_dir=export_dir)
export_csv(df_dma, DMA, signal, smooth=False,
start_date=start_date, receiving_dir=export_dir)
export_csv(df_hrr, HRR, signal, smooth=False,
start_date=start_date, receiving_dir=export_dir)
export_csv(df_msa, MSA, signal, smooth=False,
start_date=start_date, receiving_dir=export_dir)
is_smoothed = signal.endswith(SMOOTHED)
for geo_res, df in df_by_geo_res.items():
create_export_csv(format_for_export(df, is_smoothed),
geo_res=geo_res,
sensor=signal,
start_date=start_date,
export_dir=export_dir)

if not params["test"]:
# Diff exports, and make incremental versions
Expand Down
61 changes: 0 additions & 61 deletions google_health/tests/test_export.py

This file was deleted.