Skip to content

Commit e803593

Browse files
authored
Merge pull request #700 from benjaminysmith/even_more_logging
Add logging to google health and symptoms.
2 parents 3a53bc1 + fdb7bd4 commit e803593

File tree

4 files changed

+71
-11
lines changed

4 files changed

+71
-11
lines changed

ansible/templates/google_health-params-prod.json.j2

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,6 @@
1313
},
1414
"bucket_name": "delphi-covidcast-indicator-output",
1515
"test": false,
16-
"test_data_dir": ""
16+
"test_data_dir": "",
17+
"log_filename": "/var/log/indicators/google_health.log"
1718
}

ansible/templates/google_symptoms-params-prod.json.j2

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@
33
"static_file_dir": "./static",
44
"export_dir": "/common/covidcast/receiving/google-symptoms",
55
"cache_dir": "./cache",
6-
"base_url": "https://raw.githubusercontent.com/google-research/open-covid-19-data/master/data/exports/search_trends_symptoms_dataset/United%20States%20of%20America{sub_url}2020_US_{state}daily_symptoms_dataset.csv"
6+
"base_url": "https://raw.githubusercontent.com/google-research/open-covid-19-data/master/data/exports/search_trends_symptoms_dataset/United%20States%20of%20America{sub_url}2020_US_{state}daily_symptoms_dataset.csv",
7+
"log_filename": "/var/log/indicators/google_symptoms.log"
78
}

google_health/delphi_google_health/run.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,16 @@
77

88
import datetime
99
import logging
10+
import time
1011

1112
import pandas as pd
1213

1314
from delphi_utils import (
1415
read_params,
1516
S3ArchiveDiffer,
1617
add_prefix,
17-
create_export_csv
18+
create_export_csv,
19+
get_structured_logger
1820
)
1921

2022
from .data_tools import format_for_export
@@ -32,7 +34,10 @@ def run_module():
3234
the directory defined by the `export_dir` (should be "receiving" except for
3335
testing purposes).
3436
"""
35-
37+
start_time = time.time()
38+
csv_export_count = 0
39+
oldest_final_export_date = None
40+
3641
# read parameters
3742
params = read_params()
3843
ght_key = params["ght_key"]
@@ -44,6 +49,8 @@ def run_module():
4449
wip_signal = params["wip_signal"]
4550
cache_dir = params["cache_dir"]
4651

52+
logger = get_structured_logger(__name__, filename = params.get("log_filename"))
53+
4754
arch_diff = S3ArchiveDiffer(
4855
cache_dir, export_dir,
4956
params["bucket_name"], "ght",
@@ -95,11 +102,19 @@ def run_module():
95102
for signal in signal_names:
96103
is_smoothed = signal.endswith(SMOOTHED)
97104
for geo_res, df in df_by_geo_res.items():
98-
create_export_csv(format_for_export(df, is_smoothed),
99-
geo_res=geo_res,
100-
sensor=signal,
101-
start_date=start_date,
102-
export_dir=export_dir)
105+
exported_csv_dates = create_export_csv(
106+
format_for_export(df, is_smoothed),
107+
geo_res=geo_res,
108+
sensor=signal,
109+
start_date=start_date,
110+
export_dir=export_dir)
111+
112+
if not exported_csv_dates.empty:
113+
csv_export_count += exported_csv_dates.size
114+
if not oldest_final_export_date:
115+
oldest_final_export_date = max(exported_csv_dates)
116+
oldest_final_export_date = min(
117+
oldest_final_export_date, max(exported_csv_dates))
103118

104119
if not params["test"]:
105120
# Diff exports, and make incremental versions
@@ -117,3 +132,15 @@ def run_module():
117132
# Report failures: someone should probably look at them
118133
for exported_file in fails:
119134
print(f"Failed to archive '{exported_file}'")
135+
136+
elapsed_time_in_seconds = round(time.time() - start_time, 2)
137+
max_lag_in_days = None
138+
formatted_oldest_final_export_date = None
139+
if oldest_final_export_date:
140+
max_lag_in_days = (datetime.datetime.now() - oldest_final_export_date).days
141+
formatted_oldest_final_export_date = oldest_final_export_date.strftime("%Y-%m-%d")
142+
logger.info("Completed indicator run",
143+
elapsed_time_in_seconds = elapsed_time_in_seconds,
144+
csv_export_count = csv_export_count,
145+
max_lag_in_days = max_lag_in_days,
146+
oldest_final_export_date = formatted_oldest_final_export_date)

google_symptoms/delphi_google_symptoms/run.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@
88
from itertools import product
99

1010
import numpy as np
11-
from delphi_utils import read_params, create_export_csv, geomap
11+
import time
12+
from delphi_utils import (
13+
read_params,
14+
create_export_csv,
15+
geomap,
16+
get_structured_logger
17+
)
1218

1319
from .pull import pull_gs_data
1420
from .geo import geo_map
@@ -18,11 +24,17 @@
1824

1925
def run_module():
2026
"""Run Google Symptoms module."""
27+
start_time = time.time()
28+
csv_export_count = 0
29+
oldest_final_export_date = None
30+
2131
params = read_params()
2232
export_start_date = datetime.strptime(params["export_start_date"], "%Y-%m-%d")
2333
export_dir = params["export_dir"]
2434
base_url = params["base_url"]
2535

36+
logger = get_structured_logger(__name__, filename = params.get("log_filename"))
37+
2638
# Pull GS data
2739
dfs = pull_gs_data(base_url)
2840
gmpr = geomap.GeoMapper()
@@ -47,10 +59,29 @@ def run_module():
4759
df = df.loc[~df["val"].isnull(), :]
4860
df = df.reset_index()
4961
sensor_name = "_".join([smoother, "search"])
50-
create_export_csv(
62+
exported_csv_dates = create_export_csv(
5163
df,
5264
export_dir=export_dir,
5365
start_date=SMOOTHERS_MAP[smoother][1](export_start_date),
5466
metric=metric.lower(),
5567
geo_res=geo_res,
5668
sensor=sensor_name)
69+
70+
if not exported_csv_dates.empty:
71+
csv_export_count += exported_csv_dates.size
72+
if not oldest_final_export_date:
73+
oldest_final_export_date = max(exported_csv_dates)
74+
oldest_final_export_date = min(
75+
oldest_final_export_date, max(exported_csv_dates))
76+
77+
elapsed_time_in_seconds = round(time.time() - start_time, 2)
78+
max_lag_in_days = None
79+
formatted_oldest_final_export_date = None
80+
if oldest_final_export_date:
81+
max_lag_in_days = (datetime.now() - oldest_final_export_date).days
82+
formatted_oldest_final_export_date = oldest_final_export_date.strftime("%Y-%m-%d")
83+
logger.info("Completed indicator run",
84+
elapsed_time_in_seconds = elapsed_time_in_seconds,
85+
csv_export_count = csv_export_count,
86+
max_lag_in_days = max_lag_in_days,
87+
oldest_final_export_date = formatted_oldest_final_export_date)

0 commit comments

Comments (0)