Skip to content

Commit bcf412d

Browse files
authored
Merge pull request #463 from cmu-delphi/change-lint
Fix linting on CHC
2 parents a471262 + 5999c25 commit bcf412d

File tree

3 files changed

+32
-42
lines changed

3 files changed

+32
-42
lines changed

changehc/delphi_changehc/sensor.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,15 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"):
111111
112112
"""
113113
# backfill
114-
total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values, y_data[den_col].values)
114+
total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values,
115+
y_data[den_col].values)
115116

116117
# calculate smoothed counts and jeffreys rate
117118
# the left_gauss_linear smoother is not guaranteed to return values greater than 0
118119

119-
smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(total_counts.flatten(),total_visits)
120+
smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(
121+
total_counts.flatten(), total_visits
122+
)
120123

121124
# in smoothing, the numerator may have become more than the denominator
122125
# simple fix is to clip the max values elementwise to the denominator (note that
@@ -136,12 +139,18 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"):
136139
), f"0 or negative value, {geo_id}"
137140

138141
# cut off at sensor indexes
139-
rate_data = pd.DataFrame({'rate':smoothed_total_rates, 'den': smoothed_total_visits}, index=y_data.index)
142+
rate_data = pd.DataFrame({'rate':smoothed_total_rates, 'den': smoothed_total_visits},
143+
index=y_data.index)
140144
rate_data = rate_data[first_sensor_date:]
141145
include = rate_data['den'] >= Config.MIN_DEN
142146
valid_rates = rate_data[include]
143147
se_valid = valid_rates.eval('sqrt(rate * (1 - rate) / den)')
144148
rate_data['se'] = se_valid
145149

146-
logging.debug(f"{geo_id}: {rate_data['rate'][-1]:.3f},[{rate_data['se'][-1]:.3f}]")
147-
return {"geo_id": geo_id, "rate": 100 * rate_data['rate'], "se": 100 * rate_data['se'], "incl": include}
150+
logging.debug("{0}: {1:.3f},[{2:.3f}]".format(
151+
geo_id, rate_data['rate'][-1], rate_data['se'][-1]
152+
))
153+
return {"geo_id": geo_id,
154+
"rate": 100 * rate_data['rate'],
155+
"se": 100 * rate_data['se'],
156+
"incl": include}

changehc/delphi_changehc/update_sensor.py

Lines changed: 16 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@
66
# standard packages
77
import logging
88
from multiprocessing import Pool, cpu_count
9-
from delphi_utils import GeoMapper, S3ArchiveDiffer, read_params, add_prefix
109

1110
# third party
1211
import numpy as np
1312
import pandas as pd
13+
from delphi_utils import GeoMapper, read_params, add_prefix
14+
1415
# first party
1516
from .config import Config, Constants
17+
from .constants import SIGNALS, SMOOTHED, SMOOTHED_ADJ, NA
1618
from .load_data import load_combined_data
1719
from .sensor import CHCSensor
1820
from .weekday import Weekday
19-
from .constants import SIGNALS, SMOOTHED, SMOOTHED_ADJ, NA
2021

2122

2223
def write_to_csv(output_dict, write_se, out_name, output_path="."):
@@ -28,7 +29,7 @@ def write_to_csv(output_dict, write_se, out_name, output_path="."):
2829
output_path: outfile path to write the csv (default is current directory)
2930
"""
3031
if write_se:
31-
logging.info(f"========= WARNING: WRITING SEs TO {out_name} =========")
32+
logging.info("========= WARNING: WRITING SEs TO {0} =========".format(out_name))
3233
geo_level = output_dict["geo_level"]
3334
dates = output_dict["dates"]
3435
geo_ids = output_dict["geo_ids"]
@@ -52,7 +53,9 @@ def write_to_csv(output_dict, write_se, out_name, output_path="."):
5253
assert not np.isnan(sensor), "value for included sensor is nan"
5354
assert not np.isnan(se), "se for included sensor is nan"
5455
if sensor > 90:
55-
logging.warning(f"value suspiciously high, {geo_id}: {sensor}")
56+
logging.warning("value suspiciously high, {0}: {1}".format(
57+
geo_id, sensor
58+
))
5659
assert se < 5, f"se suspiciously high, {geo_id}: {se}"
5760
if write_se:
5861
assert sensor > 0 and se > 0, "p=0, std_err=0 invalid"
@@ -64,10 +67,12 @@ def write_to_csv(output_dict, write_se, out_name, output_path="."):
6467
"%s,%f,%s,%s,%s\n" % (geo_id, sensor, NA, NA, NA)
6568
)
6669
out_n += 1
67-
logging.debug(f"wrote {out_n} rows for {len(geo_ids)} {geo_level}")
70+
logging.debug("wrote {0} rows for {1} {2}".format(
71+
out_n, len(geo_ids), geo_level
72+
))
6873

6974

70-
class CHCSensorUpdator:
75+
class CHCSensorUpdator: # pylint: disable=too-many-instance-attributes
7176
"""Contains methods to update sensor and write results to csv
7277
"""
7378

@@ -136,7 +141,9 @@ def geo_reindex(self, data):
136141
geo = self.geo
137142
gmpr = GeoMapper()
138143
if geo not in {"county", "state", "msa", "hrr"}:
139-
logging.error(f"{geo} is invalid, pick one of 'county', 'state', 'msa', 'hrr'")
144+
logging.error("{0} is invalid, pick one of 'county', 'state', 'msa', 'hrr'".format(
145+
geo
146+
))
140147
return False
141148
if geo == "county":
142149
data_frame = gmpr.fips_to_megacounty(data,
@@ -203,7 +210,7 @@ def update_sensor(self,
203210
sensor_include[geo_id] = np.array(res.loc[final_sensor_idxs,"incl"])
204211
else:
205212
n_cpu = min(10, cpu_count())
206-
logging.debug(f"starting pool with {n_cpu} workers")
213+
logging.debug("starting pool with {0} workers".format(n_cpu))
207214
with Pool(n_cpu) as pool:
208215
pool_results = []
209216
for geo_id, sub_data in data_frame.groupby(level=0,as_index=False):
@@ -235,30 +242,4 @@ def update_sensor(self,
235242
# write out results
236243
for signal in self.updated_signal_names:
237244
write_to_csv(output_dict, self.se, signal, outpath)
238-
logging.debug(f"wrote files to {outpath}")
239-
'''
240-
params = read_params()
241-
242-
arch_diff = S3ArchiveDiffer(
243-
params["cache_dir"],
244-
params["export_dir"],
245-
params["bucket_name"], "chc",
246-
params["aws_credentials"])
247-
arch_diff.update_cache()
248-
249-
_, common_diffs, new_files = arch_diff.diff_exports()
250-
251-
# Archive changed and new files only
252-
to_archive = [f for f, diff in common_diffs.items() if diff is not None]
253-
to_archive += new_files
254-
_, fails = arch_diff.archive_exports(to_archive)
255-
print(fails)
256-
257-
# Filter existing exports to exclude those that failed to archive
258-
succ_common_diffs = {f: diff for f, diff in common_diffs.items() if f not in fails}
259-
arch_diff.filter_exports(succ_common_diffs)
260-
261-
# Report failures: someone should probably look at them
262-
for exported_file in fails:
263-
print(f"Failed to archive '{exported_file}'")
264-
'''
245+
logging.debug("wrote files to {0}".format(outpath))

changehc/delphi_changehc/weekday.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class Weekday:
1818

1919
@staticmethod
2020
def get_params(data):
21-
"""Correct a signal estimated as numerator/denominator for weekday effects.
21+
r"""Correct a signal estimated as numerator/denominator for weekday effects.
2222
2323
The ordinary estimate would be numerator_t/denominator_t for each time point
2424
t. Instead, model
@@ -63,7 +63,7 @@ def get_params(data):
6363

6464
# Construct design matrix to have weekday indicator columns and then day
6565
# indicators.
66-
X = np.zeros((nums.shape[0], 6 + nums.shape[0]))
66+
X = np.zeros((nums.shape[0], 6 + nums.shape[0])) # pylint: disable=invalid-name
6767
not_sunday = np.where(nums.index.dayofweek != 6)[0]
6868
X[not_sunday, np.array(nums.index.dayofweek)[not_sunday]] = 1
6969
X[np.where(nums.index.dayofweek == 6)[0], :6] = -1

0 commit comments

Comments (0)