|
65 | 65 | ]
|
66 | 66 |
|
67 | 67 |
|
def add_nancodes(df, smoother):
    """Attach missingness-code columns to the dataframe.

    Three columns are written on ``df`` itself: ``missing_val``,
    ``missing_se`` and ``missing_sample_size``.

    Parameters
    ----------
    df:
        Dataframe with a ``val`` column and a MultiIndex whose first level
        is sliced by time below (the caller indexes it by
        ``["timestamp", "geo_id"]``). It is modified in place.
    smoother:
        Name of the smoother that was applied. Only ``"seven_day_average"``
        triggers the early-window marking.

    Returns
    -------
    The same ``df`` object, with the three code columns added.
    """
    # Start from the defaults: values present, se / sample size not applicable.
    default_codes = {
        "missing_val": Nans.NOT_MISSING,
        "missing_se": Nans.NOT_APPLICABLE,
        "missing_sample_size": Nans.NOT_APPLICABLE,
    }
    for column, code in default_codes.items():
        df[column] = code

    if smoother == "seven_day_average":
        # Label slicing on a MultiIndex needs a sorted index; this sort
        # mutates the caller's frame.
        df.sort_index(inplace=True)
        earliest_time = df.index.min()[0]
        window_end = earliest_time + 6 * pd.Timedelta(days=1)
        # .loc label slices include both endpoints, so every row from the
        # earliest time value through window_end is flagged, whether or not
        # its "val" is null.
        early_rows = pd.IndexSlice[:window_end, :]
        df.loc[early_rows, "missing_val"] = Nans.DATA_INSUFFICIENT

    # Any null value that still carries the default code has no known cause.
    val_is_null = df["val"].isnull()
    still_default = df["missing_val"] == Nans.NOT_MISSING
    df.loc[val_is_null & still_default, "missing_val"] = Nans.UNKNOWN
    return df
| 87 | + |
| 88 | + |
68 | 89 | def run_module(params: Dict[str, Dict[str, Any]]):
|
69 | 90 | """Run the usafacts indicator.
|
70 | 91 |
|
@@ -114,30 +135,16 @@ def run_module(params: Dict[str, Dict[str, Any]]):
|
114 | 135 | # Aggregate to appropriate geographic resolution
|
115 | 136 | df = geo_map(df, geo_res, sensor)
|
116 | 137 | df.set_index(["timestamp", "geo_id"], inplace=True)
|
117 |
| - idx = pd.IndexSlice |
118 | 138 |
|
119 | 139 | # Smooth
|
120 |
| - smooth_obj, smoother_prefix, is_smooth_wip, smoother_lag = SMOOTHERS_MAP[smoother] |
| 140 | + smooth_obj, smoother_prefix, _, smoother_lag = SMOOTHERS_MAP[smoother] |
121 | 141 | df["val"] = df[sensor].groupby(level=1).transform(smooth_obj.smooth)
|
122 | 142 |
|
123 | 143 | # USAFacts is not a survey indicator
|
124 | 144 | df["se"] = np.nan
|
125 | 145 | df["sample_size"] = np.nan
|
126 | 146 |
|
127 |
| - # Default missing code |
128 |
| - df["missing_val"] = Nans.NOT_MISSING |
129 |
| - df["missing_se"] = Nans.NOT_APPLICABLE |
130 |
| - df["missing_sample_size"] = Nans.NOT_APPLICABLE |
131 |
| - |
132 |
| - # Mark early smoothing entries as data insufficient |
133 |
| - if smoother == "seven_day_average": |
134 |
| - df.sort_index(inplace=True) |
135 |
| - min_time_value = df.index.min()[0] + 6 * pd.Timedelta(days=1) |
136 |
| - df.loc[idx[:min_time_value, :], "missing_val"] = Nans.DATA_INSUFFICIENT |
137 |
| - |
138 |
| - # Mark any remaining nans with unknown |
139 |
| - remaining_nans_mask = df["val"].isnull() & (df["missing_val"] == Nans.NOT_MISSING) |
140 |
| - df.loc[remaining_nans_mask, "missing_val"] = Nans.UNKNOWN |
| 147 | + df = add_nancodes(df, smoother) |
141 | 148 |
|
142 | 149 | df.reset_index(inplace=True)
|
143 | 150 | sensor_name = SENSOR_NAME_MAP[sensor][0]
|
|
0 commit comments