|
12 | 12 |
|
def filter_contradicting_missing_codes(df, sensor, metric, date, logger=None):
    """Drop rows whose values contradict their missingness codes, and log.

    For each of the "val", "se" and "sample_size" columns, a row is treated
    as contradictory when either:

    * the value is null but its "missing_<column>" code equals
      ``Nans.NOT_MISSING``, or
    * the value is not null but its "missing_<column>" code differs from
      ``Nans.NOT_MISSING``.

    Parameters
    ----------
    df:
        DataFrame holding the columns "val", "se", "sample_size" and the
        matching "missing_val", "missing_se", "missing_sample_size".
    sensor, metric:
        Values interpolated into the log message.
    date:
        Object with a ``strftime`` method; formatted as ``%Y-%m-%d`` in the
        log message. Only used when a logger is given and rows are dropped.
    logger:
        Optional object with an ``info`` method. When provided, one message
        is emitted per column that has contradictory rows removed.

    Returns
    -------
    The input frame with contradictory rows removed. The same object is
    returned unchanged when no column has contradictory rows.
    """
    for column in ("val", "se", "sample_size"):
        # Build the mask from the *current* frame so it always shares the
        # frame's index; a mask computed before earlier filtering would have
        # to be realigned by pandas on every lookup.
        is_null = df[column].isna()
        coded_not_missing = df["missing_" + column].eq(Nans.NOT_MISSING)
        # XNOR: contradictory when both flags agree (null yet coded as not
        # missing, or present yet coded as missing).
        contradictory = ~(is_null ^ coded_not_missing)
        if not contradictory.any():
            continue
        if logger is not None:
            logger.info(
                "Filtering contradictory missing code in " +
                "{0}_{1}_{2}.".format(sensor, metric, date.strftime(format="%Y-%m-%d"))
            )
        df = df.loc[~contradictory]
    return df
|
52 | 31 |
|
53 | 32 | def create_export_csv(
|
|
0 commit comments