|
11 | 11 |
|
12 | 12 | from delphi_utils import (
|
13 | 13 | create_export_csv,
|
14 |
| - S3ArchiveDiffer, |
15 | 14 | get_structured_logger
|
16 | 15 | )
|
17 | 16 |
|
@@ -45,17 +44,6 @@ def run_module(params):
|
45 | 44 | export_dir = params["common"]["export_dir"]
|
46 | 45 | parquet_url = params["indicator"]["parquet_url"]
|
47 | 46 |
|
48 |
| - # Archive Differ configuration |
49 |
| - if "archive" in params: |
50 |
| - cache_dir = params["archive"]["cache_dir"] |
51 |
| - arch_diff = S3ArchiveDiffer( |
52 |
| - cache_dir, export_dir, |
53 |
| - params["archive"]["bucket_name"], "CAN", |
54 |
| - params["archive"]["aws_credentials"]) |
55 |
| - arch_diff.update_cache() |
56 |
| - else: |
57 |
| - arch_diff = None |
58 |
| - |
59 | 47 | # Load CAN county-level testing data
|
60 | 48 | print("Pulling CAN data")
|
61 | 49 | df_pq = load_data(parquet_url)
|
@@ -93,26 +81,6 @@ def run_module(params):
|
93 | 81 | num_exported_files += exported_csv_dates.size * 2
|
94 | 82 | print(f"Exported dates: {earliest} to {latest}")
|
95 | 83 |
|
96 |
| - # Perform archive differencing |
97 |
| - if not arch_diff is None: |
98 |
| - # Diff exports, and make incremental versions |
99 |
| - _, common_diffs, new_files = arch_diff.diff_exports() |
100 |
| - |
101 |
| - # Archive changed and new files only |
102 |
| - to_archive = [f for f, diff in common_diffs.items() if diff is not None] |
103 |
| - to_archive += new_files |
104 |
| - _, fails = arch_diff.archive_exports(to_archive) |
105 |
| - |
106 |
| - # Filter existing exports to exclude those that failed to archive |
107 |
| - succ_common_diffs = { |
108 |
| - f: diff for f, diff in common_diffs.items() if f not in fails |
109 |
| - } |
110 |
| - arch_diff.filter_exports(succ_common_diffs) |
111 |
| - |
112 |
| - # Report failures: someone should probably look at them |
113 |
| - for exported_file in fails: |
114 |
| - print(f"Failed to archive '{exported_file}'") |
115 |
| - |
116 | 84 | elapsed_time_in_seconds = round(time.time() - start_time, 2)
|
117 | 85 | max_lag_in_days = (datetime.now() - min(max_dates_exported)).days
|
118 | 86 | logger.info("Completed indicator run",
|
|
0 commit comments