cmu-delphi · dshemetov · Jun 26, 2023 · Jun 21, 2023 · Jun 21, 2023 · Jun 21, 2023
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,22 @@
+# EditorConfig helps developers define and maintain consistent
+# coding styles between different editors and IDEs
+# editorconfig.org
+
+root = true
+
+[*]
+# We recommend that you keep these unchanged
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+
+[*.py]
+# Change these settings to your own preference
+indent_style = space
+indent_size = 4
+
+
+[*.md]
+trim_trailing_whitespace = false
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
@@ -0,0 +1,24 @@
+# style(black): format cdc acquisition
+980b0b7e80c7923b79e14fee620645e680785703
+# style(black): format covidcast_nowcast acquisition
+9e6ff16f599e8feec34a08dd1bddbc5eae347b55
+# style(black): format ecdc acquisition
+d1141d904da4e62992b97c92d5caebd8fadffd42
+# style(black): format flusurv acquisition
+08af0f6b7bff85bbc2b193b63b5abf6a16ba03e4
+# style(black): format fluview acquisition
+0133ef2042c4df8867e91595eb1f64873edb4632
+# style(black): format ght acquisition
+b8900a0bc846888885310911efd6e26459effa99
+# style(black): format kcdc acquisition
+a849384c884934b3b7c3c67b68aa6240277d6b6d
+# style(black): format nidss acquisition
+d04af3c02fda7708a16bec0952b1aa7475acaec7
+# style(black): format paho acquisition
+7f60fbba572c1b6e5153a9ef216895bdc2f7f5b3
+# style(black): format quidel acquisition
+b9ceb400d9248c8271e8342275664ac5524e335d
+# style(black): format twitter acquisition
+07ed83e5768f717ab0f9a62a9209e4e2cffa058d
+# style(black): format wiki acquisition
+923852eafa86b8f8b182d499489249ba8f815843
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,5 +1,26 @@
-
 [tool.black]
-line-length = 200
+line-length = 100
 target-version = ['py38']
 include = 'server,tests/server'
+
+[tool.pylint]
+    [tool.pylint.'MESSAGES CONTROL']
+    max-line-length = 100
+    disable = [
+        'logging-format-interpolation',
+        # Allow pytest functions to be part of a class
+        'no-self-use',
+        'too-many-locals',
+        'too-many-arguments',
+        # Allow pytest classes to have one test
+        'too-few-public-methods',
+    ]
+
+    [tool.pylint.'BASIC']
+    # Allow arbitrarily short-named variables.
+    variable-rgx = ['[a-z_][a-z0-9_]*']
+    argument-rgx = [ '[a-z_][a-z0-9_]*' ]
+    attr-rgx = ['[a-z_][a-z0-9_]*']
+
+    [tool.pylint.'DESIGN']
+    ignored-argument-names = ['(_.*|run_as_module)']
diff --git a/src/acquisition/cdcp/cdc_dropbox_receiver.py b/src/acquisition/cdcp/cdc_dropbox_receiver.py
@@ -29,128 +29,128 @@
 
 
 # location constants
-DROPBOX_BASE_DIR = '/cdc_page_stats'
-DELPHI_BASE_DIR = '/common/cdc_stage'
+DROPBOX_BASE_DIR = "/cdc_page_stats"
+DELPHI_BASE_DIR = "/common/cdc_stage"
 
 
 def get_timestamp_string():
-  """
-  Return the current local date and time as a string.
+    """
+    Return the current local date and time as a string.
 
-  The format is "%Y%m%d_%H%M%S".
-  """
-  return datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
+    The format is "%Y%m%d_%H%M%S".
+    """
+    return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
 
 
 def trigger_further_processing():
-  """Add CDCP processing scripts to the Automation run queue."""
+    """Add CDCP processing scripts to the Automation run queue."""
 
-  # connect
-  u, p = secrets.db.auto
-  cnx = mysql.connector.connect(user=u, password=p, database='automation')
-  cur = cnx.cursor()
+    # connect
+    u, p = secrets.db.auto
+    cnx = mysql.connector.connect(user=u, password=p, database="automation")
+    cur = cnx.cursor()
 
-  # add step "Process CDCP Data" to queue
-  cur.execute('CALL automation.RunStep(46)')
+    # add step "Process CDCP Data" to queue
+    cur.execute("CALL automation.RunStep(46)")
 
-  # disconnect
-  cur.close()
-  cnx.commit()
-  cnx.close()
+    # disconnect
+    cur.close()
+    cnx.commit()
+    cnx.close()
 
 
 def fetch_data():
-  """
-  Check for new files on dropbox, download them, zip them, cleanup dropbox, and
-  trigger further processing of new data.
-  """
-
-  # initialize dropbox api
-  dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token)
-
-  # look for new CDC data files
-  print('checking dropbox:%s' % DROPBOX_BASE_DIR)
-  save_list = []
-  for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries:
-    name = entry.name
-    if name.endswith('.csv') or name.endswith('.zip'):
-      print(' download "%s"' % name)
-      save_list.append(name)
-    else:
-      print(' skip "%s"' % name)
-
-  # determine if there's anything to be done
-  if len(save_list) == 0:
-    print('did not find any new data files')
-    return
-
-  # download new files, saving them inside of a new zip file
-  timestamp = get_timestamp_string()
-  zip_path = '%s/dropbox_%s.zip' % (DELPHI_BASE_DIR, timestamp)
-  print('downloading into delphi:%s' % zip_path)
-  with ZipFile(zip_path, 'w', ZIP_DEFLATED) as zf:
+    """
+    Check for new files on dropbox, download them, zip them, clean up dropbox, and
+    trigger further processing of new data.
+    """
+
+    # initialize dropbox api
+    dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token)
+
+    # look for new CDC data files
+    print(f"checking dropbox: {DROPBOX_BASE_DIR}")
+    save_list = []
+    for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries:
+        name = entry.name
+        if name.endswith(".csv") or name.endswith(".zip"):
+            print(f" download: {name}")
+            save_list.append(name)
+        else:
+            print(f" skip: {name}")
+
+    # determine if there's anything to be done
+    if len(save_list) == 0:
+        print("did not find any new data files")
+        return
+
+    # download new files, saving them inside of a new zip file
+    timestamp = get_timestamp_string()
+    zip_path = f"{DELPHI_BASE_DIR}/dropbox_{timestamp}.zip"
+    print(f"downloading into delphi:{zip_path}")
+    with ZipFile(zip_path, "w", ZIP_DEFLATED) as zf:
+        for name in save_list:
+            # location of the file on dropbox
+            dropbox_path = f"{DROPBOX_BASE_DIR}/{name}"
+            print(f" {dropbox_path}")
+
+            # start the download
+            meta, resp = dbx.files_download(dropbox_path)
+
+            # check status and length
+            if resp.status_code != 200:
+                raise Exception(["resp.status_code", resp.status_code])
+            dropbox_len = meta.size
+            print(f"  need {int(dropbox_len)} bytes...")
+            content_len = int(resp.headers.get("Content-Length", -1))
+            if dropbox_len != content_len:
+                info = ["dropbox_len", dropbox_len, "content_len", content_len]
+                raise Exception(info)
+
+            # finish the download, holding the data in this variable
+            filedata = resp.content
+
+            # check the length again
+            payload_len = len(filedata)
+            print("  downloaded")
+            if dropbox_len != payload_len:
+                info = ["dropbox_len", dropbox_len, "payload_len", payload_len]
+                raise Exception(info)
+
+            # add the downloaded file to the zip file
+            zf.writestr(name, filedata)
+            print("  added")
+
+    # At this point, all the data is stored and awaiting further processing on
+    # the delphi server.
+    print(f"saved all new data in {zip_path}")
+
+    # on dropbox, archive downloaded files so they won't be downloaded again
+    archive_dir = f"archived_reports/processed_{timestamp}"
+    print("archiving files...")
     for name in save_list:
-      # location of the file on dropbox
-      dropbox_path = '%s/%s' % (DROPBOX_BASE_DIR, name)
-      print(' %s' % dropbox_path)
-
-      # start the download
-      meta, resp = dbx.files_download(dropbox_path)
-
-      # check status and length
-      if resp.status_code != 200:
-        raise Exception(['resp.status_code', resp.status_code])
-      dropbox_len = meta.size
-      print('  need %d bytes...' % dropbox_len)
-      content_len = int(resp.headers.get('Content-Length', -1))
-      if dropbox_len != content_len:
-        info = ['dropbox_len', dropbox_len, 'content_len', content_len]
-        raise Exception(info)
-
-      # finish the download, holding the data in this variable
-      filedata = resp.content
-
-      # check the length again
-      payload_len = len(filedata)
-      print('  downloaded')
-      if dropbox_len != payload_len:
-        info = ['dropbox_len', dropbox_len, 'payload_len', payload_len]
-        raise Exception(info)
-
-      # add the downloaded file to the zip file
-      zf.writestr(name, filedata)
-      print('  added')
-
-  # At this point, all the data is stored and awaiting further processing on
-  # the delphi server.
-  print('saved all new data in %s' % zip_path)
-
-  # on dropbox, archive downloaded files so they won't be downloaded again
-  archive_dir = 'archived_reports/processed_%s' % timestamp
-  print('archiving files...')
-  for name in save_list:
-    # source and destination
-    dropbox_src = '%s/%s' % (DROPBOX_BASE_DIR, name)
-    dropbox_dst = '%s/%s/%s' % (DROPBOX_BASE_DIR, archive_dir, name)
-    print(' "%s" -> "%s"' % (dropbox_src, dropbox_dst))
-
-    # move the file
-    meta = dbx.files_move(dropbox_src, dropbox_dst)
-
-    # sanity check
-    if archive_dir not in meta.path_lower:
-      raise Exception('failed to move "%s"' % name)
-
-  # finally, trigger the usual processing flow
-  print('triggering processing flow')
-  trigger_further_processing()
-  print('done')
+        # source and destination
+        dropbox_src = f"{DROPBOX_BASE_DIR}/{name}"
+        dropbox_dst = f"{DROPBOX_BASE_DIR}/{archive_dir}/{name}"
+        print(f" {dropbox_src} -> {dropbox_dst}")
+
+        # move the file
+        meta = dbx.files_move(dropbox_src, dropbox_dst)
+
+        # sanity check
+        if archive_dir not in meta.path_lower:
+            raise Exception(f"failed to move {name}")
+
+    # finally, trigger the usual processing flow
+    print("triggering processing flow")
+    trigger_further_processing()
+    print("done")
 
 
 def main():
-  # fetch new data
-  fetch_data()
+    # fetch new data
+    fetch_data()
 
 
-if __name__ == '__main__':
-  main()
+if __name__ == "__main__":
+    main()