cmu-delphi · krivard · Feb 4, 2022 · Feb 2, 2022 · Feb 2, 2022 · Feb 3, 2022
diff --git a/_delphi_utils_python/data_proc/geomap/README.md b/_delphi_utils_python/data_proc/geomap/README.md
@@ -30,7 +30,7 @@ We support the following geocodes.
 
 The source files are requested from a government URL when `geo_data_proc.py` is run (see the top of said script for the URLs). Below we describe the locations to find updated versions of the source files, if they are ever needed.
 
-- ZIP -> FIPS (county) population tables available from [US Census](https://www.census.gov/geographies/reference-files/time-series/geo/relationship-files.html#par_textimage_674173622). This file contains the population of the intersections between ZIP and FIPS regions, allowing the creation of a population-weighted transform between the two. 24 ZIPs did not have population number information associated to them, so we filled those values manually using information available in  [zipdatamaps website](www.zipdatamaps.com).
+- ZIP -> FIPS (county) population tables are available from the [US Census](https://www.census.gov/geographies/reference-files/time-series/geo/relationship-files.html#par_textimage_674173622). This file contains the population of the intersections between ZIP and FIPS regions, allowing the creation of a population-weighted transform between the two. As of 4 February 2022, this source did not include population information for 24 ZIPs that appear in our indicators. We have added those values manually using information available from the [zipdatamaps website](https://www.zipdatamaps.com).
 - ZIP -> HRR -> HSA crosswalk file comes from the 2018 version at the [Dartmouth Atlas Project](https://atlasdata.dartmouth.edu/static/supp_research_data).
 - FIPS -> MSA crosswalk file comes from the September 2018 version of the delineation files at the [US Census Bureau](https://www.census.gov/geographies/reference-files/time-series/demo/metro-micro/delineation-files.html).
 - State Code -> State ID -> State Name comes from the ANSI standard at the [US Census](https://www.census.gov/library/reference/code-lists/ansi.html#par_textimage_3). The first two digits of a FIPS code should match the state code here.

diff --git a/_delphi_utils_python/data_proc/geomap/geo_data_proc.py b/_delphi_utils_python/data_proc/geomap/geo_data_proc.py
@@ -32,6 +32,7 @@
 FIPS_POPULATION_URL = f"https://www2.census.gov/programs-surveys/popest/datasets/2010-{YEAR}/counties/totals/co-est{YEAR}-alldata.csv"
 FIPS_PUERTO_RICO_POPULATION_URL = "https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt?"
 STATE_HHS_FILE = "hhs.txt"
+ZIP_POP_MISSING_FILE = "zip_pop_filling.csv"
 
 # Out files
 FIPS_STATE_OUT_FILENAME = "fips_state_table.csv"
@@ -365,20 +366,13 @@ def derive_zip_population_table():
     df = census_pop.merge(fz_df, on="fips", how="left")
     df["pop"] = df["pop"].multiply(df["weight"], axis=0)
     df = df.drop(columns=["fips", "weight"]).groupby("zip").sum().dropna().reset_index()
-    ## filling population NAs for specific zips on zip_pop_missing Issue #0648
-    ## cheking if each zip still missing, and concatenating if True
-
-    zip_pop_missing = pd.DataFrame(
-        {
-            "zip": ['57756', '57764', '57770', '57772', '57794', '99554', '99563', '99566',
-                    '99573', '99574', '99581', '99585', '99586', '99604', '99620', '99632',
-                    '99650', '99657', '99658', '99662', '99666', '99677', '99686', '99693'],
-            "pop": [1126, 1923, 5271, 2048, 644, 677, 938, 192,
-                    1115, 2348, 762, 417, 605, 1093, 577, 813,
-                    568, 329, 329, 480, 189, 88, 4005, 248]
-        }
-    )
 
+    ## loading population of some zips - Issue #0648
+    zip_pop_missing = pd.read_csv(
+        ZIP_POP_MISSING_FILE,sep=",",
+        dtype={"zip":str,"pop":np.int32}
+        )
+    ## checking if each zip is still missing, and concatenating if True
     for x_zip in zip_pop_missing['zip']:
         if x_zip not in df['zip']:
             df = pd.concat([df, zip_pop_missing[zip_pop_missing['zip'] == x_zip]],

diff --git a/_delphi_utils_python/data_proc/geomap/zip_pop_filling.csv b/_delphi_utils_python/data_proc/geomap/zip_pop_filling.csv
@@ -0,0 +1,25 @@
+zip,pop
+57756,1126
+57764,1923
+57770,5271
+57772,2048
+57794,644
+99554,677
+99563,938
+99566,192
+99573,1115
+99574,2348
+99581,762
+99585,417
+99586,605
+99604,1093
+99620,577
+99632,813
+99650,568
+99657,329
+99658,616
+99662,480
+99666,189
+99677,88
+99686,4005
+99693,248
diff --git a/_delphi_utils_python/data_proc/geomap/zip_pop_filling.txt b/_delphi_utils_python/data_proc/geomap/zip_pop_filling.txt
@@ -0,0 +1,25 @@
+zip,pop
+57756,1126
+57764,1923
+57770,5271
+57772,2048
+57794,644
+99554,677
+99563,938
+99566,192
+99573,1115
+99574,2348
+99581,762
+99585,417
+99586,605
+99604,1093
+99620,577
+99632,813
+99650,568
+99657,329
+99658,616
+99662,480
+99666,189
+99677,88
+99686,4005
+99693,248
-Original file line number
+Diff line change
@@ -0,0 +1,25 @@
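+zip,pop
+57756,1126
+57764,1923
+57770,5271
+57772,2048
+57794,644
+99554,677
+99563,938
+99566,192
+99573,1115
+99574,2348
+99581,762
+99585,417
+99586,605
+99604,1093
+99620,577
+99632,813
+99650,568
+99657,329
+99658,616
+99662,480
+99666,189
+99677,88
+99686,4005
+99693,248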