Skip to content

Commit fd302a9

Browse files
committed
Update populations with 2019 data and Puerto Rico
1 parent 530e9f8 commit fd302a9

File tree

4 files changed

+35802
-35947
lines changed

4 files changed

+35802
-35947
lines changed

_delphi_utils_python/data_proc/geomap/geo_data_proc.py

Lines changed: 73 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
FIPS_MSA_URL = "https://www2.census.gov/programs-surveys/metro-micro/geographies/reference-files/2018/delineation-files/list1_Sep_2018.xls"
2525
JHU_FIPS_URL = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv"
2626
STATE_CODES_URL = "http://www2.census.gov/geo/docs/reference/state.txt?#"
27-
28-
# fips, zip, jhu_uid
27+
FIPS_POPULATION_URL = "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv"
28+
FIPS_PUERTO_RICO_POPULATION_URL = "https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt?"
2929

3030
# Out files
3131
FIPS_STATE_OUT_FILENAME = "fips_state_table.csv"
@@ -63,20 +63,6 @@ def create_fips_zip_crosswalk():
6363
# Pare down the dataframe to just the relevant columns: zip, fips, and population
6464
pop_df = pop_df[["zip", "fips", "POPPT"]].rename(columns={"POPPT": "pop"})
6565

66-
# Find the populations by FIPS and ZIP and write them to files
67-
(
68-
pop_df[["fips", "pop"]]
69-
.groupby("fips")
70-
.sum()
71-
.to_csv(join(OUTPUT_DIR, FIPS_POPULATION_OUT_FILENAME))
72-
)
73-
(
74-
pop_df[["zip", "pop"]]
75-
.groupby("zip")
76-
.sum()
77-
.to_csv(join(OUTPUT_DIR, ZIP_POPULATION_OUT_FILENAME))
78-
)
79-
8066
# Find the population fractions (the heaviest computation, takes about a minute)
8167
# Note that the denominator in the fractions is the source population
8268
pop_df.set_index(["fips", "zip"], inplace=True)
@@ -255,7 +241,7 @@ def create_jhu_uid_fips_crosswalk():
255241
fips_st = jhu_df["fips"].str.len() <= 2
256242
jhu_df.loc[fips_st, "fips"] = jhu_df.loc[fips_st, "fips"].str.ljust(5, "0")
257243

258-
# Drop the FIPS codes in JHU that were hand-modified
244+
# Drop the two FIPS codes in JHU (see above) that diverged from the official FIPS map
259245
dup_ind = jhu_df["fips"].isin(hand_additions["fips"].values) | jhu_df["fips"].isin(
260246
["02158", "46102"]
261247
)
@@ -360,14 +346,81 @@ def create_state_hhs_crosswalk():
360346
)
361347

362348

349+
def create_fips_population_table():
    """Build the FIPS population table and write it to the output directory.

    The table is assembled in three steps:

    1. Read the Census county estimates from FIPS_POPULATION_URL and keep
       POPESTIMATE2019, keyed by a 5-character FIPS code made of the
       zero-padded 2-digit STATE and 3-digit COUNTY columns.
    2. Append rows for the codes "70002" (Dukes and Nantucket combined) and
       "70003" (Kansas City) and fill in their populations.
    3. For every FIPS code still absent (Puerto Rico and a few other small
       counties), fall back to the 2010 populations obtained by summing POPPT
       per FIPS code in the file at FIPS_PUERTO_RICO_POPULATION_URL.

    The result, with columns ``fips`` and ``pop``, is written to
    FIPS_POPULATION_OUT_FILENAME inside OUTPUT_DIR.
    """
    dukes_nantucket_fips = "70002"
    kansas_city_fips = "70003"

    # Step 1: 2019 estimates keyed by 5-character FIPS.
    estimates = pd.read_csv(FIPS_POPULATION_URL, encoding="ISO-8859-1")
    estimates["fips"] = [
        f"{state:02d}{county:03d}"
        for state, county in zip(estimates["STATE"], estimates["COUNTY"])
    ]
    county_pop = estimates[["fips", "POPESTIMATE2019"]].rename(
        columns={"POPESTIMATE2019": "pop"}
    )

    # Step 2: add placeholder rows for the two extra codes, then fill them in.
    placeholders = pd.DataFrame(
        {
            "fips": [dukes_nantucket_fips, kansas_city_fips],
            "pop": [0, 0],
        }
    )
    county_pop = pd.concat([county_pop, placeholders]).reset_index(drop=True)

    # Dukes and Nantucket: the combined code gets the sum of the two counties.
    dukes_pop = county_pop.loc[county_pop["fips"] == "25007", "pop"].values
    nantucket_pop = county_pop.loc[county_pop["fips"] == "25019", "pop"].values
    is_dukes_nantucket = county_pop["fips"] == dukes_nantucket_fips
    county_pop.loc[is_dukes_nantucket, "pop"] = dukes_pop + nantucket_pop

    # Kansas City: hard-coded figure.
    is_kansas_city = county_pop["fips"] == kansas_city_fips
    county_pop.loc[is_kansas_city, "pop"] = 491918  # via Google

    # Step 3: get the file with Puerto Rico populations (and a few other
    # small counties) and total POPPT over all rows sharing a FIPS code.
    relationship = pd.read_csv(FIPS_PUERTO_RICO_POPULATION_URL)
    state_part = relationship["STATE"].astype(str).str.zfill(2)
    county_part = relationship["COUNTY"].astype(str).str.zfill(3)
    relationship["fips"] = state_part + county_part
    fallback_pop = (
        relationship[["fips", "POPPT"]]
        .rename(columns={"POPPT": "pop"})
        .groupby("fips")
        .sum()
        .reset_index()
    )

    # Fill only the missing data with 2010 information: FIPS codes already
    # covered by the 2019 estimates are left untouched.
    already_covered = fallback_pop["fips"].isin(county_pop["fips"])
    fallback_pop = fallback_pop[~already_covered]

    out_path = join(OUTPUT_DIR, FIPS_POPULATION_OUT_FILENAME)
    pd.concat([county_pop, fallback_pop]).to_csv(out_path, index=False)
392+
393+
394+
def derive_zip_population_table():
    """Derive the ZIP population table from the FIPS population table.

    Uses the FIPS population file and the FIPS -> ZIP crosswalk file in
    OUTPUT_DIR, creating either one first if it does not exist yet. Each FIPS
    population is multiplied by the crosswalk ``weight`` of every ZIP it maps
    to, the products are summed per ZIP, and the totals are cast to ``int``.

    The result, with columns ``zip`` and ``pop``, is written to
    ZIP_POPULATION_OUT_FILENAME inside OUTPUT_DIR.
    """
    fips_pop_path = join(OUTPUT_DIR, FIPS_POPULATION_OUT_FILENAME)
    fips_zip_path = join(OUTPUT_DIR, FIPS_ZIP_OUT_FILENAME)

    # Generate the prerequisite files on demand.
    if not isfile(fips_pop_path):
        create_fips_population_table()
    if not isfile(fips_zip_path):
        create_fips_zip_crosswalk()

    fips_pop = pd.read_csv(fips_pop_path, dtype={"fips": str, "pop": int})
    fips_to_zip = pd.read_csv(
        fips_zip_path,
        dtype={"fips": str, "zip": str, "weight": float},
    )

    # Left merge keeps every FIPS row; rows without a crosswalk match carry a
    # missing zip and are left out by the groupby below.
    weighted = fips_pop.merge(fips_to_zip, on="fips", how="left")
    weighted["pop"] = weighted["pop"] * weighted["weight"]

    zip_pop = weighted.drop(columns=["fips", "weight"])
    zip_pop = zip_pop.groupby("zip").sum()
    zip_pop = zip_pop.dropna().reset_index()
    zip_pop["pop"] = zip_pop["pop"].astype(int)

    zip_pop.to_csv(join(OUTPUT_DIR, ZIP_POPULATION_OUT_FILENAME), index=False)
414+
415+
363416
def derive_fips_hrr_crosswalk():
364417
"""Derives a crosswalk file from FIPS to HRR through FIPS -> ZIP -> HRR
365418
from the crosswalk files made by the functions above."""
366419
if not isfile(join(OUTPUT_DIR, FIPS_ZIP_OUT_FILENAME)):
367420
create_fips_zip_crosswalk()
368421

369422
if not isfile(join(OUTPUT_DIR, ZIP_HRR_OUT_FILENAME)):
370-
create_fips_zip_crosswalk()
423+
create_zip_hsa_hrr_crosswalk()
371424

372425
fz_df = pd.read_csv(
373426
join(OUTPUT_DIR, FIPS_ZIP_OUT_FILENAME),
@@ -461,8 +514,10 @@ def derive_zip_to_state_code():
461514
create_jhu_uid_fips_crosswalk()
462515
create_state_codes_crosswalk()
463516
create_state_hhs_crosswalk()
517+
create_fips_population_table()
464518

465519
derive_fips_hrr_crosswalk()
466520
derive_zip_msa_crosswalk()
467521
derive_zip_to_state_code()
468522
derive_fips_state_crosswalk()
523+
derive_zip_population_table()

0 commit comments

Comments
 (0)