|
24 | 24 | FIPS_MSA_URL = "https://www2.census.gov/programs-surveys/metro-micro/geographies/reference-files/2018/delineation-files/list1_Sep_2018.xls"
|
25 | 25 | JHU_FIPS_URL = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv"
|
26 | 26 | STATE_CODES_URL = "http://www2.census.gov/geo/docs/reference/state.txt?#"
|
27 |
| - |
28 |
| -# fips, zip, jhu_uid |
| 27 | +FIPS_POPULATION_URL = "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv" |
| 28 | +FIPS_PUERTO_RICO_POPULATION_URL = "https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt?" |
29 | 29 |
|
30 | 30 | # Out files
|
31 | 31 | FIPS_STATE_OUT_FILENAME = "fips_state_table.csv"
|
@@ -63,20 +63,6 @@ def create_fips_zip_crosswalk():
|
63 | 63 | # Pare down the dataframe to just the relevant columns: zip, fips, and population
|
64 | 64 | pop_df = pop_df[["zip", "fips", "POPPT"]].rename(columns={"POPPT": "pop"})
|
65 | 65 |
|
66 |
| - # Find the populations by FIPS and ZIP and write them to files |
67 |
| - ( |
68 |
| - pop_df[["fips", "pop"]] |
69 |
| - .groupby("fips") |
70 |
| - .sum() |
71 |
| - .to_csv(join(OUTPUT_DIR, FIPS_POPULATION_OUT_FILENAME)) |
72 |
| - ) |
73 |
| - ( |
74 |
| - pop_df[["zip", "pop"]] |
75 |
| - .groupby("zip") |
76 |
| - .sum() |
77 |
| - .to_csv(join(OUTPUT_DIR, ZIP_POPULATION_OUT_FILENAME)) |
78 |
| - ) |
79 |
| - |
80 | 66 | # Find the population fractions (the heaviest computation, takes about a minute)
|
81 | 67 | # Note that the denominator in the fractions is the source population
|
82 | 68 | pop_df.set_index(["fips", "zip"], inplace=True)
|
@@ -255,7 +241,7 @@ def create_jhu_uid_fips_crosswalk():
|
255 | 241 | fips_st = jhu_df["fips"].str.len() <= 2
|
256 | 242 | jhu_df.loc[fips_st, "fips"] = jhu_df.loc[fips_st, "fips"].str.ljust(5, "0")
|
257 | 243 |
|
258 |
| - # Drop the FIPS codes in JHU that were hand-modified |
| 244 | + # Drop the two FIPS codes in JHU (see above) that diverged from the official FIPS map |
259 | 245 | dup_ind = jhu_df["fips"].isin(hand_additions["fips"].values) | jhu_df["fips"].isin(
|
260 | 246 | ["02158", "46102"]
|
261 | 247 | )
|
@@ -360,14 +346,81 @@ def create_state_hhs_crosswalk():
|
360 | 346 | )
|
361 | 347 |
|
362 | 348 |
|
def create_fips_population_table():
    """Build the FIPS -> population table and write it to OUTPUT_DIR.

    Populations come from the ``POPESTIMATE2019`` column of the Census file
    at FIPS_POPULATION_URL.  Two hand-assigned codes are appended:

    * ``70002`` (Dukes and Nantucket), set to the sum of the populations of
      counties ``25007`` and ``25019``;
    * ``70003`` (Kansas City), set to a hard-coded figure.

    Any FIPS code still missing afterwards (Puerto Rico and a few other
    small counties) is filled in from the ``POPPT`` column of the file at
    FIPS_PUERTO_RICO_POPULATION_URL, summed per FIPS code.

    The result has columns ``fips`` (5-character string) and ``pop`` and is
    written to FIPS_POPULATION_OUT_FILENAME without an index column.
    """
    dukes_nantucket_fips = "70002"
    kansas_city_fips = "70003"
    dukes_fips = "25007"
    nantucket_fips = "25019"
    kansas_city_pop = 491918  # via Google

    census_pop = pd.read_csv(FIPS_POPULATION_URL, encoding="ISO-8859-1")
    # Zero-padded 2-digit state code + 3-digit county code, built column-wise
    # rather than with a per-row Python lambda.
    census_pop["fips"] = (
        census_pop["STATE"].astype(str).str.zfill(2)
        + census_pop["COUNTY"].astype(str).str.zfill(3)
    )
    census_pop = census_pop.rename(columns={"POPESTIMATE2019": "pop"})[["fips", "pop"]]

    # Keyed lookup so that a missing county raises instead of silently
    # producing a wrong combined population.
    pop_by_fips = census_pop.set_index("fips")["pop"]
    dukes_nantucket_pop = pop_by_fips[dukes_fips] + pop_by_fips[nantucket_fips]

    hand_additions = pd.DataFrame(
        {
            "fips": [dukes_nantucket_fips, kansas_city_fips],
            "pop": [dukes_nantucket_pop, kansas_city_pop],
        }
    )
    census_pop = pd.concat([census_pop, hand_additions], ignore_index=True)

    # Get the file with Puerto Rico populations (and a few other small counties)
    df_pr = pd.read_csv(FIPS_PUERTO_RICO_POPULATION_URL)
    df_pr["fips"] = (
        df_pr["STATE"].astype(str).str.zfill(2)
        + df_pr["COUNTY"].astype(str).str.zfill(3)
    )
    df_pr = df_pr.rename(columns={"POPPT": "pop"})[["fips", "pop"]]
    # Fill the missing data with 2010 information: collapse to one row per
    # FIPS code, then keep only the codes the Census estimates do not cover.
    df_pr = df_pr.groupby("fips").sum().reset_index()
    df_pr = df_pr[~df_pr["fips"].isin(census_pop["fips"])]

    census_pop_pr = pd.concat([census_pop, df_pr])
    census_pop_pr.to_csv(join(OUTPUT_DIR, FIPS_POPULATION_OUT_FILENAME), index=False)
| 392 | + |
| 393 | + |
def derive_zip_population_table():
    """Derive ZIP-level populations from the FIPS population table.

    Reads the FIPS population table and the FIPS -> ZIP crosswalk from
    OUTPUT_DIR, creating either file first if it is not on disk.  Each FIPS
    population is multiplied by the crosswalk ``weight`` and the products are
    summed per ZIP.  The sums are truncated to integers and written to
    ZIP_POPULATION_OUT_FILENAME with columns ``zip`` and ``pop``.
    """
    fips_pop_path = join(OUTPUT_DIR, FIPS_POPULATION_OUT_FILENAME)
    fips_zip_path = join(OUTPUT_DIR, FIPS_ZIP_OUT_FILENAME)

    # Generate whichever input table has not been produced yet.
    if not isfile(fips_pop_path):
        create_fips_population_table()
    if not isfile(fips_zip_path):
        create_fips_zip_crosswalk()

    fips_pop = pd.read_csv(fips_pop_path, dtype={"fips": str, "pop": int})
    fips_zip = pd.read_csv(
        fips_zip_path, dtype={"fips": str, "zip": str, "weight": float}
    )

    # Left merge keeps every FIPS row; rows without a ZIP match get a missing
    # "zip" value.
    merged = fips_pop.merge(fips_zip, on="fips", how="left")
    merged["pop"] = merged["pop"] * merged["weight"]

    zip_pop = merged.drop(columns=["fips", "weight"]).groupby("zip").sum()
    zip_pop = zip_pop.dropna().reset_index()
    # astype(int) drops the fractional part rather than rounding.
    zip_pop["pop"] = zip_pop["pop"].astype(int)
    zip_pop.to_csv(join(OUTPUT_DIR, ZIP_POPULATION_OUT_FILENAME), index=False)
| 414 | + |
| 415 | + |
363 | 416 | def derive_fips_hrr_crosswalk():
|
364 | 417 | """Derives a crosswalk file from FIPS to HRR through FIPS -> ZIP -> HRR
|
365 | 418 | from the crosswalk files made by the functions above."""
|
366 | 419 | if not isfile(join(OUTPUT_DIR, FIPS_ZIP_OUT_FILENAME)):
|
367 | 420 | create_fips_zip_crosswalk()
|
368 | 421 |
|
369 | 422 | if not isfile(join(OUTPUT_DIR, ZIP_HRR_OUT_FILENAME)):
|
370 |
| - create_fips_zip_crosswalk() |
| 423 | + create_zip_hsa_hrr_crosswalk() |
371 | 424 |
|
372 | 425 | fz_df = pd.read_csv(
|
373 | 426 | join(OUTPUT_DIR, FIPS_ZIP_OUT_FILENAME),
|
@@ -461,8 +514,10 @@ def derive_zip_to_state_code():
|
461 | 514 | create_jhu_uid_fips_crosswalk()
|
462 | 515 | create_state_codes_crosswalk()
|
463 | 516 | create_state_hhs_crosswalk()
|
| 517 | + create_fips_population_table() |
464 | 518 |
|
465 | 519 | derive_fips_hrr_crosswalk()
|
466 | 520 | derive_zip_msa_crosswalk()
|
467 | 521 | derive_zip_to_state_code()
|
468 | 522 | derive_fips_state_crosswalk()
|
| 523 | + derive_zip_population_table() |
0 commit comments