Commit 41dcbbe

Address PR comments
* Change "overall" to "Overall" in the region metadata.
* Delete a comment about dropping missings, to reflect the new behavior (missings are no longer dropped at that step).
* Restore code to drop aggregations where the main value is missing.
* Make the state abbreviations in the state_list.csv file all upper case.
Parent: a959082

File tree (3 files changed, +65 -63 lines):

* facebook/delphiFacebook/R/contingency_aggregate.R
* facebook/delphiFacebook/R/contingency_write.R
* facebook/static/state_list.csv


facebook/delphiFacebook/R/contingency_aggregate.R

Lines changed: 5 additions & 2 deletions

@@ -264,8 +264,6 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params)
   }

   ## Find all unique groups and associated frequencies, saved in column `Freq`.
-  # Keep rows with missing values initially so that we get the correct column
-  # names. Explicitly drop groups with missing values in second step.
   unique_groups_counts <- as.data.frame(
     table(df[, group_vars, with=FALSE], exclude=NULL, dnn=group_vars),
     stringsAsFactors=FALSE
@@ -324,6 +322,11 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params)
     aggregation <- aggregations$id[row]
     group_vars <- aggregations$group_by[[row]]
     post_fn <- aggregations$post_fn[[row]]
+
+    # Keep only aggregations where the main value, `val`, is present.
+    dfs_out[[aggregation]] <- dfs_out[[aggregation]][
+      rowSums(is.na(dfs_out[[aggregation]][, c("val", "sample_size")])) == 0,
+    ]

     dfs_out[[aggregation]] <- apply_privacy_censoring(dfs_out[[aggregation]], params)

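For context, the restored filter uses the base-R idiom of counting NAs per row and keeping only rows with zero. A minimal standalone sketch of the same pattern, with made-up data rather than the pipeline's real aggregation output:

agg <- data.frame(
  geo_id      = c("ak", "al", "az", "ca"),
  val         = c(0.12, NA, 0.34, 0.56),
  sample_size = c(120, 80, NA, 200)
)

# TRUE only for rows where neither `val` nor `sample_size` is NA.
keep <- rowSums(is.na(agg[, c("val", "sample_size")])) == 0
agg[keep, ]   # keeps the "ak" and "ca" rows; the rows with an NA are dropped

The trailing comma in the subset (as in the restored code above) keeps all columns while filtering rows.
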
facebook/delphiFacebook/R/contingency_write.R

Lines changed: 8 additions & 9 deletions

@@ -65,7 +65,9 @@ write_contingency_tables <- function(data, params, geo_type, groupby_vars)
 #' @noRd
 add_geo_vars <- function(data, params, geo_type) {

-  start <- data.frame(
+  overall <- "Overall"
+
+  first <- data.frame(
     country = "United States",
     ISO_3 = "USA",
     GID_0 = "USA"
@@ -74,11 +76,11 @@ add_geo_vars <- function(data, params, geo_type) {
   if (geo_type == "nation") {

     rest <- data.frame(
-      region = "overall",
+      region = overall,
       GID_1 = NA_character_,
-      state = "overall",
+      state = overall,
       state_fips = NA_character_,
-      county = "overall",
+      county = overall,
       county_fips = NA_character_
     )

@@ -92,18 +94,15 @@ add_geo_vars <- function(data, params, geo_type) {
     rest <- data.frame(
       region = toupper(data$geo_id),
       state = toupper(data$geo_id),
-      county = "overall",
+      county = overall,
       county_fips = NA_character_
     )

-    rest$state <- toupper(rest$state)
-    states$state <- toupper(states$state)
-
     rest <- left_join(rest, states, by = "state") %>%
       select(region, GID_1, state, state_fips, county, county_fips)
   }

-  geo_vars <- bind_cols(start, rest)
+  geo_vars <- bind_cols(first, rest)

   # Insert the geographic variables in place of the "geo_id" variable.
   index <- which(names(data) == "geo_id")

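The two deleted toupper() calls were only needed because the join key's case differed between the two tables; with state_list.csv now stored in upper case, and the geo_id already upper-cased when `rest` is built, the keys agree. A small sketch with hypothetical rows (only the GID_1 and FIPS values are taken from the CSV), since left_join matches strings exactly and case-sensitively:

library(dplyr)

rest <- data.frame(
  state  = c("CA", "PA"),            # already upper case via toupper(data$geo_id)
  county = "Overall"
)
states <- data.frame(                # mimics the upper-cased state_list.csv
  state      = c("CA", "PA"),
  GID_1      = c("USA.5_1", "USA.39_1"),
  state_fips = c("06", "42")
)

# Both rows match on "state". If `states$state` were still "ca"/"pa",
# GID_1 and state_fips would come back as NA for every row, which is
# what the removed toupper() calls used to guard against.
left_join(rest, states, by = "state")
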
facebook/static/state_list.csv

Lines changed: 52 additions & 52 deletions

@@ -1,52 +1,52 @@
-state,GID_1,state_fips
-"ak","USA.2_1","02"
-"al","USA.1_1","01"
-"ar","USA.4_1","05"
-"az","USA.3_1","04"
-"ca","USA.5_1","06"
-"co","USA.6_1","08"
-"ct","USA.7_1","09"
-"dc","USA.9_1","11"
-"de","USA.8_1","10"
-"fl","USA.10_1","12"
-"ga","USA.11_1","13"
-"hi","USA.12_1","15"
-"ia","USA.16_1","19"
-"id","USA.13_1","16"
-"il","USA.14_1","17"
-"in","USA.15_1","18"
-"ks","USA.17_1","20"
-"ky","USA.18_1","21"
-"la","USA.19_1","22"
-"ma","USA.22_1","25"
-"md","USA.21_1","24"
-"me","USA.20_1","23"
-"mi","USA.23_1","26"
-"mn","USA.24_1","27"
-"mo","USA.26_1","29"
-"ms","USA.25_1","28"
-"mt","USA.27_1","30"
-"nc","USA.34_1","37"
-"nd","USA.35_1","38"
-"ne","USA.28_1","31"
-"nh","USA.30_1","33"
-"nj","USA.31_1","34"
-"nm","USA.32_1","35"
-"nv","USA.29_1","32"
-"ny","USA.33_1","36"
-"oh","USA.36_1","39"
-"ok","USA.37_1","40"
-"or","USA.38_1","41"
-"pa","USA.39_1","42"
-"ri","USA.40_1","44"
-"sc","USA.41_1","45"
-"sd","USA.42_1","46"
-"tn","USA.43_1","47"
-"tx","USA.44_1","48"
-"ut","USA.45_1","49"
-"va","USA.47_1","51"
-"vt","USA.46_1","50"
-"wa","USA.48_1","53"
-"wi","USA.50_1","55"
-"wv","USA.49_1","54"
-"wy","USA.51_1","56"
+"state","GID_1","state_fips"
+"AK","USA.2_1","02"
+"AL","USA.1_1","01"
+"AR","USA.4_1","05"
+"AZ","USA.3_1","04"
+"CA","USA.5_1","06"
+"CO","USA.6_1","08"
+"CT","USA.7_1","09"
+"DC","USA.9_1","11"
+"DE","USA.8_1","10"
+"FL","USA.10_1","12"
+"GA","USA.11_1","13"
+"HI","USA.12_1","15"
+"IA","USA.16_1","19"
+"ID","USA.13_1","16"
+"IL","USA.14_1","17"
+"IN","USA.15_1","18"
+"KS","USA.17_1","20"
+"KY","USA.18_1","21"
+"LA","USA.19_1","22"
+"MA","USA.22_1","25"
+"MD","USA.21_1","24"
+"ME","USA.20_1","23"
+"MI","USA.23_1","26"
+"MN","USA.24_1","27"
+"MO","USA.26_1","29"
+"MS","USA.25_1","28"
+"MT","USA.27_1","30"
+"NC","USA.34_1","37"
+"ND","USA.35_1","38"
+"NE","USA.28_1","31"
+"NH","USA.30_1","33"
+"NJ","USA.31_1","34"
+"NM","USA.32_1","35"
+"NV","USA.29_1","32"
+"NY","USA.33_1","36"
+"OH","USA.36_1","39"
+"OK","USA.37_1","40"
+"OR","USA.38_1","41"
+"PA","USA.39_1","42"
+"RI","USA.40_1","44"
+"SC","USA.41_1","45"
+"SD","USA.42_1","46"
+"TN","USA.43_1","47"
+"TX","USA.44_1","48"
+"UT","USA.45_1","49"
+"VA","USA.47_1","51"
+"VT","USA.46_1","50"
+"WA","USA.48_1","53"
+"WI","USA.50_1","55"
+"WV","USA.49_1","54"
+"WY","USA.51_1","56"

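One detail worth noting about the rewritten file: the FIPS codes keep their leading zeros only if the column is read as character. A sketch of one way to load it; the pipeline's actual loader may differ, so the read.csv call and column classes here are an assumption:

# Read every column as character so state_fips stays "02", "01", ... rather
# than being coerced to the numbers 2, 1, ...
states <- read.csv(
  "facebook/static/state_list.csv",
  colClasses = c(state = "character", GID_1 = "character", state_fips = "character")
)
states$state[1:3]       # "AK" "AL" "AR"
states$state_fips[1:3]  # "02" "01" "05"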