 #' @template indicator-template
 #' @template signal-template
 #'
-#' @importFrom dplyr %>% filter select group_by summarize across everything group_split ungroup
+#' @importFrom dplyr %>% filter group_by summarize across everything group_split ungroup
 #' @importFrom tidyr drop_na
-#' @importFrom rlang .data .env
 #'
 #' @export
 run_backfill <- function(df, params,
                          refd_col = "time_value", lag_col = "lag", issued_col = "issue_date",
                          signal_suffixes = c(""), indicator = "", signal = "") {
-  df <- filter(df, .data$lag < params$ref_lag + 30) # a rough filtration to save memory
+  df <- filter(df, lag < params$ref_lag + 30) # a rough filtration to save memory
 
   geo_levels <- params$geo_levels
   if ("state" %in% geo_levels) {
@@ -34,16 +33,17 @@ run_backfill <- function(df, params,
       # Aggregate counties up to state level
       agg_cols <- c("geo_value", issued_col, refd_col, lag_col)
       # Sum all non-agg columns. Summarized columns keep original names
+      df$geo_value <- df$state_id
+      df$state_id <- NULL
       df <- df %>%
-        select(-.data$geo_value, geo_value = .data$state_id) %>%
         group_by(across(agg_cols)) %>%
         summarize(across(everything(), sum)) %>%
         ungroup()
     }
     if (geo_level == "county") {
       # Keep only 200 most populous (within the US) counties
       top_200_geos <- get_populous_counties()
-      df <- filter(df, .data$geo_value %in% top_200_geos)
+      df <- filter(df, geo_value %in% top_200_geos)
     }
 
     test_data_list <- list()
@@ -58,7 +58,7 @@ run_backfill <- function(df, params,
     }
 
     msg_ts("Splitting data into geo groups")
-    group_dfs <- group_split(df, .data$geo_value)
+    group_dfs <- group_split(df, geo_value)
 
     # Build model for each location
     for (subdf in group_dfs) {
@@ -112,15 +112,15 @@ run_backfill <- function(df, params,
       )
     }
     combined_df <- add_params_for_dates(combined_df, refd_col, lag_col)
-    combined_df <- combined_df %>% filter(.data$lag < params$ref_lag)
+    combined_df <- combined_df %>% filter(lag < params$ref_lag)
 
     geo_train_data <- combined_df %>%
-      filter(.data$issue_date < params$training_end_date) %>%
-      filter(.data$target_date <= params$training_end_date) %>%
-      filter(.data$target_date > params$training_start_date) %>%
+      filter(issue_date < params$training_end_date) %>%
+      filter(target_date <= params$training_end_date) %>%
+      filter(target_date > params$training_start_date) %>%
       drop_na()
     geo_test_data <- combined_df %>%
-      filter(.data$issue_date %in% params$test_dates) %>%
+      filter(issue_date %in% params$test_dates) %>%
       drop_na()
 
     if (nrow(geo_test_data) == 0) {
@@ -135,8 +135,8 @@ run_backfill <- function(df, params,
     if (value_type == "fraction") {
       # Use beta prior approach to adjust fractions
       geo_prior_test_data = combined_df %>%
-        filter(.data$issue_date > min(params$test_dates) - 7) %>%
-        filter(.data$issue_date <= max(params$test_dates))
+        filter(issue_date > min(params$test_dates) - 7) %>%
+        filter(issue_date <= max(params$test_dates))
       updated_data <- frac_adj(geo_train_data, geo_test_data, geo_prior_test_data,
                                indicator = indicator, signal = signal,
                                geo_level = geo_level, signal_suffix = signal_suffix,
@@ -236,9 +236,8 @@ run_backfill <- function(df, params,
 #' @template lag_col-template
 #' @template issued_col-template
 #'
-#' @importFrom dplyr bind_rows mutate %>%
+#' @importFrom dplyr bind_rows %>%
 #' @importFrom parallel detectCores
-#' @importFrom rlang .data :=
 #' @importFrom stringr str_interp
 #'
 #' @export
@@ -251,7 +250,7 @@ main <- function(params,
 
   indicators_subset <- INDICATORS_AND_SIGNALS
   if (params$indicators != "all") {
-    indicators_subset <- filter(indicators_subset, .data$indicator == params$indicators)
+    indicators_subset <- filter(indicators_subset, indicator == params$indicators)
   }
   if (nrow(indicators_subset) == 0) {
     stop("no indicators to process")
@@ -307,14 +306,12 @@ main <- function(params,
   input_data <- lapply(
     files_list,
     function(file) {
-      read_data(file) %>%
-        fips_to_geovalue() %>%
-        mutate(
-          # Use `glue` syntax to construct a new field by variable,
-          # from https://stackoverflow.com/a/26003971/14401472
-          "{refd_col}" := as.Date(.data[[refd_col]], "%Y-%m-%d"),
-          "{issued_col}" := as.Date(.data[[issued_col]], "%Y-%m-%d")
-        )
+      df <- read_data(file) %>%
+        fips_to_geovalue()
+      df[[refd_col]] <- as.Date(df[[refd_col]], "%Y-%m-%d")
+      df[[issued_col]] <- as.Date(df[[issued_col]], "%Y-%m-%d")
+
+      return(df)
     }
   ) %>%
     bind_rows()
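For reference, a minimal, self-contained sketch of the two idioms the updated code relies on: dplyr's data masking lets `filter()` and `group_split()` take bare column names directly, and a column whose name is stored in a variable can be converted with plain `[[` assignment rather than `mutate()` with glue-style `:=`. The data frame, values, and variable names below are invented for illustration only.

```r
library(dplyr)

# Toy input standing in for one indicator file; all values are made up.
df <- data.frame(
  geo_value  = c("pa", "pa", "ny"),
  time_value = c("2021-01-01", "2021-01-02", "2021-01-01"),
  issue_date = c("2021-01-05", "2021-01-06", "2021-01-05"),
  lag        = c(4, 4, 4)
)

refd_col <- "time_value"
issued_col <- "issue_date"

# Column names held in variables: convert by direct assignment, no tidy-eval.
df[[refd_col]] <- as.Date(df[[refd_col]], "%Y-%m-%d")
df[[issued_col]] <- as.Date(df[[issued_col]], "%Y-%m-%d")

# Data masking: bare column names resolve to columns of `df`.
recent <- filter(df, lag < 30)
by_geo <- group_split(recent, geo_value)
```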