8
8
9
9
def download_data (base_url : str , metric : str ) -> pd .DataFrame :
10
10
"""
11
+ Download and format JHU data.
12
+
11
13
Downloads the data from the JHU repo, extracts the UID and the date columns, and
12
14
enforces the date datatype on the time column.
13
15
"""
@@ -24,6 +26,8 @@ def download_data(base_url: str, metric: str) -> pd.DataFrame:
24
26
25
27
def create_diffs_column (df : pd .DataFrame ) -> pd .DataFrame :
26
28
"""
29
+ Compute pairwise differences of cumulative values to get incidence.
30
+
27
31
Using the cumulative_counts column from the dataframe, partitions the dataframe
28
32
into separate time-series based on fips, and then computes pairwise differences
29
33
of the cumulative values to get the incidence values. Boundary cases are handled
@@ -41,9 +45,7 @@ def create_diffs_column(df: pd.DataFrame) -> pd.DataFrame:
41
45
42
46
43
47
def sanity_check_data (df : pd .DataFrame ) -> pd .DataFrame :
44
- """
45
- Perform a final set of sanity checks on the data.
46
- """
48
+ """Perform a final set of sanity checks on the data."""
47
49
days_by_fips = df .groupby ("fips" ).count ()["cumulative_counts" ].unique ()
48
50
unique_days = df ["timestamp" ].unique ()
49
51
@@ -62,7 +64,7 @@ def sanity_check_data(df: pd.DataFrame) -> pd.DataFrame:
62
64
63
65
64
66
def pull_jhu_data (base_url : str , metric : str , gmpr : GeoMapper ) -> pd .DataFrame :
65
- """Pulls the latest Johns Hopkins CSSE data, and conforms it into a dataset
67
+ """Pull the latest Johns Hopkins CSSE data and conform it into a dataset.
66
68
67
69
The output dataset has:
68
70
0 commit comments