Skip to content

Commit 31ec961

Browse files
committed
calculate update dates in a new function
1 parent ecca542 commit 31ec961

File tree

2 files changed

+15 additions, -21 deletions (lines changed)

2 files changed

+15 additions, -21 deletions (lines changed)

src/acquisition/rvdss/rvdss_update.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,18 @@
66
import pandas as pd
77
import os
88

9-
from delphi.epidata.acquisition.rvdss.utils import get_weekly_data, get_revised_data
9+
from delphi.epidata.acquisition.rvdss.utils import get_weekly_data, get_revised_data, get_dashboard_update_date
1010
from delphi.epidata.acquisition.rvdss.constants import DASHBOARD_BASE_URL, RESP_COUNTS_OUTPUT_FILE, POSITIVE_TESTS_OUTPUT_FILE
1111

1212

1313
def main():
14-
weekly_data = get_weekly_data(DASHBOARD_BASE_URL,2024).set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
15-
positive_data = get_revised_data(DASHBOARD_BASE_URL)
14+
headers = {
15+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
16+
}
17+
18+
update_date = get_dashboard_update_date(DASHBOARD_BASE_URL,headers)
19+
weekly_data = get_weekly_data(DASHBOARD_BASE_URL,2024,headers,update_date).set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
20+
positive_data = get_revised_data(DASHBOARD_BASE_URL,headers,update_date)
1621

1722
path1 = './' + RESP_COUNTS_OUTPUT_FILE
1823
path2 = './' + POSITIVE_TESTS_OUTPUT_FILE

src/acquisition/rvdss/utils.py

Lines changed: 7 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -54,16 +54,14 @@ def check_date_format(date_string):
5454

5555
return(new_date)
5656

57-
def get_revised_data(base_url):
58-
headers = {
59-
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
60-
}
61-
57+
def get_dashboard_update_date(base_url,headers):
6258
# Get update date
6359
update_date_url = base_url + DASHBOARD_UPDATE_DATE_FILE
6460
update_date_url_response = requests.get(update_date_url, headers=headers)
6561
update_date = datetime.strptime(update_date_url_response.text,"%m/%d/%Y %H:%M:%S").strftime("%Y-%m-%d")
62+
return(update_date)
6663

64+
def get_revised_data(base_url,headers,update_date):
6765
# Get update data
6866
url = base_url+DASHBOARD_DATA_FILE
6967

@@ -80,7 +78,7 @@ def get_revised_data(base_url):
8078
df['geo_type'] = [create_geo_types(g,"province") for g in df['geo_value']]
8179
df.insert(1,"issue",update_date)
8280

83-
df=df.drop(["weekorder","region","year","week"],axis=1)
81+
#df=df.drop(["weekorder","region","year","week"],axis=1)
8482

8583
df = df.pivot(index=['epiweek','time_value','issue','geo_type','geo_value'],
8684
columns="virus",values=['tests','percentpositive','positivetests'])
@@ -96,16 +94,7 @@ def get_revised_data(base_url):
9694

9795
return(df)
9896

99-
def get_weekly_data(base_url,start_year):
100-
headers = {
101-
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
102-
}
103-
104-
# Get update date
105-
update_date_url = base_url + "RVD_UpdateDate.csv"
106-
update_date_url_response = requests.get(update_date_url, headers=headers)
107-
update_date = datetime.strptime(update_date_url_response.text,"%m/%d/%Y %H:%M:%S").strftime("%Y-%m-%d")
108-
97+
def get_weekly_data(base_url,start_year,headers,update_date):
10998
# Get current week and year
11099
summary_url = base_url + "RVD_SummaryText.csv"
111100
summary_url_response = requests.get(summary_url, headers=headers)
@@ -145,7 +134,7 @@ def get_weekly_data(base_url,start_year):
145134
df_weekly['geo_value'] = [abbreviate_geo(g) for g in df_weekly['geo_value']]
146135
df_weekly['geo_type'] = [create_geo_types(g,"lab") for g in df_weekly['geo_value']]
147136

148-
if df_weekly.columns.isin(["weekorder","date","week"]).all():
149-
df_weekly=df_weekly.drop(["weekorder","date","week"],axis=1)
137+
# if df_weekly.columns.isin(["weekorder","date","week"]).all():
138+
# df_weekly=df_weekly.drop(["weekorder","date","week"],axis=1)
150139

151140
return(df_weekly)

0 commit comments

Comments
 (0)