Skip to content

Commit 5f1d312

Browse files
rzats and melange396 committed
Automatically parse columns from YAML file (#1114)
* Automatically parse columns from YAML file
* Use constructor
* try alt filename
* Try absolute path
* AGGREGATE_KEY_COLS
* what if none aggregate?
* fix table name
* tests
* ordered csv
* use cols method
* try pkgutil
* cleanup unneeded methods
* Review fixes
* Revert to old filename processing
* thread the name of the yaml schemadefs file through the whole process, and re-add method to write that file

Co-authored-by: rzats <[email protected]>

---------

Co-authored-by: george haff <[email protected]>
1 parent 581b6f0 commit 5f1d312

File tree

8 files changed

+123
-655
lines changed

8 files changed

+123
-655
lines changed

dev/local/setup.cfg

+2
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,8 @@ packages =
2727
delphi.epidata.acquisition.twtr
2828
delphi.epidata.acquisition.wiki
2929
delphi.epidata.client
30+
delphi.epidata.common
31+
delphi.epidata.common.covid_hosp
3032
delphi.epidata.server
3133
delphi.epidata.server.admin
3234
delphi.epidata.server.admin.templates

src/acquisition/covid_hosp/facility/database.py

+6-203
Original file line number | Diff line number | Diff line change
@@ -1,213 +1,16 @@
11
# first party
22
from delphi.epidata.acquisition.covid_hosp.common.database import Database as BaseDatabase
3-
from delphi.epidata.acquisition.covid_hosp.common.database import Columndef
4-
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
53
from delphi.epidata.acquisition.covid_hosp.facility.network import Network
4+
from delphi.epidata.common.covid_hosp.covid_hosp_schema_io import CovidHospSomething
65

76

87
class Database(BaseDatabase):
98

10-
TABLE_NAME = 'covid_hosp_facility'
11-
KEY_COLS = ['hospital_pk', 'collection_week']
12-
AGGREGATE_KEY_COLS = ['address', 'ccn', 'city', 'fips_code', 'geocoded_hospital_address', 'hhs_ids', 'hospital_name', 'hospital_pk', 'hospital_subtype', 'is_metro_micro', 'state', 'zip']
13-
# These are 3-tuples of (
14-
# CSV header name,
15-
# SQL db column name,
16-
# data type
17-
# ) for all the columns in the CSV file.
18-
# Note that the corresponding database column names may be shorter
19-
# due to constraints on the length of column names. See
20-
# /src/ddl/covid_hosp.sql for more information.
21-
ORDERED_CSV_COLUMNS = [
22-
Columndef('hospital_pk', 'hospital_pk', str),
23-
Columndef('collection_week', 'collection_week', Utils.int_from_date),
24-
Columndef('address', 'address', str),
25-
Columndef('all_adult_hospital_beds_7_day_avg', 'all_adult_hospital_beds_7_day_avg', float),
26-
Columndef('all_adult_hospital_beds_7_day_coverage', 'all_adult_hospital_beds_7_day_coverage', int),
27-
Columndef('all_adult_hospital_beds_7_day_sum', 'all_adult_hospital_beds_7_day_sum', int),
28-
Columndef('all_adult_hospital_inpatient_bed_occupied_7_day_avg',
29-
'all_adult_hospital_inpatient_bed_occupied_7_day_avg', float),
30-
Columndef('all_adult_hospital_inpatient_bed_occupied_7_day_coverage',
31-
'all_adult_hospital_inpatient_bed_occupied_7_day_coverage', int),
32-
Columndef('all_adult_hospital_inpatient_bed_occupied_7_day_sum',
33-
'all_adult_hospital_inpatient_bed_occupied_7_day_sum', int),
34-
Columndef('all_adult_hospital_inpatient_beds_7_day_avg', 'all_adult_hospital_inpatient_beds_7_day_avg',
35-
float),
36-
Columndef('all_adult_hospital_inpatient_beds_7_day_coverage',
37-
'all_adult_hospital_inpatient_beds_7_day_coverage', int),
38-
Columndef('all_adult_hospital_inpatient_beds_7_day_sum', 'all_adult_hospital_inpatient_beds_7_day_sum',
39-
int),
40-
Columndef('ccn', 'ccn', str),
41-
Columndef('city', 'city', str),
42-
Columndef('fips_code', 'fips_code', str),
43-
Columndef('geocoded_hospital_address', 'geocoded_hospital_address', Utils.limited_geocode),
44-
Columndef('hhs_ids', 'hhs_ids', str),
45-
Columndef('hospital_name', 'hospital_name', str),
46-
Columndef('hospital_subtype', 'hospital_subtype', str),
47-
Columndef('icu_beds_used_7_day_avg', 'icu_beds_used_7_day_avg', float),
48-
Columndef('icu_beds_used_7_day_coverage', 'icu_beds_used_7_day_coverage', int),
49-
Columndef('icu_beds_used_7_day_sum', 'icu_beds_used_7_day_sum', int),
50-
Columndef('icu_patients_confirmed_influenza_7_day_avg', 'icu_patients_confirmed_influenza_7_day_avg',
51-
float),
52-
Columndef('icu_patients_confirmed_influenza_7_day_coverage',
53-
'icu_patients_confirmed_influenza_7_day_coverage', int),
54-
Columndef('icu_patients_confirmed_influenza_7_day_sum', 'icu_patients_confirmed_influenza_7_day_sum',
55-
int),
56-
Columndef('inpatient_beds_7_day_avg', 'inpatient_beds_7_day_avg', float),
57-
Columndef('inpatient_beds_7_day_coverage', 'inpatient_beds_7_day_coverage', int),
58-
Columndef('inpatient_beds_7_day_sum', 'inpatient_beds_7_day_sum', int),
59-
Columndef('inpatient_beds_used_7_day_avg', 'inpatient_beds_used_7_day_avg', float),
60-
Columndef('inpatient_beds_used_7_day_coverage', 'inpatient_beds_used_7_day_coverage', int),
61-
Columndef('inpatient_beds_used_7_day_sum', 'inpatient_beds_used_7_day_sum', int),
62-
Columndef('is_corrected', 'is_corrected', Utils.parse_bool),
63-
Columndef('is_metro_micro', 'is_metro_micro', Utils.parse_bool),
64-
Columndef('previous_day_admission_adult_covid_confirmed_18-19_7_day_sum',
65-
'previous_day_admission_adult_covid_confirmed_18_19_7_day_sum', int),
66-
Columndef('previous_day_admission_adult_covid_confirmed_20-29_7_day_sum',
67-
'previous_day_admission_adult_covid_confirmed_20_29_7_day_sum', int),
68-
Columndef('previous_day_admission_adult_covid_confirmed_30-39_7_day_sum',
69-
'previous_day_admission_adult_covid_confirmed_30_39_7_day_sum', int),
70-
Columndef('previous_day_admission_adult_covid_confirmed_40-49_7_day_sum',
71-
'previous_day_admission_adult_covid_confirmed_40_49_7_day_sum', int),
72-
Columndef('previous_day_admission_adult_covid_confirmed_50-59_7_day_sum',
73-
'previous_day_admission_adult_covid_confirmed_50_59_7_day_sum', int),
74-
Columndef('previous_day_admission_adult_covid_confirmed_60-69_7_day_sum',
75-
'previous_day_admission_adult_covid_confirmed_60_69_7_day_sum', int),
76-
Columndef('previous_day_admission_adult_covid_confirmed_70-79_7_day_sum',
77-
'previous_day_admission_adult_covid_confirmed_70_79_7_day_sum', int),
78-
Columndef('previous_day_admission_adult_covid_confirmed_7_day_coverage',
79-
'previous_day_admission_adult_covid_confirmed_7_day_coverage', int),
80-
Columndef('previous_day_admission_adult_covid_confirmed_7_day_sum',
81-
'previous_day_admission_adult_covid_confirmed_7_day_sum', int),
82-
Columndef('previous_day_admission_adult_covid_confirmed_80+_7_day_sum',
83-
'previous_day_admission_adult_covid_confirmed_80plus_7_day_sum', int),
84-
Columndef('previous_day_admission_adult_covid_confirmed_unknown_7_day_sum',
85-
'previous_day_admission_adult_covid_confirmed_unknown_7_day_sum', int),
86-
Columndef('previous_day_admission_adult_covid_suspected_18-19_7_day_sum',
87-
'previous_day_admission_adult_covid_suspected_18_19_7_day_sum', int),
88-
Columndef('previous_day_admission_adult_covid_suspected_20-29_7_day_sum',
89-
'previous_day_admission_adult_covid_suspected_20_29_7_day_sum', int),
90-
Columndef('previous_day_admission_adult_covid_suspected_30-39_7_day_sum',
91-
'previous_day_admission_adult_covid_suspected_30_39_7_day_sum', int),
92-
Columndef('previous_day_admission_adult_covid_suspected_40-49_7_day_sum',
93-
'previous_day_admission_adult_covid_suspected_40_49_7_day_sum', int),
94-
Columndef('previous_day_admission_adult_covid_suspected_50-59_7_day_sum',
95-
'previous_day_admission_adult_covid_suspected_50_59_7_day_sum', int),
96-
Columndef('previous_day_admission_adult_covid_suspected_60-69_7_day_sum',
97-
'previous_day_admission_adult_covid_suspected_60_69_7_day_sum', int),
98-
Columndef('previous_day_admission_adult_covid_suspected_70-79_7_day_sum',
99-
'previous_day_admission_adult_covid_suspected_70_79_7_day_sum', int),
100-
Columndef('previous_day_admission_adult_covid_suspected_7_day_coverage',
101-
'previous_day_admission_adult_covid_suspected_7_day_coverage', int),
102-
Columndef('previous_day_admission_adult_covid_suspected_7_day_sum',
103-
'previous_day_admission_adult_covid_suspected_7_day_sum', int),
104-
Columndef('previous_day_admission_adult_covid_suspected_80+_7_day_sum',
105-
'previous_day_admission_adult_covid_suspected_80plus_7_day_sum', int),
106-
Columndef('previous_day_admission_adult_covid_suspected_unknown_7_day_sum',
107-
'previous_day_admission_adult_covid_suspected_unknown_7_day_sum', int),
108-
Columndef('previous_day_admission_influenza_confirmed_7_day_sum',
109-
'previous_day_admission_influenza_confirmed_7_day_sum', int),
110-
Columndef('previous_day_admission_pediatric_covid_confirmed_7_day_coverage',
111-
'previous_day_admission_pediatric_covid_confirmed_7_day_coverage', int),
112-
Columndef('previous_day_admission_pediatric_covid_confirmed_7_day_sum',
113-
'previous_day_admission_pediatric_covid_confirmed_7_day_sum', int),
114-
Columndef('previous_day_admission_pediatric_covid_suspected_7_day_coverage',
115-
'previous_day_admission_pediatric_covid_suspected_7_day_coverage', int),
116-
Columndef('previous_day_admission_pediatric_covid_suspected_7_day_sum',
117-
'previous_day_admission_pediatric_covid_suspected_7_day_sum', int),
118-
Columndef('previous_day_covid_ED_visits_7_day_sum', 'previous_day_covid_ed_visits_7_day_sum', int),
119-
Columndef('previous_day_total_ED_visits_7_day_sum', 'previous_day_total_ed_visits_7_day_sum', int),
120-
Columndef('previous_week_patients_covid_vaccinated_doses_all_7_day',
121-
'previous_week_patients_covid_vaccinated_doses_all_7_day', int),
122-
Columndef('previous_week_patients_covid_vaccinated_doses_all_7_day_sum',
123-
'previous_week_patients_covid_vaccinated_doses_all_7_day_sum', int),
124-
Columndef('previous_week_patients_covid_vaccinated_doses_one_7_day',
125-
'previous_week_patients_covid_vaccinated_doses_one_7_day', int),
126-
Columndef('previous_week_patients_covid_vaccinated_doses_one_7_day_sum',
127-
'previous_week_patients_covid_vaccinated_doses_one_7_day_sum', int),
128-
Columndef('previous_week_personnel_covid_vaccinated_doses_administered_7_day',
129-
'previous_week_personnel_covid_vaccd_doses_administered_7_day', int),
130-
Columndef('previous_week_personnel_covid_vaccinated_doses_administered_7_day_sum',
131-
'previous_week_personnel_covid_vaccd_doses_administered_7_day_sum', int),
132-
Columndef('staffed_adult_icu_bed_occupancy_7_day_avg', 'staffed_adult_icu_bed_occupancy_7_day_avg',
133-
float),
134-
Columndef('staffed_adult_icu_bed_occupancy_7_day_coverage',
135-
'staffed_adult_icu_bed_occupancy_7_day_coverage', int),
136-
Columndef('staffed_adult_icu_bed_occupancy_7_day_sum', 'staffed_adult_icu_bed_occupancy_7_day_sum',
137-
int),
138-
Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_avg',
139-
'staffed_icu_adult_patients_confirmed_suspected_covid_7d_avg', float),
140-
Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage',
141-
'staffed_icu_adult_patients_confirmed_suspected_covid_7d_cov', int),
142-
Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum',
143-
'staffed_icu_adult_patients_confirmed_suspected_covid_7d_sum', int),
144-
Columndef('staffed_icu_adult_patients_confirmed_covid_7_day_avg',
145-
'staffed_icu_adult_patients_confirmed_covid_7_day_avg', float),
146-
Columndef('staffed_icu_adult_patients_confirmed_covid_7_day_coverage',
147-
'staffed_icu_adult_patients_confirmed_covid_7_day_coverage', int),
148-
Columndef('staffed_icu_adult_patients_confirmed_covid_7_day_sum',
149-
'staffed_icu_adult_patients_confirmed_covid_7_day_sum', int),
150-
Columndef('state', 'state', str),
151-
Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg',
152-
'total_adult_patients_hosp_confirmed_suspected_covid_7d_avg', float),
153-
Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage',
154-
'total_adult_patients_hosp_confirmed_suspected_covid_7d_cov', int),
155-
Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum',
156-
'total_adult_patients_hosp_confirmed_suspected_covid_7d_sum', int),
157-
Columndef('total_adult_patients_hospitalized_confirmed_covid_7_day_avg',
158-
'total_adult_patients_hospitalized_confirmed_covid_7_day_avg', float),
159-
Columndef('total_adult_patients_hospitalized_confirmed_covid_7_day_coverage',
160-
'total_adult_patients_hospitalized_confirmed_covid_7_day_coverage', int),
161-
Columndef('total_adult_patients_hospitalized_confirmed_covid_7_day_sum',
162-
'total_adult_patients_hospitalized_confirmed_covid_7_day_sum', int),
163-
Columndef('total_beds_7_day_avg', 'total_beds_7_day_avg', float),
164-
Columndef('total_beds_7_day_coverage', 'total_beds_7_day_coverage', int),
165-
Columndef('total_beds_7_day_sum', 'total_beds_7_day_sum', int),
166-
Columndef('total_icu_beds_7_day_avg', 'total_icu_beds_7_day_avg', float),
167-
Columndef('total_icu_beds_7_day_coverage', 'total_icu_beds_7_day_coverage', int),
168-
Columndef('total_icu_beds_7_day_sum', 'total_icu_beds_7_day_sum', int),
169-
Columndef('total_patients_hospitalized_confirmed_influenza_7_day_avg',
170-
'total_patients_hospitalized_confirmed_influenza_7_day_avg', float),
171-
Columndef('total_patients_hospitalized_confirmed_influenza_7_day_coverage',
172-
'total_patients_hospitalized_confirmed_influenza_7_day_coverage', int),
173-
Columndef('total_patients_hospitalized_confirmed_influenza_7_day_sum',
174-
'total_patients_hospitalized_confirmed_influenza_7_day_sum', int),
175-
Columndef('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_avg',
176-
'total_patients_hosp_confirmed_influenza_and_covid_7d_avg', float),
177-
Columndef('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_coverage',
178-
'total_patients_hosp_confirmed_influenza_and_covid_7d_cov', int),
179-
Columndef('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_sum',
180-
'total_patients_hosp_confirmed_influenza_and_covid_7d_sum', int),
181-
Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg',
182-
'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_avg', float),
183-
Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage',
184-
'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_cov', int),
185-
Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum',
186-
'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_sum', int),
187-
Columndef('total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg',
188-
'total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg', float),
189-
Columndef('total_pediatric_patients_hospitalized_confirmed_covid_7_day_coverage',
190-
'total_pediatric_patients_hosp_confirmed_covid_7d_cov', int),
191-
Columndef('total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum',
192-
'total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum', int),
193-
Columndef('total_personnel_covid_vaccinated_doses_all_7_day',
194-
'total_personnel_covid_vaccinated_doses_all_7_day', int),
195-
Columndef('total_personnel_covid_vaccinated_doses_all_7_day_sum',
196-
'total_personnel_covid_vaccinated_doses_all_7_day_sum', int),
197-
Columndef('total_personnel_covid_vaccinated_doses_none_7_day',
198-
'total_personnel_covid_vaccinated_doses_none_7_day', int),
199-
Columndef('total_personnel_covid_vaccinated_doses_none_7_day_sum',
200-
'total_personnel_covid_vaccinated_doses_none_7_day_sum', int),
201-
Columndef('total_personnel_covid_vaccinated_doses_one_7_day',
202-
'total_personnel_covid_vaccinated_doses_one_7_day', int),
203-
Columndef('total_personnel_covid_vaccinated_doses_one_7_day_sum',
204-
'total_personnel_covid_vaccinated_doses_one_7_day_sum', int),
205-
Columndef('total_staffed_adult_icu_beds_7_day_avg', 'total_staffed_adult_icu_beds_7_day_avg', float),
206-
Columndef('total_staffed_adult_icu_beds_7_day_coverage', 'total_staffed_adult_icu_beds_7_day_coverage',
207-
int),
208-
Columndef('total_staffed_adult_icu_beds_7_day_sum', 'total_staffed_adult_icu_beds_7_day_sum', int),
209-
Columndef('zip', 'zip', str),
210-
]
9+
chs = CovidHospSomething()
10+
TABLE_NAME = chs.get_ds_table_name('covid_hosp_facility')
11+
KEY_COLS = chs.get_ds_key_cols('covid_hosp_facility')
12+
AGGREGATE_KEY_COLS = chs.get_ds_aggregate_key_cols('covid_hosp_facility')
13+
ORDERED_CSV_COLUMNS = chs.get_ds_ordered_csv_cols('covid_hosp_facility')
21114

21215
def __init__(self, *args, **kwargs):
21316
super().__init__(

0 commit comments

Comments
 (0)