diff --git a/deploy.json b/deploy.json index dda13ef86..dd5f1c79b 100644 --- a/deploy.json +++ b/deploy.json @@ -207,6 +207,15 @@ "dst": "/common/covidcast/README.md" }, + "// acquisition - covid_hosp", + { + "type": "move", + "src": "src/acquisition/covid_hosp/", + "dst": "[[package]]/acquisition/covid_hosp/", + "match": "^.*\\.(py)$", + "add-header-comment": true + }, + "// run unit and coverage tests", {"type": "py3test"} diff --git a/docs/api/README.md b/docs/api/README.md index ae09f0d07..1dd5e5478 100644 --- a/docs/api/README.md +++ b/docs/api/README.md @@ -98,6 +98,7 @@ The parameters available for each source are documented in each linked source-sp | --- | --- | --- | --- | | [`covidcast`](covidcast.md) | COVIDCast | Delphi's COVID-19 surveillance streams. | no | | [`covidcast_meta`](covidcast_meta.md) | COVIDCast Metadata | Metadata for Delphi's COVID-19 surveillance streams. | no | +| [`covid_hosp`](covid_hosp.md) | COVID-19 Hospitalization | COVID-19 Reported Patient Impact and Hospital Capacity. | no | ### Influenza Data diff --git a/docs/api/covid_hosp.md b/docs/api/covid_hosp.md new file mode 100644 index 000000000..6ae4abab9 --- /dev/null +++ b/docs/api/covid_hosp.md @@ -0,0 +1,158 @@ +--- +title: COVID-19 Reported Patient Impact and Hospital Capacity by State Timeseries +parent: Epidata API (Other Epidemics) +--- + +# COVID-19 Hospitalization + +This data source is a mirror of the "COVID-19 Reported Patient Impact and +Hospital Capacity by State Timeseries" dataset provided by the US Department of +Health & Human Services via healthdata.gov. + +See the +[official description at healthdata.gov](https://healthdata.gov/dataset/covid-19-reported-patient-impact-and-hospital-capacity-state-timeseries) +for more information, including a +[data dictionary](https://healthdata.gov/covid-19-reported-patient-impact-and-hospital-capacity-state-data-dictionary). 
+ +General topics not specific to any particular data source are discussed in the +[API overview](README.md). Such topics include: +[contributing](README.md#contributing) and [citing](README.md#citing). + +## Metadata + +This data source provides various measures of COVID-19 burden on patients and healthcare in the US. +- Data source: [US Department of Health & Human Services](https://healthdata.gov/dataset/covid-19-reported-patient-impact-and-hospital-capacity-state-timeseries) (HHS) +- Temporal Resolution: Daily, starting 2020-01-01 +- Spatial Resolution: US States plus DC, PR, and VI +- Open access via [Open Data Commons Open Database License (ODbL)](https://opendatacommons.org/licenses/odbl/1.0/) +- Versioned by Delphi according to "issue" date. New issues are expected to be released roughly weekly. + +# The API + +The base URL is: https://delphi.cmu.edu/epidata/api.php + +See [this documentation](README.md) for details on specifying locations and dates. + +## Parameters + +### Required + +| Parameter | Description | Type | +| --- | --- | --- | +| `states` | two-letter state abbreviations | `list` of states | +| `dates` | dates | `list` of dates or date ranges | + +### Optional + +| Parameter | Description | Type | +| --- | --- | --- | +| `issues` | issues | `list` of "issue" dates or date ranges | + +If `issues` is not specified, then the most recent issue is used by default. 
+ +## Response + +| Field | Description | Type | +| --- | --- | --- | +| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | +| `epidata` | list of results | array of objects | +| `epidata[].state` | state pertaining to this row | string | +| `epidata[].date` | date pertaining to this row | integer | +| `epidata[].issue` | the date on which the dataset containing this row was published | integer | +| `epidata[].*` | see the [data dictionary](https://healthdata.gov/covid-19-reported-patient-impact-and-hospital-capacity-state-data-dictionary) | | +| `message` | `success` or error message | string | + +# Example URLs + +### MA on 2020-05-10 (per most recent issue) +https://delphi.cmu.edu/epidata/api.php?source=covid_hosp&states=MA&dates=20200510 + +```json +{ + "result": 1, + "epidata": [ + { + "state": "MA", + "issue": 20201116, + "date": 20200510, + "hospital_onset_covid": 53, + "hospital_onset_covid_coverage": 84, + "inpatient_beds": 15691, + "inpatient_beds_coverage": 73, + "inpatient_beds_used": 12427, + "inpatient_beds_used_coverage": 83, + "inpatient_beds_used_covid": 3625, + "inpatient_beds_used_covid_coverage": 84, + "previous_day_admission_adult_covid_confirmed": null, + "previous_day_admission_adult_covid_confirmed_coverage": 0, + "previous_day_admission_adult_covid_suspected": null, + "previous_day_admission_adult_covid_suspected_coverage": 0, + "previous_day_admission_pediatric_covid_confirmed": null, + "previous_day_admission_pediatric_covid_confirmed_coverage": 0, + "previous_day_admission_pediatric_covid_suspected": null, + "previous_day_admission_pediatric_covid_suspected_coverage": 0, + "staffed_adult_icu_bed_occupancy": null, + "staffed_adult_icu_bed_occupancy_coverage": 0, + "staffed_icu_adult_patients_confirmed_suspected_covid": null, + "staffed_icu_adult_patients_confirmed_suspected_covid_coverage": 0, + "staffed_icu_adult_patients_confirmed_covid": null, + "staffed_icu_adult_patients_confirmed_covid_coverage": 
0, + "total_adult_patients_hosp_confirmed_suspected_covid": null, + "total_adult_patients_hosp_confirmed_suspected_covid_coverage": 0, + "total_adult_patients_hosp_confirmed_covid": null, + "total_adult_patients_hosp_confirmed_covid_coverage": 0, + "total_pediatric_patients_hosp_confirmed_suspected_covid": null, + "total_pediatric_patients_hosp_confirmed_suspected_covid_coverage": 0, + "total_pediatric_patients_hosp_confirmed_covid": null, + "total_pediatric_patients_hosp_confirmed_covid_coverage": 0, + "total_staffed_adult_icu_beds": null, + "total_staffed_adult_icu_beds_coverage": 0, + "inpatient_beds_utilization_coverage": 72, + "inpatient_beds_utilization_numerator": 10876, + "inpatient_beds_utilization_denominator": 15585, + "percent_of_inpatients_with_covid_coverage": 83, + "percent_of_inpatients_with_covid_numerator": 3607, + "percent_of_inpatients_with_covid_denominator": 12427, + "inpatient_bed_covid_utilization_coverage": 73, + "inpatient_bed_covid_utilization_numerator": 3304, + "inpatient_bed_covid_utilization_denominator": 15691, + "adult_icu_bed_covid_utilization_coverage": null, + "adult_icu_bed_covid_utilization_numerator": null, + "adult_icu_bed_covid_utilization_denominator": null, + "adult_icu_bed_utilization_coverage": null, + "adult_icu_bed_utilization_numerator": null, + "adult_icu_bed_utilization_denominator": null, + "inpatient_beds_utilization": 0.6978504972730191, + "percent_of_inpatients_with_covid": 0.2902550897239881, + "inpatient_bed_covid_utilization": 0.21056656682174496, + "adult_icu_bed_covid_utilization": null, + "adult_icu_bed_utilization": null + } + ], + "message": "success" +} +``` + + +# Code Samples + +Libraries are available for [CoffeeScript](../../src/client/delphi_epidata.coffee), [JavaScript](../../src/client/delphi_epidata.js), [Python](../../src/client/delphi_epidata.py), and [R](../../src/client/delphi_epidata.R). 
class AcquisitionTests(unittest.TestCase):
  """End-to-end check of `covid_hosp` acquisition against local services."""

  def setUp(self):
    """Point clients at the local services and start from empty tables."""

    # the sample files are located relative to the repository root, which is
    # four directory levels above this file
    repo_root = Path(__file__)
    for _ in range(4):
      repo_root = repo_root.parent
    self.test_utils = TestUtils(repo_root)

    # route API requests to the local Epidata web server
    Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php'

    # route database connections to the local epidata database
    secrets.db.host = 'delphi_database_epidata'
    secrets.db.epi = ('user', 'pass')

    # wipe both tables so that the test starts from a blank slate
    statements = (
      'truncate table covid_hosp',
      'truncate table covid_hosp_meta',
    )
    with Database.connect() as database, database.new_cursor() as cursor:
      for statement in statements:
        cursor.execute(statement)

  def test_acquire_dataset(self):
    """Acquire the sample dataset twice; only the first run adds data."""

    def query_ma():
      # the same state and date range is used by every check below
      return Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101))

    # calls to external hosts are the only thing replaced by a mock
    network = MagicMock()
    network.fetch_metadata.return_value = \
      self.test_utils.load_sample_metadata()
    network.fetch_dataset.return_value = \
      self.test_utils.load_sample_dataset()

    # nothing has been acquired yet, so the API reports "no results"
    with self.subTest(name='no data yet'):
      self.assertEqual(query_ma()['result'], -2)

    # the first run stores the sample data in the local database
    with self.subTest(name='first acquisition'):
      self.assertTrue(Update.run(network_impl=network))

    # the stored row is now served by the API
    with self.subTest(name='initial data checks'):
      reply = query_ma()
      self.assertEqual(reply['result'], 1)
      rows = reply['epidata']
      self.assertEqual(len(rows), 1)
      row = rows[0]
      expected_by_field = (
        ('state', 'MA'),
        ('date', 20200510),
        ('issue', 20201116),
        ('hospital_onset_covid', 53),
      )
      for field, expected in expected_by_field:
        self.assertEqual(row[field], expected)
      self.assertAlmostEqual(
        row['inpatient_bed_covid_utilization'], 0.21056656682174496)
      self.assertIsNone(row['adult_icu_bed_utilization'])

      # expect 55 fields per row (56 database columns, except `id`)
      self.assertEqual(len(row), 55)

    # the revision is already known, so a second run must not acquire again
    with self.subTest(name='second acquisition'):
      self.assertFalse(Update.run(network_impl=network))

    # the previously acquired row is still there, and still the only one
    with self.subTest(name='final data checks'):
      reply = query_ma()
      self.assertEqual(reply['result'], 1)
      self.assertEqual(len(reply['epidata']), 1)
class ServerTests(unittest.TestCase):
  """Tests the `covid_hosp` endpoint."""

  def setUp(self):
    """Perform per-test setup."""

    # use the local instance of the Epidata API
    Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php'

    # use the local instance of the epidata database
    secrets.db.host = 'delphi_database_epidata'
    secrets.db.epi = ('user', 'pass')

    # clear relevant tables
    with Database.connect() as db:
      with db.new_cursor() as cur:
        cur.execute('truncate table covid_hosp')
        cur.execute('truncate table covid_hosp_meta')

  def test_query_by_issue(self):
    """Query with and without specifying an issue."""

    # insert dummy data
    # the insert is positional: id (0), issue, state, date and one value,
    # followed by 51 NULLs, for a total of 56 values per row
    # NOTE(review): the assertions below read `value` back as
    # `hospital_onset_covid`, so this presumably relies on that being the
    # fifth column of the table -- confirm against the DDL
    def insert_issue(cur, issue, value):
      so_many_nulls = ', '.join(['null'] * 51)
      cur.execute(f'''insert into covid_hosp values (
        0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}
      )''')
    with Database.connect() as db:
      with db.new_cursor() as cur:
        # inserting out of order to test server-side order by
        insert_issue(cur, 20201201, 123)
        insert_issue(cur, 20201203, 789)
        insert_issue(cur, 20201202, 456)

    # request without issue (defaulting to latest issue)
    with self.subTest(name='no issue (latest)'):
      response = Epidata.covid_hosp('PA', 20201118)

      self.assertEqual(response['result'], 1)
      self.assertEqual(len(response['epidata']), 1)
      self.assertEqual(response['epidata'][0]['issue'], 20201203)
      self.assertEqual(response['epidata'][0]['hospital_onset_covid'], 789)

    # request for specific issue
    with self.subTest(name='specific single issue'):
      response = Epidata.covid_hosp('PA', 20201118, issues=20201201)

      self.assertEqual(response['result'], 1)
      self.assertEqual(len(response['epidata']), 1)
      self.assertEqual(response['epidata'][0]['issue'], 20201201)
      self.assertEqual(response['epidata'][0]['hospital_onset_covid'], 123)

    # request for multiple issues
    # all three inserted issues fall inside the requested range, and the rows
    # are expected back in ascending issue order despite the insertion order
    with self.subTest(name='specific multiple issues'):
      issues = Epidata.range(20201201, 20201231)
      response = Epidata.covid_hosp('PA', 20201118, issues=issues)

      self.assertEqual(response['result'], 1)
      self.assertEqual(len(response['epidata']), 3)
      rows = response['epidata']
      self.assertEqual(rows[0]['issue'], 20201201)
      self.assertEqual(rows[0]['hospital_onset_covid'], 123)
      self.assertEqual(rows[1]['issue'], 20201202)
      self.assertEqual(rows[1]['hospital_onset_covid'], 456)
      self.assertEqual(rows[2]['issue'], 20201203)
      self.assertEqual(rows[2]['hospital_onset_covid'], 789)
class Database:
  """Wrapper around an open database connection for the `covid_hosp` tables.

  Instances are normally obtained through the `Database.connect` context
  manager rather than constructed directly.
  """

  # These are the names that appear in the CSV header, in order of appearance
  # in the database table. However, note that the corresponding database column
  # names may be shorter due to constraints on the length of column names. See
  # /src/ddl/covid_hosp.sql for more information.
  ORDERED_CSV_COLUMNS = [
    'state',
    'date',
    'hospital_onset_covid',
    'hospital_onset_covid_coverage',
    'inpatient_beds',
    'inpatient_beds_coverage',
    'inpatient_beds_used',
    'inpatient_beds_used_coverage',
    'inpatient_beds_used_covid',
    'inpatient_beds_used_covid_coverage',
    'previous_day_admission_adult_covid_confirmed',
    'previous_day_admission_adult_covid_confirmed_coverage',
    'previous_day_admission_adult_covid_suspected',
    'previous_day_admission_adult_covid_suspected_coverage',
    'previous_day_admission_pediatric_covid_confirmed',
    'previous_day_admission_pediatric_covid_confirmed_coverage',
    'previous_day_admission_pediatric_covid_suspected',
    'previous_day_admission_pediatric_covid_suspected_coverage',
    'staffed_adult_icu_bed_occupancy',
    'staffed_adult_icu_bed_occupancy_coverage',
    'staffed_icu_adult_patients_confirmed_and_suspected_covid',
    'staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage',
    'staffed_icu_adult_patients_confirmed_covid',
    'staffed_icu_adult_patients_confirmed_covid_coverage',
    'total_adult_patients_hospitalized_confirmed_and_suspected_covid',
    'total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage',
    'total_adult_patients_hospitalized_confirmed_covid',
    'total_adult_patients_hospitalized_confirmed_covid_coverage',
    'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid',
    'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage',
    'total_pediatric_patients_hospitalized_confirmed_covid',
    'total_pediatric_patients_hospitalized_confirmed_covid_coverage',
    'total_staffed_adult_icu_beds',
    'total_staffed_adult_icu_beds_coverage',
    'inpatient_beds_utilization',
    'inpatient_beds_utilization_coverage',
    'inpatient_beds_utilization_numerator',
    'inpatient_beds_utilization_denominator',
    'percent_of_inpatients_with_covid',
    'percent_of_inpatients_with_covid_coverage',
    'percent_of_inpatients_with_covid_numerator',
    'percent_of_inpatients_with_covid_denominator',
    'inpatient_bed_covid_utilization',
    'inpatient_bed_covid_utilization_coverage',
    'inpatient_bed_covid_utilization_numerator',
    'inpatient_bed_covid_utilization_denominator',
    'adult_icu_bed_covid_utilization',
    'adult_icu_bed_covid_utilization_coverage',
    'adult_icu_bed_covid_utilization_numerator',
    'adult_icu_bed_covid_utilization_denominator',
    'adult_icu_bed_utilization',
    'adult_icu_bed_utilization_coverage',
    'adult_icu_bed_utilization_numerator',
    'adult_icu_bed_utilization_denominator',
  ]

  def __init__(self, connection):
    """Create a new Database object.

    Parameters
    ----------
    connection
      An open connection to a database.
    """

    self.connection = connection

  # `staticmethod` makes the class-level call `Database.connect()` explicit and
  # keeps the function from being bound to an instance by accident
  @staticmethod
  @contextmanager
  def connect(mysql_connector_impl=mysql.connector):
    """Connect to a database and provide the connection as a context manager.

    As long as the context manager exits normally, the connection's transaction
    will be committed. Otherwise, if the context is exited by an Exception, the
    commit is skipped, so nothing written inside the context is committed.

    In any case, the connection will be closed upon exiting the context
    manager.

    Parameters
    ----------
    mysql_connector_impl
      Object providing `connect(host=, user=, password=, database=)`; defaults
      to `mysql.connector` and can be replaced in tests.

    Yields
    ------
    Database
      A `Database` wrapping the newly opened connection.
    """

    # connect to the database
    user, password = secrets.db.epi
    connection = mysql_connector_impl.connect(
      host=secrets.db.host,
      user=user,
      password=password,
      database='epidata')

    try:
      # provide the connection to the context manager
      yield Database(connection)

      # this line is only reached if the calling code did not raise; an
      # exception propagates out of the `yield` above and skips the commit
      connection.commit()
    finally:
      # close the connection in any case
      connection.close()

  @contextmanager
  def new_cursor(self):
    """Create and provide a database cursor as a context manager.

    The cursor will be closed upon exiting the context manager, whether or not
    an exception was raised.
    """

    cursor = self.connection.cursor()
    try:
      yield cursor
    finally:
      cursor.close()

  def contains_revision(self, revision):
    """Return whether the given revision already exists in the database.

    Parameters
    ----------
    revision : str
      Unique revision string.

    Returns
    -------
    bool
      True iff the revision already exists.
    """

    with self.new_cursor() as cursor:
      cursor.execute('''
        SELECT
          count(1) > 0
        FROM
          `covid_hosp_meta`
        WHERE
          `revision_timestamp` = %s
      ''', (revision,))
      # the aggregate query is expected to produce exactly one row
      for (result,) in cursor:
        return bool(result)

    # no row came back at all; honor the documented `bool` return type rather
    # than implicitly returning `None`
    return False

  def insert_metadata(self, issue, revision, meta_json):
    """Add revision metadata to the database.

    The acquisition time is filled in by the database (`NOW()`).

    Parameters
    ----------
    issue : int
      Issue of the dataset in YYYYMMDD format.
    revision : str
      Unique revision string.
    meta_json : str
      Metadata serialized as a JSON string.
    """

    with self.new_cursor() as cursor:
      cursor.execute('''
        INSERT INTO
          `covid_hosp_meta` (
            `issue`,
            `revision_timestamp`,
            `metadata_json`,
            `acquisition_datetime`
          )
        VALUES
          (%s, %s, %s, NOW())
      ''', (issue, revision, meta_json))

  def insert_dataset(self, issue, dataframe):
    """Add a dataset to the database, one table row per dataframe row.

    Parameters
    ----------
    issue : int
      Issue of the dataset in YYYYMMDD format.
    dataframe : pandas.DataFrame
      The dataset. It must contain every column named in
      `ORDERED_CSV_COLUMNS`.
    """

    # the database requires `nan` to be converted to `None` for `NULL` values
    dataframe = dataframe.replace({math.nan: None})

    # two leading values (`id` and `issue`) precede the CSV columns
    # NOTE(review): the statement names no columns, so the values are matched
    # to table columns purely by position -- the order of
    # `ORDERED_CSV_COLUMNS` must stay in sync with the table definition
    num_columns = 2 + len(Database.ORDERED_CSV_COLUMNS)
    value_placeholders = ', '.join(['%s'] * num_columns)
    sql = f'''
      INSERT INTO
        `covid_hosp`
      VALUES
        ({value_placeholders})
    '''

    # an `id` of 0 is passed for every row
    # NOTE(review): presumably this lets the auto-increment column assign the
    # real id -- confirm against the server's SQL mode
    id_and_issue = (0, issue)
    with self.new_cursor() as cursor:
      for _, row in dataframe.iterrows():
        values = tuple(row[name] for name in Database.ORDERED_CSV_COLUMNS)
        cursor.execute(sql, id_and_issue + values)
class Update:
  """Namespace of functions which acquire the dataset into the database.

  All functions are called on the class itself (e.g. `Update.run()`); no
  instance state is involved.
  """

  # regex to extract issue date from revision field
  # example revision: "Mon, 11/16/2020 - 00:55"
  REVISION_PATTERN = re.compile(r'^.*\s(\d+)/(\d+)/(\d+)\s.*$')

  @staticmethod
  def get_entry(obj, *path):
    """Get a deeply nested field from an arbitrary object.

    Parameters
    ----------
    obj : dict
      The object to traverse.
    path : tuple of names and indices
      Path to the desired field in the object.

    Returns
    -------
    object
      The nested object.

    Raises
    ------
    UpdateException
      If the field can't be found.
    """

    try:
      for elem in path:
        # integer path elements may only be applied to lists, and all other
        # path elements may only be applied to non-lists
        is_index = isinstance(elem, int)
        is_list = isinstance(obj, list)
        if is_index != is_list:
          raise UpdateException('index given for non-list or vice versa')
        obj = obj[elem]
      return obj
    except Exception as ex:
      # any failure (including the mismatch raised above) is reported as an
      # `UpdateException` naming the full requested path
      path_str = '/'.join(map(str, path))
      msg = f'unable to access object path "/{path_str}"'
      raise UpdateException(msg) from ex

  @staticmethod
  def get_issue_from_revision(revision):
    """Extract and return an issue from a revision string.

    Parameters
    ----------
    revision : str
      The free-form revision string.

    Returns
    -------
    int
      The issue in YYYYMMDD format.

    Raises
    ------
    UpdateException
      If the issue can't be extracted.
    """

    match = Update.REVISION_PATTERN.match(revision)
    if not match:
      raise UpdateException(f'unable to extract issue from "{revision}"')
    # the three captured numbers are read as month/day/year, in that order
    y, m, d = match.group(3), match.group(1), match.group(2)
    return int(y) * 10000 + int(m) * 100 + int(d)

  @staticmethod
  def get_date_as_int(date):
    """Convert a YYYY-MM-DD date from a string to a YYYYMMDD int.

    Parameters
    ----------
    date : str
      Date in YYYY-MM-DD format.

    Returns
    -------
    int
      Date in YYYYMMDD format.
    """

    return int(date.replace('-', ''))

  @staticmethod
  def extract_resource_details(metadata):
    """Extract resource details, like URL and revision, from metadata.

    Parameters
    ----------
    metadata : dict
      Metadata object as returned from healthdata.gov.

    Returns
    -------
    url : str
      URL of the dataset.
    revision : str
      Free-form revision timestamp of the dataset.

    Raises
    ------
    UpdateException
      If the metadata does not match the expected format.
    """

    # check data integrity
    if Update.get_entry(metadata, 'success') is not True:
      raise UpdateException('metadata does not have `success` equal to `True`')
    if len(Update.get_entry(metadata, 'result')) != 1:
      raise UpdateException('metadata does not have exactly 1 result')
    if len(Update.get_entry(metadata, 'result', 0, 'resources')) != 1:
      raise UpdateException('metadata does not have exactly 1 resource')

    # return resource details; the fields are read through `get_entry` so that
    # a missing field is reported as an `UpdateException`, as documented,
    # instead of escaping as a bare `KeyError`
    resource_path = ('result', 0, 'resources', 0)
    url = Update.get_entry(metadata, *resource_path, 'url')
    revision = Update.get_entry(
      metadata, *resource_path, 'revision_timestamp')
    return url, revision

  @staticmethod
  def run(database_impl=Database, network_impl=Network):
    """Acquire the most recent dataset, unless it was previously acquired.

    Parameters
    ----------
    database_impl
      Object providing `connect()`; defaults to `Database` and can be replaced
      in tests.
    network_impl
      Object providing `fetch_metadata()` and `fetch_dataset(url)`; defaults to
      `Network` and can be replaced in tests.

    Returns
    -------
    bool
      Whether a new dataset was acquired.

    Raises
    ------
    UpdateException
      If the metadata does not match the expected format.
    """

    # get dataset details from metadata
    metadata = network_impl.fetch_metadata()
    url, revision = Update.extract_resource_details(metadata)
    issue = Update.get_issue_from_revision(revision)
    print(f'issue: {issue}')
    print(f'revision: {revision}')

    # connect to the database
    with database_impl.connect() as database:

      # bail if the dataset has already been acquired
      if database.contains_revision(revision):
        print('already have this revision, nothing to do')
        return False

      # add metadata to the database
      metadata_json = json.dumps(metadata)
      database.insert_metadata(issue, revision, metadata_json)

      # download the dataset and add it to the database
      # the date column needs to be reformatted as an int to match the table
      # definition
      dataset = network_impl.fetch_dataset(url)
      dataset['date'] = dataset['date'].apply(Update.get_date_as_int)
      database.insert_dataset(issue, dataset)

      print(f'successfully acquired {len(dataset)} rows')

      # note that the transaction is committed by exiting the `with` block
      return True
# Fetch COVID hospitalization data
#
# `states` and `dates` are required; `issues` is only sent when supplied.
covid_hosp <- function(states, dates, issues) {
  # Both required arguments must have been supplied by the caller
  if(missing(states) || missing(dates)) {
    stop('`states` and `dates` are both required')
  }
  # Assemble the query, starting with the parameters that are always sent
  query <- list(
    source = 'covid_hosp',
    states = .list(states),
    dates = .list(dates)
  )
  # Add the optional parameter only when the caller provided it
  if(!missing(issues)) {
    query[['issues']] <- .list(issues)
  }
  # Make the API call and hand back its result
  return(.request(query))
}
window).Epidata = Epidata diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js index 344487375..39794385a 100644 --- a/src/client/delphi_epidata.js +++ b/src/client/delphi_epidata.js @@ -1,6 +1,8 @@ "use strict"; -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } +function _instanceof(left, right) { if (right != null && typeof Symbol !== "undefined" && right[Symbol.hasInstance]) { return !!right[Symbol.hasInstance](left); } else { return left instanceof right; } } + +function _classCallCheck(instance, Constructor) { if (!_instanceof(instance, Constructor)) { throw new TypeError("Cannot call a class as a function"); } } function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } @@ -608,6 +610,32 @@ function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _d return _request(callback, { 'source': 'covidcast_meta' }); + } // Fetch COVID hospitalization data + + }, { + key: "covid_hosp", + value: function covid_hosp(callback, states, dates, issues) { + var params; // Check parameters + + if (!(states != null && dates != null)) { + throw { + msg: '`states` and `dates` are both required' + }; + } // Set up request + + + params = { + 'source': 'covid_hosp', + 'states': _list(states), + 'dates': _list(dates) + }; + + if (issues != null) { + params.issues = _list(issues); + } // Make the API call + + + return _request(callback, params); } }]); @@ -690,4 +718,4 @@ function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _d (typeof exports !== "undefined" && exports !== null ? 
# Fetch COVID hospitalization data
@staticmethod
def covid_hosp(states, dates, issues=None):
  """Fetch COVID hospitalization data.

  `states` and `dates` are required. `issues` is optional and is only sent
  with the request when it is not None.
  """
  # Both required parameters must be present
  have_required = states is not None and dates is not None
  if not have_required:
    raise Exception('`states` and `dates` are both required')
  # Assemble the query, starting with the parameters that are always sent
  query = dict(
    source='covid_hosp',
    states=Epidata._list(states),
    dates=Epidata._list(dates),
  )
  # Add the optional parameter only when the caller provided it
  if issues is not None:
    query['issues'] = Epidata._list(issues)
  # Make the API call
  return Epidata._request(query)
+ ++----------------------+---------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------------------+---------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| issue | int(11) | NO | | NULL | | +| revision_timestamp | varchar(1024) | NO | UNI | NULL | | +| metadata_json | longtext | NO | | NULL | | +| acquisition_datetime | datetime | NO | | NULL | | ++----------------------+---------------+------+-----+---------+----------------+ + +- `id` + unique identifier for each record +- `issue` + the day (YYYYMMDD) that the dataset was published +- `revision_timestamp` + free-form text field indicating when the dataset was last revised; will + generally contain some type of timestamp, although the format is not + well-defined. copied from the metadata object. +- `metadata_json` + a JSON blob containing verbatim metadata as returned by healthdata.gov +- `acquisition_datetime` + datetime when the dataset was acquired by delphi +*/ + +CREATE TABLE `covid_hosp_meta` ( + `id` INT NOT NULL AUTO_INCREMENT, + `issue` INT NOT NULL, + `revision_timestamp` VARCHAR(1024) NOT NULL, + `metadata_json` JSON NOT NULL, + `acquisition_datetime` DATETIME NOT NULL, + PRIMARY KEY (`id`), + -- for uniqueness + -- for fast lookup of a particular revision + UNIQUE KEY (`revision_timestamp`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + + +/* +`covid_hosp` stores the versioned dataset. + +Data is public under the Open Data Commons Open Database License (ODbL). 
+ ++------------------------------------------------------------------+---------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++------------------------------------------------------------------+---------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| issue | int(11) | NO | MUL | NULL | | +| state | char(2) | NO | MUL | NULL | | +| date | int(11) | NO | | NULL | | +| hospital_onset_covid | int(11) | YES | | NULL | | +| hospital_onset_covid_coverage | int(11) | YES | | NULL | | +| inpatient_beds | int(11) | YES | | NULL | | +| inpatient_beds_coverage | int(11) | YES | | NULL | | +| inpatient_beds_used | int(11) | YES | | NULL | | +| inpatient_beds_used_coverage | int(11) | YES | | NULL | | +| inpatient_beds_used_covid | int(11) | YES | | NULL | | +| inpatient_beds_used_covid_coverage | int(11) | YES | | NULL | | +| previous_day_admission_adult_covid_confirmed | int(11) | YES | | NULL | | +| previous_day_admission_adult_covid_confirmed_coverage | int(11) | YES | | NULL | | +| previous_day_admission_adult_covid_suspected | int(11) | YES | | NULL | | +| previous_day_admission_adult_covid_suspected_coverage | int(11) | YES | | NULL | | +| previous_day_admission_pediatric_covid_confirmed | int(11) | YES | | NULL | | +| previous_day_admission_pediatric_covid_confirmed_coverage | int(11) | YES | | NULL | | +| previous_day_admission_pediatric_covid_suspected | int(11) | YES | | NULL | | +| previous_day_admission_pediatric_covid_suspected_coverage | int(11) | YES | | NULL | | +| staffed_adult_icu_bed_occupancy | int(11) | YES | | NULL | | +| staffed_adult_icu_bed_occupancy_coverage | int(11) | YES | | NULL | | +| staffed_icu_adult_patients_confirmed_suspected_covid | int(11) | YES | | NULL | | +| staffed_icu_adult_patients_confirmed_suspected_covid_coverage | int(11) | YES | | NULL | | +| staffed_icu_adult_patients_confirmed_covid | int(11) | YES | | NULL | | +| 
staffed_icu_adult_patients_confirmed_covid_coverage | int(11) | YES | | NULL | | +| total_adult_patients_hosp_confirmed_suspected_covid | int(11) | YES | | NULL | | +| total_adult_patients_hosp_confirmed_suspected_covid_coverage | int(11) | YES | | NULL | | +| total_adult_patients_hosp_confirmed_covid | int(11) | YES | | NULL | | +| total_adult_patients_hosp_confirmed_covid_coverage | int(11) | YES | | NULL | | +| total_pediatric_patients_hosp_confirmed_suspected_covid | int(11) | YES | | NULL | | +| total_pediatric_patients_hosp_confirmed_suspected_covid_coverage | int(11) | YES | | NULL | | +| total_pediatric_patients_hosp_confirmed_covid | int(11) | YES | | NULL | | +| total_pediatric_patients_hosp_confirmed_covid_coverage | int(11) | YES | | NULL | | +| total_staffed_adult_icu_beds | int(11) | YES | | NULL | | +| total_staffed_adult_icu_beds_coverage | int(11) | YES | | NULL | | +| inpatient_beds_utilization | double | YES | | NULL | | +| inpatient_beds_utilization_coverage | int(11) | YES | | NULL | | +| inpatient_beds_utilization_numerator | int(11) | YES | | NULL | | +| inpatient_beds_utilization_denominator | int(11) | YES | | NULL | | +| percent_of_inpatients_with_covid | double | YES | | NULL | | +| percent_of_inpatients_with_covid_coverage | int(11) | YES | | NULL | | +| percent_of_inpatients_with_covid_numerator | int(11) | YES | | NULL | | +| percent_of_inpatients_with_covid_denominator | int(11) | YES | | NULL | | +| inpatient_bed_covid_utilization | double | YES | | NULL | | +| inpatient_bed_covid_utilization_coverage | int(11) | YES | | NULL | | +| inpatient_bed_covid_utilization_numerator | int(11) | YES | | NULL | | +| inpatient_bed_covid_utilization_denominator | int(11) | YES | | NULL | | +| adult_icu_bed_covid_utilization | double | YES | | NULL | | +| adult_icu_bed_covid_utilization_coverage | int(11) | YES | | NULL | | +| adult_icu_bed_covid_utilization_numerator | int(11) | YES | | NULL | | +| adult_icu_bed_covid_utilization_denominator | 
int(11) | YES | | NULL | | +| adult_icu_bed_utilization | double | YES | | NULL | | +| adult_icu_bed_utilization_coverage | int(11) | YES | | NULL | | +| adult_icu_bed_utilization_numerator | int(11) | YES | | NULL | | +| adult_icu_bed_utilization_denominator | int(11) | YES | | NULL | | ++------------------------------------------------------------------+---------+------+-----+---------+----------------+ + +- `id` + unique identifier for each record +- `issue` + the day (YYYYMMDD) that the dataset was published +- `date` + the day (YYYYMMDD) to which the data applies + +NOTE: Names have been shortened to 64 characters, as this is a technical +limitation of the database. Affected names are: + +staffed_icu_adult_patients_confirmed_and_suspected_covid -> + staffed_icu_adult_patients_confirmed_suspected_covid +staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage -> + staffed_icu_adult_patients_confirmed_suspected_covid_coverage +total_adult_patients_hospitalized_confirmed_and_suspected_covid -> + total_adult_patients_hosp_confirmed_suspected_covid +total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage -> + total_adult_patients_hosp_confirmed_suspected_covid_coverage +total_adult_patients_hospitalized_confirmed_covid -> + total_adult_patients_hosp_confirmed_covid +total_adult_patients_hospitalized_confirmed_covid_coverage -> + total_adult_patients_hosp_confirmed_covid_coverage +total_pediatric_patients_hospitalized_confirmed_and_suspected_covid -> + total_pediatric_patients_hosp_confirmed_suspected_covid +total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage -> + total_pediatric_patients_hosp_confirmed_suspected_covid_coverage +total_pediatric_patients_hospitalized_confirmed_covid -> + total_pediatric_patients_hosp_confirmed_covid +total_pediatric_patients_hospitalized_confirmed_covid_coverage -> + total_pediatric_patients_hosp_confirmed_covid_coverage + +NOTE: the following data dictionary is copied from 
+https://healthdata.gov/covid-19-reported-patient-impact-and-hospital-capacity-state-data-dictionary +version entitled "November 16, 2020 release 2.3". + +- `state` + The two-letter state code +- `hospital_onset_covid` + Total current inpatients with onset of suspected or laboratory-confirmed + COVID-19 fourteen or more days after admission for a condition other than + COVID-19 in this state. +- `hospital_onset_covid_coverage` + Number of hospitals reporting "hospital_onset_covid" in this state +- `inpatient_beds` + Reported total number of staffed inpatient beds including all overflow and + surge/expansion beds used for inpatients (includes all ICU beds) in this + state +- `inpatient_beds_coverage` + Number of hospitals reporting "inpatient_beds" in this state +- `inpatient_beds_used` + Reported total number of staffed inpatient beds that are occupied in this + state +- `inpatient_beds_used_coverage` + Number of hospitals reporting "inpatient_beds_used" in this state +- `inpatient_beds_used_covid` + Reported patients currently hospitalized in an inpatient bed who have + suspected or confirmed COVID-19 in this state +- `inpatient_beds_used_covid_coverage` + Number of hospitals reporting "inpatient_beds_used_covid" in this state +- `previous_day_admission_adult_covid_confirmed` + Number of patients who were admitted to an adult inpatient bed on the + previous calendar day who had confirmed COVID-19 at the time of admission in + this state +- `previous_day_admission_adult_covid_confirmed_coverage` + Number of hospitals reporting "previous_day_admission_adult_covid_confirmed" + in this state +- `previous_day_admission_adult_covid_suspected` + Number of patients who were admitted to an adult inpatient bed on the + previous calendar day who had suspected COVID-19 at the time of admission in + this state +- `previous_day_admission_adult_covid_suspected_coverage` + Number of hospitals reporting "previous_day_admission_adult_covid_suspected" + in this state +- 
`previous_day_admission_pediatric_covid_confirmed` + Number of pediatric patients who were admitted to an inpatient bed, including + NICU, PICU, newborn, and nursery, on the previous calendar day who had + confirmed COVID-19 at the time of admission in this state +- `previous_day_admission_pediatric_covid_confirmed_coverage` + Number of hospitals reporting + "previous_day_admission_pediatric_covid_confirmed" in this state +- `previous_day_admission_pediatric_covid_suspected` + Number of pediatric patients who were admitted to an inpatient bed, including + NICU, PICU, newborn, and nursery, on the previous calendar day who had + suspected COVID-19 at the time of admission in this state +- `previous_day_admission_pediatric_covid_suspected_coverage` + Number of hospitals reporting + "previous_day_admission_pediatric_covid_suspected" in this state +- `staffed_adult_icu_bed_occupancy` + Reported total number of staffed inpatient adult ICU beds that are occupied + in this state +- `staffed_adult_icu_bed_occupancy_coverage` + Number of hospitals reporting "staffed_adult_icu_bed_occupancy" in this state +- `staffed_icu_adult_patients_confirmed_and_suspected_covid` + Reported patients currently hospitalized in an adult ICU bed who have + suspected or confirmed COVID-19 in this state +- `staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage` + Number of hospitals reporting + "staffed_icu_adult_patients_confirmed_and_suspected_covid" in this state +- `staffed_icu_adult_patients_confirmed_covid` + Reported patients currently hospitalized in an adult ICU bed who have + confirmed COVID-19 in this state +- `staffed_icu_adult_patients_confirmed_covid_coverage` + Number of hospitals reporting "staffed_icu_adult_patients_confirmed_covid" in + this state +- `total_adult_patients_hospitalized_confirmed_and_suspected_covid` + Reported patients currently hospitalized in an adult inpatient bed who have + laboratory-confirmed or suspected COVID-19. 
This includes those in observation + beds. +- `total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage` + Number of hospitals reporting + "total_adult_patients_hospitalized_confirmed_and_suspected_covid" in this + state +- `total_adult_patients_hospitalized_confirmed_covid` + Reported patients currently hospitalized in an adult inpatient bed who have + laboratory-confirmed COVID-19. This includes those in observation beds. +- `total_adult_patients_hospitalized_confirmed_covid_coverage` + Number of hospitals reporting + "total_adult_patients_hospitalized_confirmed_covid" in this state +- `total_pediatric_patients_hospitalized_confirmed_and_suspected_covid` + Reported patients currently hospitalized in a pediatric inpatient bed, + including NICU, newborn, and nursery, who are suspected or + laboratory-confirmed-positive for COVID-19. This includes those in observation + beds. +- `total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage` + Number of hospitals reporting + "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid" in this + state +- `total_pediatric_patients_hospitalized_confirmed_covid` + Reported patients currently hospitalized in a pediatric inpatient bed, + including NICU, newborn, and nursery, who are laboratory-confirmed-positive + for COVID-19. This includes those in observation beds. +- `total_pediatric_patients_hospitalized_confirmed_covid_coverage` + Number of hospitals reporting + "total_pediatric_patients_hospitalized_confirmed_covid" in this state +- `total_staffed_adult_icu_beds` + Reported total number of staffed inpatient adult ICU beds in this state +- `total_staffed_adult_icu_beds_coverage` + Number of hospitals reporting "total_staffed_adult_icu_beds" in this state +- `inpatient_beds_utilization` + Percentage of inpatient beds that are being utilized in this state. This + number only accounts for hospitals in the state that report both + "inpatient_beds_used" and "inpatient_beds" fields. 
+- `inpatient_beds_utilization_coverage` + Number of hospitals reporting both "inpatient_beds_used" and "inpatient_beds" +- `inpatient_beds_utilization_numerator` + Sum of "inpatient_beds_used" for hospitals reporting both + "inpatient_beds_used" and "inpatient_beds" +- `inpatient_beds_utilization_denominator` + Sum of "inpatient_beds" for hospitals reporting both "inpatient_beds_used" + and "inpatient_beds" +- `percent_of_inpatients_with_covid` + Percentage of inpatient population who have suspected or confirmed COVID-19 + in this state. This number only accounts for hospitals in the state that + report both "inpatient_beds_used_covid" and "inpatient_beds_used" fields. +- `percent_of_inpatients_with_covid_coverage` + Number of hospitals reporting both "inpatient_beds_used_covid" and + "inpatient_beds_used". +- `percent_of_inpatients_with_covid_numerator` + Sum of "inpatient_beds_used_covid" for hospitals reporting both + "inpatient_beds_used_covid" and "inpatient_beds_used". +- `percent_of_inpatients_with_covid_denominator` + Sum of "inpatient_beds_used" for hospitals reporting both + "inpatient_beds_used_covid" and "inpatient_beds_used". +- `inpatient_bed_covid_utilization` + Percentage of total (used/available) inpatient beds currently utilized by + patients who have suspected or confirmed COVID-19 in this state. This number + only accounts for hospitals in the state that report both + "inpatient_beds_used_covid" and "inpatient_beds" fields. +- `inpatient_bed_covid_utilization_coverage` + Number of hospitals reporting both "inpatient_beds_used_covid" and + "inpatient_beds". +- `inpatient_bed_covid_utilization_numerator` + Sum of "inpatient_beds_used_covid" for hospitals reporting both + "inpatient_beds_used_covid" and "inpatient_beds". +- `inpatient_bed_covid_utilization_denominator` + Sum of "inpatient_beds" for hospitals reporting both + "inpatient_beds_used_covid" and "inpatient_beds". 
+- `adult_icu_bed_covid_utilization` + Percentage of total staffed adult ICU beds currently utilized by patients who + have suspected or confirmed COVID-19 in this state. This number only accounts + for hospitals in the state that report both + "staffed_icu_adult_patients_confirmed_and_suspected_covid" and + "total_staffed_adult_icu_beds" fields. +- `adult_icu_bed_covid_utilization_coverage` + Number of hospitals reporting both + "staffed_icu_adult_patients_confirmed_and_suspected_covid" and + "total_staffed_adult_icu_beds". +- `adult_icu_bed_covid_utilization_numerator` + Sum of "staffed_icu_adult_patients_confirmed_and_suspected_covid" for + hospitals reporting both + "staffed_icu_adult_patients_confirmed_and_suspected_covid" and + "total_staffed_adult_icu_beds". +- `adult_icu_bed_covid_utilization_denominator` + Sum of "total_staffed_adult_icu_beds" for hospitals reporting both + "staffed_icu_adult_patients_confirmed_and_suspected_covid" and + "total_staffed_adult_icu_beds". +- `adult_icu_bed_utilization` + Percentage of staffed adult ICU beds that are being utilized in this state. + This number only accounts for hospitals in the state that report both + "staffed_adult_icu_bed_occupancy" and "total_staffed_adult_icu_beds" fields. +- `adult_icu_bed_utilization_coverage` + Number of hospitals reporting both "staffed_adult_icu_bed_occupancy" and + "total_staffed_adult_icu_beds". +- `adult_icu_bed_utilization_numerator` + Sum of "staffed_adult_icu_bed_occupancy" for hospitals reporting both + "staffed_adult_icu_bed_occupancy" and "total_staffed_adult_icu_beds". +- `adult_icu_bed_utilization_denominator` + Sum of "total_staffed_adult_icu_beds" for hospitals reporting both + "staffed_adult_icu_bed_occupancy" and "total_staffed_adult_icu_beds". + +NOTE: the following field is defined in the data dictionary but does not +actually appear in the dataset. 
+ +- `reporting_cutoff_start` + Look back date start - The latest reports from each hospital is summed for + this report starting with this date. +*/ + +CREATE TABLE `covid_hosp` ( + `id` INT NOT NULL AUTO_INCREMENT, + `issue` INT NOT NULL, + `state` CHAR(2) NOT NULL, + `date` INT NOT NULL, + `hospital_onset_covid` INT, + `hospital_onset_covid_coverage` INT, + `inpatient_beds` INT, + `inpatient_beds_coverage` INT, + `inpatient_beds_used` INT, + `inpatient_beds_used_coverage` INT, + `inpatient_beds_used_covid` INT, + `inpatient_beds_used_covid_coverage` INT, + `previous_day_admission_adult_covid_confirmed` INT, + `previous_day_admission_adult_covid_confirmed_coverage` INT, + `previous_day_admission_adult_covid_suspected` INT, + `previous_day_admission_adult_covid_suspected_coverage` INT, + `previous_day_admission_pediatric_covid_confirmed` INT, + `previous_day_admission_pediatric_covid_confirmed_coverage` INT, + `previous_day_admission_pediatric_covid_suspected` INT, + `previous_day_admission_pediatric_covid_suspected_coverage` INT, + `staffed_adult_icu_bed_occupancy` INT, + `staffed_adult_icu_bed_occupancy_coverage` INT, + `staffed_icu_adult_patients_confirmed_suspected_covid` INT, + `staffed_icu_adult_patients_confirmed_suspected_covid_coverage` INT, + `staffed_icu_adult_patients_confirmed_covid` INT, + `staffed_icu_adult_patients_confirmed_covid_coverage` INT, + `total_adult_patients_hosp_confirmed_suspected_covid` INT, + `total_adult_patients_hosp_confirmed_suspected_covid_coverage` INT, + `total_adult_patients_hosp_confirmed_covid` INT, + `total_adult_patients_hosp_confirmed_covid_coverage` INT, + `total_pediatric_patients_hosp_confirmed_suspected_covid` INT, + `total_pediatric_patients_hosp_confirmed_suspected_covid_coverage` INT, + `total_pediatric_patients_hosp_confirmed_covid` INT, + `total_pediatric_patients_hosp_confirmed_covid_coverage` INT, + `total_staffed_adult_icu_beds` INT, + `total_staffed_adult_icu_beds_coverage` INT, + 
`inpatient_beds_utilization` DOUBLE, + `inpatient_beds_utilization_coverage` INT, + `inpatient_beds_utilization_numerator` INT, + `inpatient_beds_utilization_denominator` INT, + `percent_of_inpatients_with_covid` DOUBLE, + `percent_of_inpatients_with_covid_coverage` INT, + `percent_of_inpatients_with_covid_numerator` INT, + `percent_of_inpatients_with_covid_denominator` INT, + `inpatient_bed_covid_utilization` DOUBLE, + `inpatient_bed_covid_utilization_coverage` INT, + `inpatient_bed_covid_utilization_numerator` INT, + `inpatient_bed_covid_utilization_denominator` INT, + `adult_icu_bed_covid_utilization` DOUBLE, + `adult_icu_bed_covid_utilization_coverage` INT, + `adult_icu_bed_covid_utilization_numerator` INT, + `adult_icu_bed_covid_utilization_denominator` INT, + `adult_icu_bed_utilization` DOUBLE, + `adult_icu_bed_utilization_coverage` INT, + `adult_icu_bed_utilization_numerator` INT, + `adult_icu_bed_utilization_denominator` INT, + PRIMARY KEY (`id`), + -- for uniqueness + -- for fast lookup of most recent issue for a given state and date + UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`), + -- for fast lookup of a time-series for a given state and issue + KEY `date_by_issue_and_state` (`issue`, `state`, `date`), + -- for fast lookup of all states for a given date and issue + KEY `state_by_issue_and_date` (`issue`, `date`, `state`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; diff --git a/src/server/api.php b/src/server/api.php index fd9a8bf72..2a647bba1 100644 --- a/src/server/api.php +++ b/src/server/api.php @@ -1072,6 +1072,153 @@ function get_covidcast_meta() { return $has_values ? 
$epidata : null; } +// queries the `covid_hosp` table +// $states (required): array of state abbreviations +// $dates (required): array of date values/ranges +// $issues (optional): array of date values/ranges +// default: most recent issue +function get_covid_hosp($states, $dates, $issues) { + $epidata = array(); + $table = '`covid_hosp` c'; + $fields = implode(', ', array( + 'c.`issue`', + 'c.`state`', + 'c.`date`', + 'c.`hospital_onset_covid`', + 'c.`hospital_onset_covid_coverage`', + 'c.`inpatient_beds`', + 'c.`inpatient_beds_coverage`', + 'c.`inpatient_beds_used`', + 'c.`inpatient_beds_used_coverage`', + 'c.`inpatient_beds_used_covid`', + 'c.`inpatient_beds_used_covid_coverage`', + 'c.`previous_day_admission_adult_covid_confirmed`', + 'c.`previous_day_admission_adult_covid_confirmed_coverage`', + 'c.`previous_day_admission_adult_covid_suspected`', + 'c.`previous_day_admission_adult_covid_suspected_coverage`', + 'c.`previous_day_admission_pediatric_covid_confirmed`', + 'c.`previous_day_admission_pediatric_covid_confirmed_coverage`', + 'c.`previous_day_admission_pediatric_covid_suspected`', + 'c.`previous_day_admission_pediatric_covid_suspected_coverage`', + 'c.`staffed_adult_icu_bed_occupancy`', + 'c.`staffed_adult_icu_bed_occupancy_coverage`', + 'c.`staffed_icu_adult_patients_confirmed_suspected_covid`', + 'c.`staffed_icu_adult_patients_confirmed_suspected_covid_coverage`', + 'c.`staffed_icu_adult_patients_confirmed_covid`', + 'c.`staffed_icu_adult_patients_confirmed_covid_coverage`', + 'c.`total_adult_patients_hosp_confirmed_suspected_covid`', + 'c.`total_adult_patients_hosp_confirmed_suspected_covid_coverage`', + 'c.`total_adult_patients_hosp_confirmed_covid`', + 'c.`total_adult_patients_hosp_confirmed_covid_coverage`', + 'c.`total_pediatric_patients_hosp_confirmed_suspected_covid`', + 'c.`total_pediatric_patients_hosp_confirmed_suspected_covid_coverage`', + 'c.`total_pediatric_patients_hosp_confirmed_covid`', + 
'c.`total_pediatric_patients_hosp_confirmed_covid_coverage`', + 'c.`total_staffed_adult_icu_beds`', + 'c.`total_staffed_adult_icu_beds_coverage`', + 'c.`inpatient_beds_utilization`', + 'c.`inpatient_beds_utilization_coverage`', + 'c.`inpatient_beds_utilization_numerator`', + 'c.`inpatient_beds_utilization_denominator`', + 'c.`percent_of_inpatients_with_covid`', + 'c.`percent_of_inpatients_with_covid_coverage`', + 'c.`percent_of_inpatients_with_covid_numerator`', + 'c.`percent_of_inpatients_with_covid_denominator`', + 'c.`inpatient_bed_covid_utilization`', + 'c.`inpatient_bed_covid_utilization_coverage`', + 'c.`inpatient_bed_covid_utilization_numerator`', + 'c.`inpatient_bed_covid_utilization_denominator`', + 'c.`adult_icu_bed_covid_utilization`', + 'c.`adult_icu_bed_covid_utilization_coverage`', + 'c.`adult_icu_bed_covid_utilization_numerator`', + 'c.`adult_icu_bed_covid_utilization_denominator`', + 'c.`adult_icu_bed_utilization`', + 'c.`adult_icu_bed_utilization_coverage`', + 'c.`adult_icu_bed_utilization_numerator`', + 'c.`adult_icu_bed_utilization_denominator`', + )); + // basic query info + $order = "c.`date` ASC, c.`state` ASC, c.`issue` ASC"; + // build the date filter + $condition_date = filter_integers('c.`date`', $dates); + // build the state filter + $condition_state = filter_strings('c.`state`', $states); + if($issues !== null) { + // build the issue filter + $condition_issue = filter_integers('c.`issue`', $issues); + // final query using specific issues + $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_date}) AND ({$condition_state}) AND ({$condition_issue}) ORDER BY {$order}"; + } else { + // final query using most recent issues + $subquery = "(SELECT max(`issue`) `max_issue`, `date`, `state` FROM {$table} WHERE ({$condition_date}) AND ({$condition_state}) GROUP BY `date`, `state`) x"; + $condition = "x.`max_issue` = c.`issue` AND x.`date` = c.`date` AND x.`state` = c.`state`"; + $query = "SELECT {$fields} FROM {$table} JOIN {$subquery} ON 
{$condition} ORDER BY {$order}"; + } + // get the data from the database + $fields_string = array('state'); + $fields_int = array( + 'issue', + 'date', + 'hospital_onset_covid', + 'hospital_onset_covid_coverage', + 'inpatient_beds', + 'inpatient_beds_coverage', + 'inpatient_beds_used', + 'inpatient_beds_used_coverage', + 'inpatient_beds_used_covid', + 'inpatient_beds_used_covid_coverage', + 'previous_day_admission_adult_covid_confirmed', + 'previous_day_admission_adult_covid_confirmed_coverage', + 'previous_day_admission_adult_covid_suspected', + 'previous_day_admission_adult_covid_suspected_coverage', + 'previous_day_admission_pediatric_covid_confirmed', + 'previous_day_admission_pediatric_covid_confirmed_coverage', + 'previous_day_admission_pediatric_covid_suspected', + 'previous_day_admission_pediatric_covid_suspected_coverage', + 'staffed_adult_icu_bed_occupancy', + 'staffed_adult_icu_bed_occupancy_coverage', + 'staffed_icu_adult_patients_confirmed_suspected_covid', + 'staffed_icu_adult_patients_confirmed_suspected_covid_coverage', + 'staffed_icu_adult_patients_confirmed_covid', + 'staffed_icu_adult_patients_confirmed_covid_coverage', + 'total_adult_patients_hosp_confirmed_suspected_covid', + 'total_adult_patients_hosp_confirmed_suspected_covid_coverage', + 'total_adult_patients_hosp_confirmed_covid', + 'total_adult_patients_hosp_confirmed_covid_coverage', + 'total_pediatric_patients_hosp_confirmed_suspected_covid', + 'total_pediatric_patients_hosp_confirmed_suspected_covid_coverage', + 'total_pediatric_patients_hosp_confirmed_covid', + 'total_pediatric_patients_hosp_confirmed_covid_coverage', + 'total_staffed_adult_icu_beds', + 'total_staffed_adult_icu_beds_coverage', + 'inpatient_beds_utilization_coverage', + 'inpatient_beds_utilization_numerator', + 'inpatient_beds_utilization_denominator', + 'percent_of_inpatients_with_covid_coverage', + 'percent_of_inpatients_with_covid_numerator', + 'percent_of_inpatients_with_covid_denominator', + 
'inpatient_bed_covid_utilization_coverage', + 'inpatient_bed_covid_utilization_numerator', + 'inpatient_bed_covid_utilization_denominator', + 'adult_icu_bed_covid_utilization_coverage', + 'adult_icu_bed_covid_utilization_numerator', + 'adult_icu_bed_covid_utilization_denominator', + 'adult_icu_bed_utilization_coverage', + 'adult_icu_bed_utilization_numerator', + 'adult_icu_bed_utilization_denominator', + ); + $fields_float = array( + 'inpatient_beds_utilization', + 'percent_of_inpatients_with_covid', + 'inpatient_bed_covid_utilization', + 'adult_icu_bed_covid_utilization', + 'adult_icu_bed_utilization', + ); + execute_query($query, $epidata, $fields_string, $fields_int, $fields_float); + // return the data + return count($epidata) === 0 ? null : $epidata; +} + // queries a bunch of epidata tables function get_meta() { // query and return metadata @@ -1562,6 +1709,16 @@ function meta_delphi() { // get the metadata $epidata = get_covidcast_meta(); store_result($data, $epidata); + } else if($source === 'covid_hosp') { + if(require_all($data, array('states', 'dates'))) { + // parse the request + $states = extract_values($_REQUEST['states'], 'str'); + $dates = extract_values($_REQUEST['dates'], 'int'); + $issues = isset($_REQUEST['issues']) ? 
extract_values($_REQUEST['issues'], 'int') : null; + // get the data + $epidata = get_covid_hosp($states, $dates, $issues); + store_result($data, $epidata); + } } else { $data['message'] = 'no data source specified'; } diff --git a/testdata/acquisition/covid_hosp/dataset.csv b/testdata/acquisition/covid_hosp/dataset.csv new file mode 100644 index 000000000..1e3806f3b --- /dev/null +++ b/testdata/acquisition/covid_hosp/dataset.csv @@ -0,0 +1,21 @@ +state,date,hospital_onset_covid,hospital_onset_covid_coverage,inpatient_beds,inpatient_beds_coverage,inpatient_beds_used,inpatient_beds_used_coverage,inpatient_beds_used_covid,inpatient_beds_used_covid_coverage,previous_day_admission_adult_covid_confirmed,previous_day_admission_adult_covid_confirmed_coverage,previous_day_admission_adult_covid_suspected,previous_day_admission_adult_covid_suspected_coverage,previous_day_admission_pediatric_covid_confirmed,previous_day_admission_pediatric_covid_confirmed_coverage,previous_day_admission_pediatric_covid_suspected,previous_day_admission_pediatric_covid_suspected_coverage,staffed_adult_icu_bed_occupancy,staffed_adult_icu_bed_occupancy_coverage,staffed_icu_adult_patients_confirmed_and_suspected_covid,staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage,staffed_icu_adult_patients_confirmed_covid,staffed_icu_adult_patients_confirmed_covid_coverage,total_adult_patients_hospitalized_confirmed_and_suspected_covid,total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage,total_adult_patients_hospitalized_confirmed_covid,total_adult_patients_hospitalized_confirmed_covid_coverage,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage,total_pediatric_patients_hospitalized_confirmed_covid,total_pediatric_patients_hospitalized_confirmed_covid_coverage,total_staffed_adult_icu_beds,total_staffed_adult_icu_beds_coverage,inpatient_beds_utilization,inpatient_beds_utilization_cover
age,inpatient_beds_utilization_numerator,inpatient_beds_utilization_denominator,percent_of_inpatients_with_covid,percent_of_inpatients_with_covid_coverage,percent_of_inpatients_with_covid_numerator,percent_of_inpatients_with_covid_denominator,inpatient_bed_covid_utilization,inpatient_bed_covid_utilization_coverage,inpatient_bed_covid_utilization_numerator,inpatient_bed_covid_utilization_denominator,adult_icu_bed_covid_utilization,adult_icu_bed_covid_utilization_coverage,adult_icu_bed_covid_utilization_numerator,adult_icu_bed_covid_utilization_denominator,adult_icu_bed_utilization,adult_icu_bed_utilization_coverage,adult_icu_bed_utilization_numerator,adult_icu_bed_utilization_denominator +MI,2020-05-30,3,117,21064,128,12758,123,1011,127,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.6130706391158097,123,12758,20810,0.0786957203323405,123,1004,12758,0.048053614715528305,127,1011,21039,,,,,,,, +NV,2020-10-20,13,60,8497,61,6401,62,613,59,53,62,99,60,0,59,3,59,630,60,138,57,117,59,566,58,448,60,3,58,7,58,859,60,0.7444980581381664,61,6326,8497,0.09753381066030231,59,613,6285,0.06847996148754362,58,569,8309,0.17424242424242425,57,138,792,0.7334109429569267,60,630,859 +FL,2020-04-21,51,133,32614,133,15878,129,3653,268,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.5024796032634778,127,15705,31255,0.1568837385061091,129,2491,15878,0.07588765560802109,133,2475,32614,,,,,,,, +NV,2020-04-12,0,22,3255,22,1804,19,738,61,,0,,0,,0,,0,,0,,0,,0,405,40,214,40,,0,,0,,0,0.6220689655172413,19,1804,2900,0.17239467849223947,19,311,1804,0.10455259026687598,21,333,3185,,,,,,,, +ME,2020-05-05,0,29,3045,35,1629,35,67,29,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.5355830270631473,34,1603,2993,0.043676662320730114,28,67,1534,0.02292768959435626,28,65,2835,,,,,,,, +IN,2020-04-28,2,83,8731,74,4582,84,788,87,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.4827834433113377,71,4024,8335,0.16477520733304235,84,755,4582,0.08704615737028977,74,760,8731,,,,,,,, 
+KY,2020-08-06,4,102,12879,103,8856,106,760,106,205,100,587,100,2,100,10,100,1068,99,726,100,140,99,731,100,382,100,29,100,2,100,1570,100,0.6816523021973756,103,8779,12879,0.08581752484191509,106,760,8856,0.05901079276341331,103,760,12879,0.4671814671814672,99,726,1554,0.6802547770700637,98,1068,1570 +MD,2020-05-13,0,19,8778,58,7207,57,2106,58,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.8305866082747493,57,7207,8677,0.2898570833911475,57,2089,7207,0.23991797676008203,58,2106,8778,,,,,,,, +UT,2020-04-23,0,40,3841,40,2158,48,225,49,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.45411013567438147,39,1707,3759,0.10194624652455977,48,220,2158,0.04946628482166102,40,190,3841,,,,,,,, +PR,2020-04-04,1,8,1680,8,537,8,39,8,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.3196428571428571,8,537,1680,0.07262569832402235,8,39,537,0.023214285714285715,8,39,1680,,,,,,,, +GA,2020-04-07,1,64,7119,65,3991,65,860,64,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.5606124455681978,65,3991,7119,0.21635220125786164,64,860,3975,0.12122920778122356,64,860,7094,,,,,,,, +VT,2020-08-12,1,14,566,12,399,12,9,15,1,8,5,9,0,7,0,7,27,7,2,9,1,9,9,11,2,10,0,11,0,10,48,6,0.7049469964664311,12,399,566,0.020050125313283207,12,8,399,0.014134275618374558,12,8,566,0.020833333333333332,6,1,48,0.5625,6,27,48 +AZ,2020-05-03,2,67,10054,67,6448,81,857,67,27,17,,0,,0,,0,,0,,0,,0,,0,97,22,,0,,0,,0,0.6039379880049791,58,5337,8837,0.1516019330588867,64,847,5587,0.07699028065960467,61,705,9157,,,,,,,, +UT,2020-08-26,3,53,5468,53,3254,54,162,53,25,54,14,53,4,43,0,41,329,45,42,43,44,44,148,49,132,49,5,49,5,48,556,42,0.581016825164594,53,3177,5468,0.050577583515454265,53,162,3203,0.029193008553365565,52,157,5378,0.07806691449814127,40,42,538,0.5845323741007195,41,325,556 +AR,2020-06-03,11,76,8873,85,5284,82,254,83,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.615034965034965,81,5277,8580,0.047330556607345704,81,250,5282,0.028863636363636362,82,254,8800,,,,,,,, 
+NH,2020-06-22,2,15,2957,27,1794,26,58,24,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.611869031377899,26,1794,2932,0.036318096430807766,23,58,1597,0.02279874213836478,24,58,2544,,,,,,,, +MN,2020-01-12,0,1,46,1,,0,0,1,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,,,,,,,,,,,,,,,,,,, +ME,2020-10-29,0,38,3251,39,2282,39,38,38,8,39,24,38,0,38,2,38,235,39,13,38,7,39,38,38,21,39,0,38,0,38,330,39,0.7019378652722239,39,2282,3251,0.016888888888888887,38,38,2250,0.01181592039800995,38,38,3216,0.040625,38,13,320,0.7121212121212122,39,235,330 +OR,2020-10-14,1,59,6178,59,4780,61,252,60,15,62,94,60,0,56,5,56,517,59,55,57,39,59,243,59,160,61,9,59,2,59,703,56,0.7518614438329556,59,4645,6178,0.05354866128346791,59,252,4706,0.040605758029622234,57,244,6009,0.07555555555555556,54,51,675,0.6927453769559033,56,487,703 +MA,2020-05-10,53,84,15691,73,12427,83,3625,84,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,,0,0.697850497273019,72,10876,15585,0.2902550897239881,83,3607,12427,0.21056656682174496,73,3304,15691,,,,,,,, diff --git a/testdata/acquisition/covid_hosp/metadata.json b/testdata/acquisition/covid_hosp/metadata.json new file mode 100644 index 000000000..64d121387 --- /dev/null +++ b/testdata/acquisition/covid_hosp/metadata.json @@ -0,0 +1,67 @@ +{ + "help": "Return the metadata of a dataset (package) and its resources. :param id: the id or name of the dataset :type id: string", + "success": true, + "result": [ + { + "id": "83b4a668-9321-4d8c-bc4f-2bef66c49050", + "name": "covid-19-reported-patient-impact-and-hospital-capacity-state-timeseries", + "title": "COVID-19 Reported Patient Impact and Hospital Capacity by State Timeseries", + "maintainer": "HealthData.gov", + "maintainer_email": "HealthData@hhs.gov", + "license_title": "https://opendatacommons.org/licenses/odbl/1.0/", + "notes": "

The following dataset provides state-aggregated data for hospital utilization in a timeseries format dating back to January 1, 2020. These are derived from reports with facility-level granularity across three main sources: (1) HHS TeleTracking, (2) reporting provided directly to HHS Protect by state/territorial health departments on behalf of their healthcare facilities and (3) National Healthcare Safety Network (before July 15).

\n

The file will be updated regularly and provides the latest values reported by each facility within the last four days for all time. This allows for a more comprehensive picture of the hospital utilization within a state by ensuring a hospital is represented, even if they miss a single day of reporting.

\n

No statistical analysis is applied to account for non-response and/or to account for missing data.

\n

The below table displays one value for each field (i.e., column). Sometimes, reports for a given facility will be provided to more than one reporting source: HHS TeleTracking, NHSN, and HHS Protect. When this occurs, to ensure that there are not duplicate reports, prioritization is applied to the numbers for each facility.

\n

COVID-19 Reported Patient Impact and Hospital Capacity by State Data Dictionary

\n", + "url": "https://healthdata.gov/dataset/covid-19-reported-patient-impact-and-hospital-capacity-state-timeseries", + "state": "Active", + "log_message": "Update to resource COVID-19 Reported Patient Impact and Hospital Capacity by State Timeseries", + "private": true, + "revision_timestamp": "Mon, 11/16/2020 - 00:55", + "metadata_created": "Tue, 11/03/2020 - 19:36", + "metadata_modified": "Mon, 11/16/2020 - 00:55", + "creator_user_id": "4be8b03b-ca18-4c6c-92fe-93fb680f6983", + "type": "Dataset", + "resources": [ + { + "id": "3ce11e2c-05b4-4480-9a11-bc9e9ea769f9", + "revision_id": "", + "url": "https://healthdata.gov/sites/default/files/reported_hospital_utilization_timeseries_20201115_2134.csv", + "description": "

Updated 11/16/2020 12:55 AM

\n", + "format": "csv", + "state": "Active", + "revision_timestamp": "Mon, 11/16/2020 - 00:55", + "name": "COVID-19 Reported Patient Impact and Hospital Capacity by State Timeseries", + "mimetype": "text/csv", + "size": "3.01 MB", + "created": "Tue, 11/03/2020 - 19:38", + "resource_group_id": "af7749f7-beeb-453f-97e7-c958adeb40dc", + "last_modified": "Date changed Mon, 11/16/2020 - 00:55" + } + ], + "tags": [ + { + "id": "10b1ca03-8c76-4a95-91e6-86d31816f575", + "vocabulary_id": "6", + "name": "Covid-19" + }, + { + "id": "00a48b5c-b18a-4ac2-864b-160d453e4f1c", + "vocabulary_id": "6", + "name": "Covid" + }, + { + "id": "56f3cdad-8acb-46c8-bc71-aa1ded8407fb", + "vocabulary_id": "6", + "name": "Covid19" + } + ], + "groups": [ + { + "description": "

The United States Department of Health and Human Services (HHS), also known as the Health Department, is a cabinet-level department of the U.S. federal government with the goal of protecting the health of all Americans and providing essential human services. The official HHS motto is \"Improving the health, safety, and well-being of America.\"

\n", + "id": "af7749f7-beeb-453f-97e7-c958adeb40dc", + "image_display_url": "https://healthdata.gov/sites/default/files/US-DeptOfHHS-Seal1_0.jpg", + "title": "Department of Health & Human Services", + "name": "agencies/department-health-human-services" + } + ] + } + ] +} diff --git a/tests/acquisition/covid_hosp/test_database.py b/tests/acquisition/covid_hosp/test_database.py new file mode 100644 index 000000000..41a104b0e --- /dev/null +++ b/tests/acquisition/covid_hosp/test_database.py @@ -0,0 +1,145 @@ +"""Unit tests for database.py.""" + +# standard library +from pathlib import Path +import unittest +from unittest.mock import MagicMock +from unittest.mock import sentinel + +# first party +from delphi.epidata.acquisition.covid_hosp.test_utils import TestUtils + +# py3tester coverage target +__test_target__ = 'delphi.epidata.acquisition.covid_hosp.database' + + +class DatabaseTests(unittest.TestCase): + + def setUp(self): + """Perform per-test setup.""" + + # configure test data + path_to_repo_root = Path(__file__).parent.parent.parent.parent + self.test_utils = TestUtils(path_to_repo_root) + + def test_commit_and_close_on_success(self): + """Commit and close the connection after success.""" + + mock_connector = MagicMock() + + with Database.connect(mysql_connector_impl=mock_connector) as database: + connection = database.connection + + mock_connector.connect.assert_called_once() + connection.commit.assert_called_once() + connection.close.assert_called_once() + + def test_rollback_and_close_on_failure(self): + """Rollback and close the connection after failure.""" + + mock_connector = MagicMock() + + try: + with Database.connect(mysql_connector_impl=mock_connector) as database: + connection = database.connection + raise Exception('intentional test of exception handling') + except Exception: + pass + + mock_connector.connect.assert_called_once() + connection.commit.assert_not_called() + connection.close.assert_called_once() + + def test_new_cursor_cleanup(self): + 
"""Cursors are unconditionally closed.""" + + mock_connection = MagicMock() + mock_cursor = mock_connection.cursor() + database = Database(mock_connection) + + try: + with database.new_cursor() as cursor: + raise Exception('intentional test of exception handling') + except Exception: + pass + + mock_cursor.close.assert_called_once() + + def test_contains_revision(self): + """Check whether a revision is already in the database.""" + + # Note that query logic is tested separately by integration tests. This + # test just checks that the function maps inputs to outputs as expected. + + mock_connection = MagicMock() + mock_cursor = mock_connection.cursor() + database = Database(mock_connection) + + with self.subTest(name='new revision'): + mock_cursor.__iter__.return_value = [(0,)] + + result = database.contains_revision(sentinel.revision) + + # compare with boolean literal to test the type cast + self.assertIs(result, False) + query_values = mock_cursor.execute.call_args[0][-1] + self.assertEqual(query_values, (sentinel.revision,)) + + with self.subTest(name='old revision'): + mock_cursor.__iter__.return_value = [(1,)] + + result = database.contains_revision(sentinel.revision) + + # compare with boolean literal to test the type cast + self.assertIs(result, True) + query_values = mock_cursor.execute.call_args[0][-1] + self.assertEqual(query_values, (sentinel.revision,)) + + def test_insert_metadata(self): + """Add new metadata to the database.""" + + # Note that query logic is tested separately by integration tests. This + # test just checks that the function maps inputs to outputs as expected. 
+ + mock_connection = MagicMock() + mock_cursor = mock_connection.cursor() + database = Database(mock_connection) + + result = database.insert_metadata( + sentinel.issue, sentinel.revision, sentinel.meta_json) + + self.assertIsNone(result) + query_values = mock_cursor.execute.call_args[0][-1] + self.assertEqual( + query_values, (sentinel.issue, sentinel.revision, sentinel.meta_json)) + + def test_insert_dataset(self): + """Add a new dataset to the database.""" + + # Note that query logic is tested separately by integration tests. This + # test just checks that the function maps inputs to outputs as expected. + + mock_connection = MagicMock() + mock_cursor = mock_connection.cursor() + database = Database(mock_connection) + dataset = self.test_utils.load_sample_dataset() + + result = database.insert_dataset(sentinel.issue, dataset) + + self.assertIsNone(result) + self.assertEqual(mock_cursor.execute.call_count, 20) + + last_query_values = mock_cursor.execute.call_args[0][-1] + expected_query_values = ( + 0, sentinel.issue, 'MA', '2020-05-10', 53, 84, 15691, 73, 12427, 83, + 3625, 84, None, 0, None, 0, None, 0, None, 0, None, 0, None, 0, None, + 0, None, 0, None, 0, None, 0, None, 0, None, 0, 0.697850497273019, 72, + 10876, 15585, 0.2902550897239881, 83, 3607, 12427, 0.21056656682174496, + 73, 3304, 15691, None, None, None, None, None, None, None, None) + self.assertEqual(len(last_query_values), len(expected_query_values)) + + for actual, expected in zip(last_query_values, expected_query_values): + if isinstance(expected, float): + self.assertAlmostEqual(actual, expected) + else: + self.assertEqual(actual, expected) diff --git a/tests/acquisition/covid_hosp/test_network.py b/tests/acquisition/covid_hosp/test_network.py new file mode 100644 index 000000000..0a7710c2c --- /dev/null +++ b/tests/acquisition/covid_hosp/test_network.py @@ -0,0 +1,36 @@ +"""Unit tests for network.py.""" + +# standard library +import unittest +from unittest.mock import MagicMock +from 
unittest.mock import sentinel + +# py3tester coverage target +__test_target__ = 'delphi.epidata.acquisition.covid_hosp.network' + + +class NetworkTests(unittest.TestCase): + + def test_fetch_metadata(self): + """Fetch metadata as JSON.""" + + mock_response = MagicMock() + mock_response.json.return_value = sentinel.metadata + mock_requests = MagicMock() + mock_requests.get.return_value = mock_response + + result = Network.fetch_metadata(requests_impl=mock_requests) + + self.assertEqual(result, sentinel.metadata) + mock_requests.get.assert_called_once_with(Network.METADATA_URL) + + def test_fetch_dataset(self): + """Fetch dataset as CSV.""" + + mock_pandas = MagicMock() + mock_pandas.read_csv.return_value = sentinel.dataset + + result = Network.fetch_dataset(sentinel.url, pandas_impl=mock_pandas) + + self.assertEqual(result, sentinel.dataset) + mock_pandas.read_csv.assert_called_once_with(sentinel.url) diff --git a/tests/acquisition/covid_hosp/test_update.py b/tests/acquisition/covid_hosp/test_update.py new file mode 100644 index 000000000..9fddba09d --- /dev/null +++ b/tests/acquisition/covid_hosp/test_update.py @@ -0,0 +1,143 @@ +"""Unit tests for update.py.""" + +# standard library +from pathlib import Path +import unittest +from unittest.mock import MagicMock + +# first party +from delphi.epidata.acquisition.covid_hosp.test_utils import TestUtils + +# py3tester coverage target +__test_target__ = 'delphi.epidata.acquisition.covid_hosp.update' + + +class UpdateTests(unittest.TestCase): + + def setUp(self): + """Perform per-test setup.""" + + # configure test data + path_to_repo_root = Path(__file__).parent.parent.parent.parent + self.test_utils = TestUtils(path_to_repo_root) + + def test_get_entry_success(self): + """Get a deeply nested field from an arbitrary object.""" + + obj = self.test_utils.load_sample_metadata() + + result = Update.get_entry(obj, 'result', 0, 'tags', 2, 'id') + + self.assertEqual(result, '56f3cdad-8acb-46c8-bc71-aa1ded8407fb') + + def 
test_get_entry_failure(self): + """Fail with a helpful message when a nested field doesn't exist.""" + + obj = self.test_utils.load_sample_metadata() + + with self.assertRaises(UpdateException): + Update.get_entry(obj, -1) + + def test_get_issue_from_revision(self): + """Extract an issue date from a free-form revision string.""" + + revisions = ('Tue, 11/03/2020 - 19:38', 'Mon, 11/16/2020 - 00:55', 'foo') + issues = (20201103, 20201116, None) + + for revision, issue in zip(revisions, issues): + with self.subTest(revision=revision): + + if issue: + result = Update.get_issue_from_revision(revision) + self.assertEqual(result, issue) + else: + with self.assertRaises(UpdateException): + Update.get_issue_from_revision(revision) + + def test_get_date_as_int(self): + """Convert a YYYY-MM-DD date to a YYYYMMDD int.""" + + result = Update.get_date_as_int('2020-11-17') + + self.assertEqual(result, 20201117) + + def test_extract_resource_details(self): + """Extract URL and revision from metadata.""" + + with self.subTest(name='invalid success'): + metadata = self.test_utils.load_sample_metadata() + metadata['success'] = False + + with self.assertRaises(UpdateException): + Update.extract_resource_details(metadata) + + with self.subTest(name='invalid result'): + metadata = self.test_utils.load_sample_metadata() + metadata['result'] = [] + + with self.assertRaises(UpdateException): + Update.extract_resource_details(metadata) + + with self.subTest(name='invalid resource'): + metadata = self.test_utils.load_sample_metadata() + metadata['result'][0]['resources'] = [] + + with self.assertRaises(UpdateException): + Update.extract_resource_details(metadata) + + with self.subTest(name='valid'): + metadata = self.test_utils.load_sample_metadata() + + url, revision = Update.extract_resource_details(metadata) + + expected_url = ( + 'https://healthdata.gov/sites/default/files/' + 'reported_hospital_utilization_timeseries_20201115_2134.csv' + ) + self.assertEqual(url, expected_url) + 
self.assertEqual(revision, 'Mon, 11/16/2020 - 00:55') + + def test_run_skip_old_dataset(self): + """Don't re-acquire an old dataset.""" + + mock_network = MagicMock() + mock_network.fetch_metadata.return_value = \ + self.test_utils.load_sample_metadata() + mock_database = MagicMock() + with mock_database.connect() as mock_connection: + pass + mock_connection.contains_revision.return_value = True + + result = Update.run(database_impl=mock_database, network_impl=mock_network) + + self.assertFalse(result) + mock_network.fetch_dataset.assert_not_called() + mock_connection.insert_metadata.assert_not_called() + mock_connection.insert_dataset.assert_not_called() + + def test_run_acquire_new_dataset(self): + """Acquire a new dataset.""" + + mock_network = MagicMock() + mock_network.fetch_metadata.return_value = \ + self.test_utils.load_sample_metadata() + mock_network.fetch_dataset.return_value = \ + self.test_utils.load_sample_dataset() + mock_database = MagicMock() + with mock_database.connect() as mock_connection: + pass + mock_connection.contains_revision.return_value = False + + result = Update.run(database_impl=mock_database, network_impl=mock_network) + + self.assertTrue(result) + + mock_connection.insert_metadata.assert_called_once() + args = mock_connection.insert_metadata.call_args[0] + self.assertEqual(args[:2], (20201116, 'Mon, 11/16/2020 - 00:55')) + + mock_connection.insert_dataset.assert_called_once() + args = mock_connection.insert_dataset.call_args[0] + self.assertEqual(args[0], 20201116) + self.assertEqual(len(args[1]), 20) + self.assertEqual(args[1]['date'][19], 20200510)