diff --git a/.dockerignore b/.dockerignore index 058bb7939..820d11c43 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,7 @@ /delphi-epidata -/.mypy_cache +**/.mypy_cache /.github /docs -__pycache__ -/node_modules \ No newline at end of file +**/__pycache__ +**/.pytest_cache +**/node_modules \ No newline at end of file diff --git a/.env.example b/.env.example index 212706e08..51cac1ae4 100644 --- a/.env.example +++ b/.env.example @@ -1,18 +1,6 @@ FLASK_DEBUG=True SQLALCHEMY_DATABASE_URI=sqlite:///test.db FLASK_SECRET=abc -SECRET_TWITTER=abc -SECRET_GHT=abc -SECRET_FLUVIEW=abc -SECRET_CDC=abc -SECRET_SENSORS=abc -SECRET_SENSOR_TWTR=abc -SECRET_SENSOR_GFT=abc -SECRET_SENSOR_GHT=abc -SECRET_SENSOR_GHTJ=abc -SECRET_SENSOR_CDC=abc -SECRET_SENSOR_QUID=abc -SECRET_SENSOR_WIKI=abc -SECRET_QUIDEL=abc -SECRET_NOROSTAT=abc -SECRET_AFHSB=abc \ No newline at end of file +#API_KEY_REQUIRED_STARTING_AT=2021-07-30 +API_KEY_ADMIN_PASSWORD=abc +API_KEY_REGISTER_WEBHOOK_TOKEN=abc diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 438a805fd..1f7664b46 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -53,24 +53,31 @@ jobs: run: | docker build -t delphi_database_epidata -f ./repos/delphi/delphi-epidata/dev/docker/database/epidata/Dockerfile . docker build -t delphi_web_python -f repos/delphi/delphi-epidata/dev/docker/python/Dockerfile . + sudo docker build -t delphi_redis -f repos/delphi/delphi-epidata/dev/docker/redis/Dockerfile . cd ./repos/delphi/delphi-epidata docker build -t delphi_web_epidata -f ./devops/Dockerfile . cd ../../../ # MODULE_NAME specifies the location of the `app` variable, the actual WSGI application object to run. 
# see https://github.com/tiangolo/meinheld-gunicorn-docker#module_name - - name: Start services + - name: Start database and Redis services run: | docker network create --driver bridge delphi-net docker run --rm -d -p 13306:3306 --network delphi-net --name delphi_database_epidata --cap-add=sys_nice delphi_database_epidata - docker run --rm -d -p 10080:80 --env "MODULE_NAME=delphi.epidata.server.main" --env "SQLALCHEMY_DATABASE_URI=mysql+mysqldb://user:pass@delphi_database_epidata:3306/epidata" --env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --network delphi-net --name delphi_web_epidata delphi_web_epidata - docker ps + docker run --rm -d -p 6379:6379 --network delphi-net --env "REDIS_PASSWORD=1234" --name delphi_redis delphi_redis + - run: | wget https://raw.githubusercontent.com/eficode/wait-for/master/wait-for chmod +x wait-for ./wait-for localhost:13306 -- echo 'ready' sleep 10s + + - name: Start delphi_web_epidata + run: | + docker run --rm -d -p 10080:80 --env "MODULE_NAME=delphi.epidata.server.main" --env "SQLALCHEMY_DATABASE_URI=mysql+mysqldb://user:pass@delphi_database_epidata:3306/epidata" --env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --env "REDIS_HOST=delphi_redis" --env "REDIS_PASSWORD=1234" --env "API_KEY_REGISTER_WEBHOOK_TOKEN=abc" --env "API_KEY_ADMIN_PASSWORD=test_admin_password" --network delphi-net --name delphi_web_epidata delphi_web_epidata + docker ps + - name: Run Unit Tests run: | docker run --rm --network delphi-net --env "SQLALCHEMY_DATABASE_URI=mysql+mysqldb://user:pass@delphi_database_epidata:3306/epidata" --env "FLASK_SECRET=abc" delphi_web_python python -m pytest --import-mode importlib repos/delphi/delphi-epidata/tests @@ -81,7 +88,7 @@ jobs: - name: Clean Up run: | - docker stop delphi_database_epidata delphi_web_epidata + docker stop delphi_database_epidata delphi_web_epidata delphi_redis docker network remove delphi-net build_js_client: @@ -108,7 +115,8 @@ jobs: image: needs: build # only on main and dev 
branch - if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev' + # TODO: #1112 Remove `|| github.ref == 'refs/heads/api-keys'` after transition to production status. + if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/api-keys' runs-on: ubuntu-latest steps: diff --git a/dev/docker/python/Dockerfile b/dev/docker/python/Dockerfile index ffce16b0f..1836e4a5b 100644 --- a/dev/docker/python/Dockerfile +++ b/dev/docker/python/Dockerfile @@ -1,5 +1,7 @@ FROM python:3.8-buster +RUN apt-get update && apt-get install -y r-base && Rscript -e "install.packages(c('httr','xml2'))" + WORKDIR /usr/src/app COPY repos repos diff --git a/dev/docker/redis/Dockerfile b/dev/docker/redis/Dockerfile new file mode 100644 index 000000000..3470ed39f --- /dev/null +++ b/dev/docker/redis/Dockerfile @@ -0,0 +1,5 @@ +FROM redis + +ENV REDIS_PASSWORD=$REDIS_PASSWORD + +CMD ["sh", "-c", "exec redis-server --requirepass \"$REDIS_PASSWORD\""] \ No newline at end of file diff --git a/dev/local/Makefile b/dev/local/Makefile index 55210d790..e7e896aa6 100644 --- a/dev/local/Makefile +++ b/dev/local/Makefile @@ -64,8 +64,10 @@ CWD:=$(dir $(abspath $(lastword $(MAKEFILE_LIST)))) NOW:=$(shell date "+%Y-%m-%d") LOG_WEB:=delphi_web_epidata_$(NOW).log LOG_DB:=delphi_database_epidata_$(NOW).log +LOG_REDIS:=delphi_redis_instance_$(NOW).log WEB_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_web_epidata') DATABASE_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_database_epidata') +REDIS_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_redis') M1= ifeq ($(shell uname -smp), Darwin arm64 arm) @@ -98,6 +100,10 @@ web: --env "MODULE_NAME=delphi.epidata.server.main" \ --env "SQLALCHEMY_DATABASE_URI=$(sqlalchemy_uri)" \ --env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --env "LOG_DEBUG" \ + --env "REDIS_HOST=delphi_redis" \ + --env "REDIS_PASSWORD=1234" \ + --env "API_KEY_ADMIN_PASSWORD=test_admin_password" \ + --env 
"API_KEY_REGISTER_WEBHOOK_TOKEN=abc" \ --network delphi-net --name delphi_web_epidata \ delphi_web_epidata >$(LOG_WEB) 2>&1 & @@ -136,8 +142,25 @@ py: $(M1) \ -f repos/delphi/delphi-epidata/dev/docker/python/Dockerfile . +.PHONY=redis +redis: + @# Stop container if running + @if [ $(REDIS_CONTAINER_ID) ]; then\ + docker stop $(REDIS_CONTAINER_ID);\ + fi + + @docker build -t delphi_redis \ + $(M1) \ + -f repos/delphi/delphi-epidata/dev/docker/redis/Dockerfile . + + @docker run --rm -d -p 127.0.0.1:6379:6379 \ + $(M1) \ + --network delphi-net \ + --env "REDIS_PASSWORD=1234" \ + --name delphi_redis delphi_redis >$(LOG_REDIS) 2>&1 & + .PHONY=all -all: db web py +all: db web py redis .PHONY=test test: @@ -149,6 +172,17 @@ test: --env "FLASK_SECRET=abc" \ delphi_web_python python -m pytest --import-mode importlib $(pdb) $(test) | tee test_output_$(NOW).log +.PHONY=r-test +r-test: + @docker run -i --rm --network delphi-net \ + $(M1) \ + --mount type=bind,source=$(CWD)repos/delphi/delphi-epidata,target=/usr/src/app/repos/delphi/delphi-epidata,readonly \ + --mount type=bind,source=$(CWD)repos/delphi/delphi-epidata/src,target=/usr/src/app/delphi/epidata,readonly \ + --env "SQLALCHEMY_DATABASE_URI=$(sqlalchemy_uri)" \ + --env "FLASK_SECRET=abc" \ + delphi_web_python Rscript repos/delphi/delphi-epidata/integrations/client/test_delphi_epidata.R | tee r-test_output_$(NOW).log + + .PHONY=bash bash: @docker run -it --rm --network delphi-net \ diff --git a/dev/local/setup.cfg b/dev/local/setup.cfg index 374009e0f..8bcff9681 100644 --- a/dev/local/setup.cfg +++ b/dev/local/setup.cfg @@ -28,6 +28,8 @@ packages = delphi.epidata.acquisition.wiki delphi.epidata.client delphi.epidata.server + delphi.epidata.server.admin + delphi.epidata.server.admin.templates delphi.epidata.server.covidcast_issues_migration delphi.epidata.server.endpoints delphi.epidata.server.endpoints.covidcast_utils diff --git a/docs/api/api_keys.md b/docs/api/api_keys.md index 4b2080324..80f74101c 100644 --- 
a/docs/api/api_keys.md +++ b/docs/api/api_keys.md @@ -16,11 +16,11 @@ change as we learn more about their impact: For example, a query for three signals on one date across all counties can be submitted anonymously, but a query for three signals on a period of four weeks -across all counties requires an API key. +across all counties requires an API key. An API key is a pseudonymous access token that grants privileged access to the -Epidata API. You can request an API key by -[registering with us](https://forms.gle/hkBr5SfQgxguAfEt7). +Epidata API. You can request an API key by +[registering with us](https://api.delphi.cmu.edu/epidata/admin/registration_form). Privileges of registration may include: 1. no rate limit @@ -36,7 +36,8 @@ store the information you provide us at registration time, see our ## Usage -If you choose to [register for an API key](https://forms.gle/hkBr5SfQgxguAfEt7), +If you choose to +[register for an API key](https://api.delphi.cmu.edu/epidata/admin/registration_form), there are several ways to use your key to authenticate your requests: ### Via request parameter diff --git a/docs/api/privacy_statement.md b/docs/api/privacy_statement.md index 002367931..eccaec14d 100644 --- a/docs/api/privacy_statement.md +++ b/docs/api/privacy_statement.md @@ -29,7 +29,7 @@ purposes: * to identify excessive or abnormal usage patterns which may harm our system The logs are only available to members of our operations team, and are expunged -at or before they reach five years in age. +at or before they reach five years in age. If you provide us with your email address, we will only use it to contact you in the following scenarios: @@ -47,9 +47,9 @@ security practices to help us keep your information secure. We only retrieve this mapping to resolve cases of excessive or abnormal usage. We automatically disassociate an email address from its API key when the API key has not been used for six months, or upon user request. 
You can request that your -email address be removed from our records by filling out a -[deletion request](https://forms.gle/GucFmZHTMgEFjH197). +email address be removed from our records by filling out a +[deletion request](https://api.delphi.cmu.edu/epidata/admin/removal_request). -For more information, see +For more information, see [Carnegie Mellon’s privacy notice](https://www.cmu.edu/legal/privacy-notice.html). Further questions can be directed to delphi-support+privacy@andrew.cmu.edu. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 1ddce45dc..931434984 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,14 +14,14 @@ group](https://delphi.cmu.edu/). The Epidata API includes: quick access to COVID data are available. - [Data about other diseases](api/README.md), including influenza, dengue, and other diseases tracked by Delphi through various data streams. - + Anyone may access the Epidata API anonymously without providing any personal data. Anonymous API access is currently rate-limited and restricted to public datasets with a maximum of two of the requested parameters having multiple selections (signals, dates, versions, regions, etc). To request access with no rate limit and unlimited multiple -selections, you can [request a registered API key](https://forms.gle/hkBr5SfQgxguAfEt7). +selections, you can [request a registered API key](https://api.delphi.cmu.edu/epidata/admin/registration_form). For policy and usage details, consult the [Epidata API keys documentation](api/api_keys.md). If you regularly or frequently use our system, please consider using an API key @@ -30,10 +30,11 @@ us understand who and how others are using our Delphi Epidata API, which may in turn inform our future research, data partnerships, and funding. For more information about how we use the data you provide us through your -registration and API request activity, see our -[Privacy Statement](api/privacy_statement.md). 
At any time, you may submit a -[Deletion Request](https://forms.gle/GucFmZHTMgEFjH197) to have us deactivate your key and destroy all -information associating that key with your identity. +registration and API request activity, see our +[Privacy Statement](api/privacy_statement.md). At any time, you may submit a +[Deletion Request](https://api.delphi.cmu.edu/epidata/admin/removal_request) to +have us deactivate your key and destroy all information associating that key +with your identity. The Delphi group is extremely grateful to Pedrito Maynard-Zhang for all his help with the Epidata API [documentation](api/README.md). diff --git a/integrations/acquisition/covid_hosp/facility/test_scenarios.py b/integrations/acquisition/covid_hosp/facility/test_scenarios.py index aaa3c5e3b..c6c51e2f5 100644 --- a/integrations/acquisition/covid_hosp/facility/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/facility/test_scenarios.py @@ -29,6 +29,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' @@ -40,6 +41,8 @@ def setUp(self): cur.execute('truncate table covid_hosp_facility') cur.execute('truncate table covid_hosp_facility_key') cur.execute('truncate table covid_hosp_meta') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') @freeze_time("2021-03-16") def test_acquire_dataset(self): diff --git a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py index e55bc8ca6..2054d19c8 100644 --- a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py @@ -33,6 +33,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 
'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' @@ -43,6 +44,8 @@ def setUp(self): with db.new_cursor() as cur: cur.execute('truncate table covid_hosp_state_timeseries') cur.execute('truncate table covid_hosp_meta') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values("key", "email")') @freeze_time("2021-03-16") def test_acquire_dataset(self): diff --git a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py index 5d13ccbb0..8565b8e7f 100644 --- a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py @@ -29,6 +29,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' @@ -39,6 +40,8 @@ def setUp(self): with db.new_cursor() as cur: cur.execute('truncate table covid_hosp_state_timeseries') cur.execute('truncate table covid_hosp_meta') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') @freeze_time("2021-03-17") def test_acquire_dataset(self): diff --git a/integrations/acquisition/covidcast/test_covidcast_meta_caching.py b/integrations/acquisition/covidcast/test_covidcast_meta_caching.py index 99008a0f1..e746c4ef1 100644 --- a/integrations/acquisition/covidcast/test_covidcast_meta_caching.py +++ b/integrations/acquisition/covidcast/test_covidcast_meta_caching.py @@ -60,12 +60,20 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = BASE_URL + Epidata.auth = ('epidata', 'key') def tearDown(self): """Perform per-test teardown.""" 
self.cur.close() self.cnx.close() + @staticmethod + def _make_request(): + params = {'endpoint': 'covidcast_meta', 'cached': 'true'} + response = requests.get(Epidata.BASE_URL, params=params, auth=Epidata.auth) + response.raise_for_status() + return response.json() + def test_caching(self): """Populate, query, cache, query, and verify the cache.""" @@ -147,10 +155,7 @@ def test_caching(self): self.cnx.commit() # fetch the cached version (manually) - params = {'endpoint': 'covidcast_meta', 'cached': 'true'} - response = requests.get(BASE_URL, params=params) - response.raise_for_status() - epidata4 = response.json() + epidata4 = self._make_request() # make sure the cache was actually served self.assertEqual(epidata4, { @@ -170,10 +175,7 @@ def test_caching(self): self.cnx.commit() # fetch the cached version (manually) - params = {'endpoint': 'covidcast_meta', 'cached': 'true'} - response = requests.get(BASE_URL, params=params) - response.raise_for_status() - epidata5 = response.json() + epidata5 = self._make_request() # make sure the cache was returned anyhow self.assertEqual(epidata4, epidata5) diff --git a/integrations/acquisition/covidcast/test_csv_uploading.py b/integrations/acquisition/covidcast/test_csv_uploading.py index 040eb5f1a..e4c9d881e 100644 --- a/integrations/acquisition/covidcast/test_csv_uploading.py +++ b/integrations/acquisition/covidcast/test_csv_uploading.py @@ -57,6 +57,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') def tearDown(self): """Perform per-test teardown.""" diff --git a/integrations/acquisition/covidcast_nowcast/test_csv_uploading.py b/integrations/acquisition/covidcast_nowcast/test_csv_uploading.py index 9dc163a2b..1299c6144 100644 --- a/integrations/acquisition/covidcast_nowcast/test_csv_uploading.py +++ b/integrations/acquisition/covidcast_nowcast/test_csv_uploading.py @@ -41,6 +41,8 @@ def setUp(self): 
database='epidata') cur = cnx.cursor() cur.execute('truncate table covidcast_nowcast') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') cnx.commit() cur.close() @@ -54,6 +56,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') def tearDown(self): """Perform per-test teardown.""" diff --git a/integrations/client/test_delphi_epidata.R b/integrations/client/test_delphi_epidata.R new file mode 100644 index 000000000..3f05386f5 --- /dev/null +++ b/integrations/client/test_delphi_epidata.R @@ -0,0 +1,106 @@ +#!/usr/bin/env Rscript + +# disgusting hack to set up cwd +initial_options <- commandArgs(trailingOnly = FALSE) +file_arg_name <- "--file=" +basename <- dirname(sub(file_arg_name, "", initial_options[grep(file_arg_name, initial_options)])) +setwd(basename) + +options(epidata.url="http://delphi_web_epidata/epidata/") +source("../../src/client/delphi_epidata.R") + +out <- function(res) { + cat(paste(res$result, res$message, length(res$epidata), "\n")) +} + +check <- function(res, exp_result, exp_message, exp_length) { + stopifnot(res$result == exp_result) + stopifnot(res$message == exp_message) + stopifnot(length(res$epidata) == exp_length) +} + +call_region_epiweeks <- function(fn_name) { + fn <- Epidata[[fn_name]] + cat(paste(fn_name,"\n")) + fn(list('nat'), list(201440)) +} + +cat("server_version\n") +res <- Epidata$server_version() +stopifnot("version" %in% names(res)) +cat("fluview\n") +res <- Epidata$fluview(list('nat'), list(201440, Epidata$range(201501, 201510)), auth="test") +check(res, -2, "no results", 0) +cat("fluview_meta\n") +res <- Epidata$fluview_meta() +check(res, 1, "success", 1) +res <- call_region_epiweeks("fluview_clinical") +check(res, -2, "no results", 0) +res <- call_region_epiweeks("flusurv") +check(res, -2, "no results", 0) +res <- call_region_epiweeks("ecdc_ili") 
+check(res, -2, "no results", 0) +res <- call_region_epiweeks("kcdc_ili") +check(res, -2, "no results", 0) +res <- call_region_epiweeks("gft") +check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$ght("abc", list("nat"), list(201440), "cough") +#check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$twitter("abc", list("nat"), epiweeks=list(201440)) +#check(res, -2, "no results", 0) +cat("wiki\n") +res <- Epidata$wiki(list("abc"), list(20141201)) +check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$cdc("abc", list(201440), list("nat")) +#check(res, -2, "no results", 0 +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$quidel("abc", list(201440), list("nat")) +#check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$norostat("abc", "nat", list(201440)) +#check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$meta_norostat("abc") +#out(res) +res <- call_region_epiweeks("nidss.flu") +check(res, -2, "no results", 0) +res <- call_region_epiweeks("nidss.dengue") +check(res, -2, "no results", 0) +cat("delphi\n") +res <- Epidata$delphi("xyz", 201440) +check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$sensors("abc", list("def"), list("nat"), list(201440)) +#check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$dengue_sensors("abc", list("def"), list("nat"), list(201440)) +#check(res, -2, "no results", 0) +res <- call_region_epiweeks("nowcast") +check(res, -2, "no results", 0) +## Fails with database error: +#res <- call_region_epiweeks("dengue_nowcast") 
+#out(res) +#check(res, -2, "no results", 0) +cat("meta\n") +res <- Epidata$meta() +check(res, 1, "success", 1) +cat("covidcast\n") +res <- Epidata$covidcast("abc", "def", "day", "nation", list(20200401), "us") +check(res, -2, "no results", 0) +cat("covidcast_meta\n") +res <- Epidata$covidcast_meta() +check(res, 1, "success", 16) +cat("covid_hosp\n") +res <- Epidata$covid_hosp("pa", 20201201) +check(res, -2, "no results", 0) +cat("covid_hosp_facility\n") +res <- Epidata$covid_hosp_facility(list("abc"), list(202050)) +check(res, -2, "no results", 0) +cat("covid_hosp_facility_lookup\n") +res <- Epidata$covid_hosp_facility_lookup("pa") +check(res, 1, "success", 1) + + diff --git a/integrations/client/test_delphi_epidata.py b/integrations/client/test_delphi_epidata.py index f5ce462f2..9698d1274 100644 --- a/integrations/client/test_delphi_epidata.py +++ b/integrations/client/test_delphi_epidata.py @@ -40,6 +40,7 @@ def localSetUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' diff --git a/integrations/client/test_nowcast.py b/integrations/client/test_nowcast.py index dc1a20794..f5124e021 100644 --- a/integrations/client/test_nowcast.py +++ b/integrations/client/test_nowcast.py @@ -28,6 +28,8 @@ def setUp(self): cur = cnx.cursor() cur.execute('truncate table covidcast_nowcast') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') cnx.commit() cur.close() @@ -38,6 +40,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' diff --git a/integrations/server/test_covid_hosp.py b/integrations/server/test_covid_hosp.py 
index 16538b82d..37aa77363 100644 --- a/integrations/server/test_covid_hosp.py +++ b/integrations/server/test_covid_hosp.py @@ -17,6 +17,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' @@ -27,6 +28,8 @@ def setUp(self): with db.new_cursor() as cur: cur.execute('truncate table covid_hosp_state_timeseries') cur.execute('truncate table covid_hosp_meta') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') def insert_issue(self, cur, issue, value, record_type): diff --git a/integrations/server/test_covidcast.py b/integrations/server/test_covidcast.py index 01d81bf29..73787d664 100644 --- a/integrations/server/test_covidcast.py +++ b/integrations/server/test_covidcast.py @@ -6,15 +6,12 @@ # third party import mysql.connector -import requests # first party from delphi_utils import Nans from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow, FIPS, MSA from delphi.epidata.client.delphi_epidata import Epidata -# use the local instance of the Epidata API -BASE_URL = 'http://delphi_web_epidata/epidata/api.php' class CovidcastTests(CovidcastBase): """Tests the `covidcast` endpoint.""" @@ -25,7 +22,9 @@ def localSetUp(self): def request_based_on_row(self, row: CovidcastTestRow, **kwargs): params = self.params_from_row(row, endpoint='covidcast', **kwargs) - Epidata.BASE_URL = BASE_URL + # use the local instance of the Epidata API + Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') response = Epidata.covidcast(**params) return response diff --git a/integrations/server/test_covidcast_endpoints.py b/integrations/server/test_covidcast_endpoints.py index 1f7e7ade5..41b74ac03 100644 --- 
a/integrations/server/test_covidcast_endpoints.py +++ b/integrations/server/test_covidcast_endpoints.py @@ -15,6 +15,7 @@ # use the local instance of the Epidata API BASE_URL = "http://delphi_web_epidata/epidata/covidcast" BASE_URL_OLD = "http://delphi_web_epidata/epidata/api.php" +AUTH = ('epidata', 'key') class CovidcastEndpointTests(CovidcastBase): @@ -36,7 +37,7 @@ def _fetch(self, endpoint="/", is_compatibility=False, **params): params.setdefault("data_source", params.get("source")) else: url = f"{BASE_URL}{endpoint}" - response = requests.get(url, params=params) + response = requests.get(url, params=params, auth=AUTH) response.raise_for_status() return response.json() diff --git a/integrations/server/test_covidcast_meta.py b/integrations/server/test_covidcast_meta.py index d0aef6fe5..95a51e354 100644 --- a/integrations/server/test_covidcast_meta.py +++ b/integrations/server/test_covidcast_meta.py @@ -14,6 +14,7 @@ # use the local instance of the Epidata API BASE_URL = 'http://delphi_web_epidata/epidata/api.php' +AUTH = ('epidata', 'key') class CovidcastMetaTests(unittest.TestCase): @@ -135,6 +136,14 @@ def insert_placeholder_data(self): def _get_id(self): self.id_counter += 1 return self.id_counter + + @staticmethod + def _fetch(**kwargs): + params = kwargs.copy() + params['endpoint'] = 'covidcast_meta' + response = requests.get(BASE_URL, params=params, auth=AUTH) + response.raise_for_status() + return response.json() def test_round_trip(self): """Make a simple round-trip with some sample data.""" @@ -143,9 +152,7 @@ def test_round_trip(self): expected = self.insert_placeholder_data() # make the request - response = requests.get(BASE_URL, params={'endpoint': 'covidcast_meta'}) - response.raise_for_status() - response = response.json() + response = self._fetch() # assert that the right data came back self.assertEqual(response, { @@ -160,71 +167,63 @@ def test_filter(self): # insert placeholder data and accumulate expected results (in sort order) expected = 
self.insert_placeholder_data() - def fetch(**kwargs): - # make the request - params = kwargs.copy() - params['endpoint'] = 'covidcast_meta' - response = requests.get(BASE_URL, params=params) - response.raise_for_status() - return response.json() - - res = fetch() + res = self._fetch() self.assertEqual(res['result'], 1) self.assertEqual(len(res['epidata']), len(expected)) # time types - res = fetch(time_types='day') + res = self._fetch(time_types='day') self.assertEqual(res['result'], 1) self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['time_type'] == 'day'])) - res = fetch(time_types='day,week') + res = self._fetch(time_types='day,week') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), len(expected)) - res = fetch(time_types='sec') + res = self._fetch(time_types='sec') self.assertEqual(res['result'], -2) # geo types - res = fetch(geo_types='hrr') + res = self._fetch(geo_types='hrr') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['geo_type'] == 'hrr'])) - res = fetch(geo_types='hrr,msa') + res = self._fetch(geo_types='hrr,msa') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), len(expected)) - res = fetch(geo_types='state') + res = self._fetch(geo_types='state') self.assertEqual(res['result'], -2) # signals - res = fetch(signals='src1:sig1') + res = self._fetch(signals='src1:sig1') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['data_source'] == 'src1' and s['signal'] == 'sig1'])) - res = fetch(signals='src1') + res = self._fetch(signals='src1') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), sum([1 for s in expected if 
s['data_source'] == 'src1'])) - res = fetch(signals='src1:*') + res = self._fetch(signals='src1:*') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['data_source'] == 'src1'])) - res = fetch(signals='src1:src4') + res = self._fetch(signals='src1:src4') self.assertEqual(res['result'], -2) - res = fetch(signals='src1:*,src2:*') + res = self._fetch(signals='src1:*,src2:*') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), len(expected)) # filter fields - res = fetch(fields='data_source,min_time') + res = self._fetch(fields='data_source,min_time') self.assertEqual(res['result'], 1) self.assertEqual(len(res['epidata']), len(expected)) self.assertTrue('data_source' in res['epidata'][0]) @@ -232,7 +231,7 @@ def fetch(**kwargs): self.assertFalse('max_time' in res['epidata'][0]) self.assertFalse('signal' in res['epidata'][0]) - res = fetch(fields='xx') + res = self._fetch(fields='xx') self.assertEqual(res['result'], 1) self.assertEqual(len(res['epidata']), len(expected)) self.assertEqual(res['epidata'][0], {}) diff --git a/integrations/server/test_covidcast_nowcast.py b/integrations/server/test_covidcast_nowcast.py index 7df695038..889d962dd 100644 --- a/integrations/server/test_covidcast_nowcast.py +++ b/integrations/server/test_covidcast_nowcast.py @@ -10,6 +10,7 @@ # use the local instance of the Epidata API BASE_URL = 'http://delphi_web_epidata/epidata/api.php' +AUTH = ('epidata', 'key') class CovidcastTests(unittest.TestCase): @@ -26,6 +27,8 @@ def setUp(self): database='epidata') cur = cnx.cursor() cur.execute('truncate table covidcast_nowcast') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values("key", "email")') cnx.commit() cur.close() @@ -38,6 +41,12 @@ def tearDown(self): self.cur.close() self.cnx.close() + @staticmethod + def 
_make_request(params: dict): + response = requests.get(BASE_URL, params=params, auth=AUTH) + response.raise_for_status() + return response.json() + def test_query(self): """Query nowcasts using default and specified issue.""" @@ -49,7 +58,7 @@ def test_query(self): self.cnx.commit() # make the request with specified issue date - response = requests.get(BASE_URL, params={ + params={ 'source': 'covidcast_nowcast', 'data_source': 'src', 'signals': 'sig', @@ -59,9 +68,8 @@ def test_query(self): 'time_values': 20200101, 'geo_value': '01001', 'issues': 20200101 - }) - response.raise_for_status() - response = response.json() + } + response = self._make_request(params=params) self.assertEqual(response, { 'result': 1, 'epidata': [{ @@ -76,7 +84,7 @@ def test_query(self): }) # make request without specific issue date - response = requests.get(BASE_URL, params={ + params={ 'source': 'covidcast_nowcast', 'data_source': 'src', 'signals': 'sig', @@ -85,9 +93,8 @@ def test_query(self): 'geo_type': 'county', 'time_values': 20200101, 'geo_value': '01001', - }) - response.raise_for_status() - response = response.json() + } + response = self._make_request(params=params) self.assertEqual(response, { 'result': 1, @@ -102,7 +109,7 @@ def test_query(self): 'message': 'success', }) - response = requests.get(BASE_URL, params={ + params={ 'source': 'covidcast_nowcast', 'data_source': 'src', 'signals': 'sig', @@ -112,9 +119,8 @@ def test_query(self): 'time_values': 20200101, 'geo_value': '01001', 'as_of': 20200101 - }) - response.raise_for_status() - response = response.json() + } + response = self._make_request(params=params) self.assertEqual(response, { 'result': 1, diff --git a/integrations/server/test_fluview.py b/integrations/server/test_fluview.py index 8bfc18376..c192da637 100644 --- a/integrations/server/test_fluview.py +++ b/integrations/server/test_fluview.py @@ -19,6 +19,7 @@ def setUpClass(cls): # use the local instance of the Epidata API Epidata.BASE_URL = 
'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') def setUp(self): """Perform per-test setup.""" @@ -31,6 +32,8 @@ def setUp(self): database='epidata') cur = cnx.cursor() cur.execute('truncate table fluview') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') cnx.commit() cur.close() diff --git a/integrations/server/test_fluview_meta.py b/integrations/server/test_fluview_meta.py index 137e9464a..1e2cf73e3 100644 --- a/integrations/server/test_fluview_meta.py +++ b/integrations/server/test_fluview_meta.py @@ -19,6 +19,7 @@ def setUpClass(cls): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') def setUp(self): """Perform per-test setup.""" @@ -31,6 +32,8 @@ def setUp(self): database='epidata') cur = cnx.cursor() cur.execute('truncate table fluview') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') cnx.commit() cur.close() diff --git a/requirements.api.txt b/requirements.api.txt index 5e31804b8..c7de90997 100644 --- a/requirements.api.txt +++ b/requirements.api.txt @@ -1,6 +1,7 @@ delphi_utils==0.3.15 epiweeks==2.1.2 Flask==2.2.2 +Flask-Limiter==3.3.0 itsdangerous<2.1 jinja2==3.0.3 more_itertools==8.4.0 @@ -9,6 +10,9 @@ newrelic orjson==3.4.7 pandas==1.2.3 python-dotenv==0.15.0 +pyyaml +redis==3.5.3 +requests==2.28.1 scipy==1.6.2 SQLAlchemy==1.4.40 structlog==22.1.0 diff --git a/requirements.dev.txt b/requirements.dev.txt index 0bd175eeb..6b7b6b5db 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -16,10 +16,8 @@ pycountry==22.3.5 pymysql==1.0.2 pytest==7.2.0 pytest-check==1.3.0 -requests==2.28.1 sas7bdat==2.2.3 selenium==4.7.2 sqlalchemy-stubs>=0.3 -structlog==22.1.0 tenacity==7.0.0 xlrd==2.0.1 diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R index 4f72b5f91..8c179fffd 100644 --- 
a/src/client/delphi_epidata.R +++ b/src/client/delphi_epidata.R @@ -13,10 +13,14 @@ library(httr) Epidata <- (function() { # API base url - BASE_URL <- 'https://api.delphi.cmu.edu/epidata/api.php' + BASE_URL <- getOption('epidata.url', default = 'https://api.delphi.cmu.edu/epidata/') client_version <- '0.4.12' + auth <- getOption("epidata.auth", default = NA) + + client_user_agent <- user_agent(paste("delphi_epidata/", client_version, " (R)", sep="")) + # Helper function to cast values and/or ranges to strings .listitem <- function(value) { if(is.list(value) && 'from' %in% names(value) && 'to' %in% names(value)) { @@ -36,10 +40,31 @@ Epidata <- (function() { # Helper function to request and parse epidata .request <- function(params) { + headers <- add_headers(Authorization = ifelse(is.na(auth), "", paste("Bearer", auth))) + url <- paste(BASE_URL, params$endpoint, sep="") + params <- within(params, rm(endpoint)) # API call - res <- GET(BASE_URL, query=params) + res <- GET(url, + client_user_agent, + headers, + query=params) if (res$status_code == 414) { - res <- POST(BASE_URL, body=params, encode='form') + res <- POST(url, + client_user_agent, + headers, + body=params, encode='form') + } + if (res$status_code != 200) { + # 500, 429, 401 are possible + msg <- "fetch data from API" + if (http_type(res) == "text/html") { + # grab the error information out of the returned HTML document + msg <- paste(msg, ":", xml2::xml_text(xml2::xml_find_all( + xml2::read_html(content(res, 'text')), + "//p" + ))) + } + stop_for_status(res, task = msg) } return(content(res, 'parsed')) } @@ -563,7 +588,7 @@ Epidata <- (function() { } # Set up request params <- list( - endpoint = 'covid_hosp', + endpoint = 'covid_hosp_state_timeseries', states = .list(states), dates = .list(dates) ) @@ -582,7 +607,7 @@ Epidata <- (function() { } # Set up request params <- list( - source = 'covid_hosp_facility', + endpoint = 'covid_hosp_facility', hospital_pks = .list(hospital_pks), collection_weeks = 
.list(collection_weeks) ) @@ -596,7 +621,7 @@ Epidata <- (function() { # Lookup COVID hospitalization facility identifiers covid_hosp_facility_lookup <- function(state, ccn, city, zip, fips_code) { # Set up request - params <- list(source = 'covid_hosp_facility_lookup') + params <- list(endpoint = 'covid_hosp_facility_lookup') if(!missing(state)) { params$state <- state } else if(!missing(ccn)) { @@ -614,14 +639,21 @@ Epidata <- (function() { return(.request(params)) } + server_version <- function() { + return(.request(list(endpoint = 'version'))) + } + # Export the public methods return(list( range = range, client_version = client_version, + server_version = server_version, fluview = fluview, fluview_meta = fluview_meta, fluview_clinical = fluview_clinical, flusurv = flusurv, + ecdc_ili = ecdc_ili, + kcdc_ili = kcdc_ili, gft = gft, ght = ght, twitter = twitter, diff --git a/src/client/delphi_epidata.py b/src/client/delphi_epidata.py index 61b5e41eb..a56527357 100644 --- a/src/client/delphi_epidata.py +++ b/src/client/delphi_epidata.py @@ -13,7 +13,7 @@ import asyncio from tenacity import retry, stop_after_attempt -from aiohttp import ClientSession, TCPConnector +from aiohttp import ClientSession, TCPConnector, BasicAuth from pkg_resources import get_distribution, DistributionNotFound # Obtain package version for the user-agent. 
Uses the installed version by @@ -25,7 +25,7 @@ _version = "0.script" _HEADERS = { - "user-agent": "delphi_epidata/" + _version + "user-agent": "delphi_epidata/" + _version + " (Python)" } # Because the API is stateless, the Epidata class only contains static methods @@ -34,6 +34,7 @@ class Epidata: # API base url BASE_URL = 'https://api.delphi.cmu.edu/epidata/api.php' + auth = None client_version = _version @@ -58,9 +59,11 @@ def _list(values): @retry(reraise=True, stop=stop_after_attempt(2)) def _request_with_retry(params): """Make request with a retry if an exception is thrown.""" - req = requests.get(Epidata.BASE_URL, params, headers=_HEADERS) + req = requests.get(Epidata.BASE_URL, params, auth=Epidata.auth, headers=_HEADERS) if req.status_code == 414: - req = requests.post(Epidata.BASE_URL, params, headers=_HEADERS) + req = requests.post(Epidata.BASE_URL, params, auth=Epidata.auth, headers=_HEADERS) + # handle 401 and 429 + req.raise_for_status() return req @staticmethod @@ -73,12 +76,15 @@ def _request(params): """ try: result = Epidata._request_with_retry(params) - if params is not None and "format" in params and params["format"]=="csv": - return result.text - else: - return result.json() except Exception as e: return {'result': 0, 'message': 'error: ' + str(e)} + if params is not None and "format" in params and params["format"]=="csv": + return result.text + else: + try: + return result.json() + except requests.exceptions.JSONDecodeError: + return {'result': 0, 'message': 'error decoding json: ' + result.text} # Raise an Exception on error, otherwise return epidata @staticmethod @@ -736,7 +742,11 @@ async def async_make_calls(param_combos): """Helper function to asynchronously make and aggregate Epidata GET requests.""" tasks = [] connector = TCPConnector(limit=batch_size) - async with ClientSession(connector=connector, headers=_HEADERS) as session: + if isinstance(Epidata.auth, tuple): + auth = BasicAuth(login=Epidata.auth[0], password=Epidata.auth[1], 
encoding='utf-8') + else: + auth = Epidata.auth + async with ClientSession(connector=connector, headers=_HEADERS, auth=auth) as session: for param in param_combos: task = asyncio.ensure_future(async_get(param, session)) tasks.append(task) diff --git a/src/ddl/api_analytics.sql b/src/ddl/api_analytics.sql deleted file mode 100644 index 7b8aa0279..000000000 --- a/src/ddl/api_analytics.sql +++ /dev/null @@ -1,32 +0,0 @@ -USE epidata; -/* -`api_analytics` logs API usage, which Delphi uses to improve the API. - -This data is private to Delphi. - -+----------+---------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+---------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| datetime | datetime | NO | MUL | NULL | | -| ip | varchar(15) | NO | MUL | NULL | | -| ua | varchar(1024) | NO | | NULL | | -| source | varchar(32) | NO | MUL | NULL | | -| result | int(11) | NO | | NULL | | -| num_rows | int(11) | NO | | NULL | | -+----------+---------------+------+-----+---------+----------------+ -*/ - -CREATE TABLE `api_analytics` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `datetime` datetime NOT NULL, - `ip` varchar(15) NOT NULL, - `ua` varchar(1024) NOT NULL, - `source` varchar(32) NOT NULL, - `result` int(11) NOT NULL, - `num_rows` int(11) NOT NULL, - PRIMARY KEY (`id`), - KEY `datetime` (`datetime`), - KEY `ip` (`ip`), - KEY `source` (`source`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; diff --git a/src/ddl/api_user.sql b/src/ddl/api_user.sql new file mode 100644 index 000000000..90e56539f --- /dev/null +++ b/src/ddl/api_user.sql @@ -0,0 +1,31 @@ +USE epidata; + + +-- `api_user` API key and user management + +CREATE TABLE IF NOT EXISTS `api_user` ( + `id` int(11) UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + `api_key` varchar(50) UNIQUE NOT NULL, + `email` varchar(320) UNIQUE NOT NULL, + `created` date, + `last_time_used` date, + UNIQUE KEY `api_user` (`api_key`, 
`email`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + + +-- `user_role` User roles + +CREATE TABLE IF NOT EXISTS `user_role` ( + `id` int(11) UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + `name` varchar(50) NOT NULL, + UNIQUE KEY `name` (`name`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + + +-- `user_role_link` User roles link table + +CREATE TABLE IF NOT EXISTS `user_role_link` ( + `user_id` int(11) UNSIGNED NOT NULL, + `role_id` int(11) UNSIGNED NOT NULL, + PRIMARY KEY (`user_id`, `role_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; diff --git a/src/server/_common.py b/src/server/_common.py index 2d2d3059f..6c424326e 100644 --- a/src/server/_common.py +++ b/src/server/_common.py @@ -2,15 +2,17 @@ import time from flask import Flask, g, request -from sqlalchemy import create_engine, event -from sqlalchemy.engine import Connection, Engine +from sqlalchemy import event +from sqlalchemy.engine import Connection +from werkzeug.exceptions import Unauthorized from werkzeug.local import LocalProxy from delphi.epidata.common.logger import get_structured_logger -from ._config import SECRET, SQLALCHEMY_DATABASE_URI, SQLALCHEMY_ENGINE_OPTIONS +from ._config import SECRET, REVERSE_PROXIED +from ._db import engine from ._exceptions import DatabaseErrorException, EpiDataException +from ._security import current_user, _is_public_route, resolve_auth_token, show_no_api_key_warning, update_key_last_time_used, ERROR_MSG_INVALID_KEY -engine: Engine = create_engine(SQLALCHEMY_DATABASE_URI, **SQLALCHEMY_ENGINE_OPTIONS) app = Flask("EpiData", static_url_path="") app.config["SECRET"] = SECRET @@ -22,12 +24,52 @@ def _get_db() -> Connection: g.db = conn return g.db - """ access to the SQL Alchemy connection for this request """ db: Connection = cast(Connection, LocalProxy(_get_db)) + +def get_real_ip_addr(req): # `req` should be a Flask.request object + if REVERSE_PROXIED: + # NOTE: ONLY trust these headers if reverse proxied!!! 
+ if "X-Forwarded-For" in req.headers: + return req.headers["X-Forwarded-For"].split(",")[ + 0 + ] # take the first (or only) address from the comma-sep list + if "X-Real-Ip" in req.headers: + return req.headers["X-Real-Ip"] + return req.remote_addr + + +def log_info_with_request(message, **kwargs): + # TODO: make log level an option and check for key conflicts in kwargs + get_structured_logger("server_api").info( + message, + method=request.method, + url=request.url, + form_args=request.form, + req_length=request.content_length, + remote_addr=request.remote_addr, + real_remote_addr=get_real_ip_addr(request), + user_agent=request.user_agent.string, + api_key=resolve_auth_token(), + user_id=(current_user and current_user.id), + **kwargs + ) + +def log_info_with_request_and_response(message, response, **kwargs): + # TODO: make log level an option and check for key conflicts in kwargs + log_info_with_request( + message, + values=request.values.to_dict(flat=False), + blueprint=request.blueprint, + endpoint=request.endpoint, + response_status=response.status, + content_length=response.calculate_content_length(), + **kwargs + ) + @event.listens_for(engine, "before_cursor_execute") def before_cursor_execute(conn, cursor, statement, parameters, context, executemany): context._query_start_time = time.time() @@ -43,7 +85,9 @@ def after_cursor_execute(conn, cursor, statement, parameters, context, executema # Convert to milliseconds total_time *= 1000 - get_structured_logger('server_api').info("Executed SQL", statement=statement, params=parameters, elapsed_time_ms=total_time) + get_structured_logger("server_api").info( + "Executed SQL", statement=statement, params=parameters, elapsed_time_ms=total_time + ) @app.before_request @@ -51,16 +95,37 @@ def before_request_execute(): # Set timer for statement g._request_start_time = time.time() - # Log statement - get_structured_logger('server_api').info("Received API request", method=request.method, url=request.url, 
form_args=request.form, req_length=request.content_length, remote_addr=request.remote_addr, user_agent=request.user_agent.string) - - if request.path.startswith('/lib'): + user = current_user + api_key = resolve_auth_token() + + # TODO: replace this next call with: log_info_with_request("Received API request") + get_structured_logger("server_api").info( + "Received API request", + method=request.method, + url=request.url, + form_args=request.form, + req_length=request.content_length, + remote_addr=request.remote_addr, + real_remote_addr=get_real_ip_addr(request), + user_agent=request.user_agent.string, + api_key=api_key, + user_id=(user and user.id) + ) + + if not show_no_api_key_warning(): + if not _is_public_route() and api_key and not user: + # if this is a privileged endpoint, and an api key was given but it does not look up to a user, raise exception: + get_structured_logger("server_api").info("bad api key used", api_key=api_key) + raise Unauthorized(ERROR_MSG_INVALID_KEY) + + if request.path.startswith("/lib"): + # files served from 'lib' directory don't need the database, so we can exit this early...
return # try to get the db try: _get_db() except Exception as e: - get_structured_logger('server_error').error('database connection error', exception=e) + get_structured_logger("server_error").error("database connection error", exception=e) raise DatabaseErrorException() @@ -69,9 +134,29 @@ def after_request_execute(response): total_time = time.time() - g._request_start_time # Convert to milliseconds total_time *= 1000 - get_structured_logger('server_api').info('Served API request', method=request.method, url=request.url, form_args=request.form, req_length=request.content_length, remote_addr=request.remote_addr, user_agent=request.user_agent.string, - values=request.values.to_dict(flat=False), blueprint=request.blueprint, endpoint=request.endpoint, - response_status=response.status, content_length=response.calculate_content_length(), elapsed_time_ms=total_time) + + api_key = resolve_auth_token() + + update_key_last_time_used(current_user) + + # TODO: replace this next call with: log_info_with_request_and_response("Served API request", response, elapsed_time_ms=total_time) + get_structured_logger("server_api").info( + "Served API request", + method=request.method, + url=request.url, + form_args=request.form, + req_length=request.content_length, + remote_addr=request.remote_addr, + real_remote_addr=get_real_ip_addr(request), + user_agent=request.user_agent.string, + api_key=api_key, + values=request.values.to_dict(flat=False), + blueprint=request.blueprint, + endpoint=request.endpoint, + response_status=response.status, + content_length=response.calculate_content_length(), + elapsed_time_ms=total_time, + ) return response @@ -88,9 +173,9 @@ def teardown_db(exception=None): def handle_exception(e): # Log error and pass through; EpiDataExceptions are HTTPExceptions which are valid WSGI responses (see https://werkzeug.palletsprojects.com/en/2.2.x/exceptions/ ) if isinstance(e, DatabaseErrorException): - get_structured_logger('server_error').error('Received 
DatabaseErrorException', exception=str(e), exc_info=True) + get_structured_logger("server_error").error("Received DatabaseErrorException", exception=str(e), exc_info=True) else: - get_structured_logger('server_error').warn('Encountered user-side error', exception=str(e)) + get_structured_logger("server_error").warn("Encountered user-side error", exception=str(e)) return e @@ -98,7 +183,7 @@ def is_compatibility_mode() -> bool: """ checks whether this request is in compatibility mode """ - return 'compatibility' in g and g.compatibility + return "compatibility" in g and g.compatibility def set_compatibility_mode(): diff --git a/src/server/_config.py b/src/server/_config.py index 073da564a..54433e411 100644 --- a/src/server/_config.py +++ b/src/server/_config.py @@ -1,5 +1,8 @@ import json import os +from datetime import date +from enum import Enum + from dotenv import load_dotenv load_dotenv() @@ -13,45 +16,23 @@ # defaults SQLALCHEMY_ENGINE_OPTIONS = { - "pool_pre_ping": True, # enable ping test for validity of recycled pool connections on connect() calls - "pool_recycle": 5 # seconds after which a recycled pool connection is considered invalid + "pool_pre_ping": True, # enable ping test for validity of recycled pool connections on connect() calls + "pool_recycle": 5, # seconds after which a recycled pool connection is considered invalid } # update with overrides of defaults or additions from external configs -SQLALCHEMY_ENGINE_OPTIONS.update( - json.loads(os.environ.get("SQLALCHEMY_ENGINE_OPTIONS", "{}"))) +SQLALCHEMY_ENGINE_OPTIONS.update(json.loads(os.environ.get("SQLALCHEMY_ENGINE_OPTIONS", "{}"))) SECRET = os.environ.get("FLASK_SECRET", "secret") -URL_PREFIX = os.environ.get("FLASK_PREFIX", "/") - -AUTH = { - "twitter": os.environ.get("SECRET_TWITTER"), - "ght": os.environ.get("SECRET_GHT"), - "fluview": os.environ.get("SECRET_FLUVIEW"), - "cdc": os.environ.get("SECRET_CDC"), - "sensors": os.environ.get("SECRET_SENSORS"), - "quidel": 
os.environ.get("SECRET_QUIDEL"), - "norostat": os.environ.get("SECRET_NOROSTAT"), - "afhsb": os.environ.get("SECRET_AFHSB"), -} +URL_PREFIX = os.environ.get("FLASK_PREFIX", "") # if not empty, this value should begin but not end in a slash ('/') -# begin sensor query authentication configuration -# A multimap of sensor names to the "granular" auth tokens that can be used to access them; excludes the "global" sensor auth key that works for all sensors: -GRANULAR_SENSOR_AUTH_TOKENS = { - "twtr": os.environ.get("SECRET_SENSOR_TWTR", "").split(","), - "gft": os.environ.get("SECRET_SENSOR_GFT", "").split(","), - "ght": os.environ.get("SECRET_SENSOR_GHT", "").split(","), - "ghtj": os.environ.get("SECRET_SENSOR_GHTJ", "").split(","), - "cdc": os.environ.get("SECRET_SENSOR_CDC", "").split(","), - "quid": os.environ.get("SECRET_SENSOR_QUID", "").split(","), - "wiki": os.environ.get("SECRET_SENSOR_WIKI", "").split(","), -} - -# A set of sensors that do not require an auth key to access: -OPEN_SENSORS = [ - "sar3", - "epic", - "arch", -] +# REVERSE_PROXIED is a boolean value that indicates whether or not this server instance +# is running behind a reverse proxy (like nginx). +# in dev and testing, it is fine (or even preferable) for this variable to be set to 'TRUE' +# even if it is not actually the case. in prod, it is very important that this is set accurately -- +# it should _only_ be set to 'TRUE' if it really is behind a reverse proxy, as remote addresses can be "spoofed" +# which can carry security/identity implications. conversely, if this is set to 'FALSE' when in fact +# running behind a reverse proxy, it can hinder logging accuracy. it defaults to 'FALSE' for safety. 
+REVERSE_PROXIED = os.environ.get("REVERSE_PROXIED", "FALSE").upper() == "TRUE" REGION_TO_STATE = { "hhs1": ["VT", "CT", "ME", "MA", "NH", "RI"], @@ -75,3 +56,32 @@ "cen9": ["AK", "CA", "HI", "OR", "WA"], } NATION_REGION = "nat" + +API_KEY_REQUIRED_STARTING_AT = date.fromisoformat(os.environ.get("API_KEY_REQUIRED_STARTING_AT", "2023-06-21")) +TEMPORARY_API_KEY = os.environ.get("TEMPORARY_API_KEY", "TEMP-API-KEY-EXPIRES-2023-06-28") +# password needed for the admin application if not set the admin routes won't be available +ADMIN_PASSWORD = os.environ.get("API_KEY_ADMIN_PASSWORD", "abc") +# secret for the google form to give to the admin/register endpoint +REGISTER_WEBHOOK_TOKEN = os.environ.get("API_KEY_REGISTER_WEBHOOK_TOKEN") + +REDIS_HOST = os.environ.get("REDIS_HOST", "delphi_redis") +REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "1234") + +# https://flask-limiter.readthedocs.io/en/stable/#rate-limit-string-notation +RATE_LIMIT = os.environ.get("RATE_LIMIT", "60/hour") +# fixed-window, fixed-window-elastic-expiry, or moving-window +# see also https://flask-limiter.readthedocs.io/en/stable/#rate-limiting-strategies +RATELIMIT_STRATEGY = os.environ.get("RATELIMIT_STRATEGY", "fixed-window") + +# see https://flask-limiter.readthedocs.io/en/stable/#configuration +RATELIMIT_STORAGE_URL = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:6379" + +API_KEY_REGISTRATION_FORM_LINK = "https://forms.gle/hkBr5SfQgxguAfEt7" +# ^ shortcut to "https://docs.google.com/forms/d/e/1FAIpQLSe5i-lgb9hcMVepntMIeEo8LUZUMTUnQD3hbrQI3vSteGsl4w/viewform?usp=sf_link" +API_KEY_REGISTRATION_FORM_LINK_LOCAL = "https://api.delphi.cmu.edu/epidata/admin/registration_form" +# ^ redirects to API_KEY_REGISTRATION_FORM_LINK + +API_KEY_REMOVAL_REQUEST_LINK = "https://forms.gle/GucFmZHTMgEFjH197" +# ^ shortcut to "https://docs.google.com/forms/d/e/1FAIpQLSff30tsq4xwPCoUbvaIygLSMs_Mt8eDhHA0rifBoIrjo8J5lw/viewform" +API_KEY_REMOVAL_REQUEST_LINK_LOCAL = 
"https://api.delphi.cmu.edu/epidata/admin/removal_request" +# ^ redirects to API_KEY_REMOVAL_REQUEST_LINK diff --git a/src/server/_db.py b/src/server/_db.py new file mode 100644 index 000000000..9057b8181 --- /dev/null +++ b/src/server/_db.py @@ -0,0 +1,18 @@ +from sqlalchemy import create_engine, MetaData +from sqlalchemy.engine import Engine +from sqlalchemy.orm import sessionmaker + +from ._config import SQLALCHEMY_DATABASE_URI, SQLALCHEMY_ENGINE_OPTIONS + + +# _db.py exists so that we dont have a circular dependency: +# previously `_common` imported from `_security` which imported from `admin.models`, which imported (back again) from `_common` for database connection objects + + +engine: Engine = create_engine(SQLALCHEMY_DATABASE_URI, **SQLALCHEMY_ENGINE_OPTIONS) + +metadata = MetaData(bind=engine) + +Session = sessionmaker(bind=engine) + + diff --git a/src/server/_exceptions.py b/src/server/_exceptions.py index 835bfc118..86ef028c1 100644 --- a/src/server/_exceptions.py +++ b/src/server/_exceptions.py @@ -25,11 +25,6 @@ def __init__(self, endpoints: Iterable[str]): super(MissingOrWrongSourceException, self).__init__(f"no data source specified, possible values: {','.join(endpoints)}", 400) -class UnAuthenticatedException(EpiDataException): - def __init__(self): - super(UnAuthenticatedException, self).__init__("unauthenticated", 401) - - class ValidationFailedException(EpiDataException): def __init__(self, message: str): super(ValidationFailedException, self).__init__(message, 400) diff --git a/src/server/_limiter.py b/src/server/_limiter.py new file mode 100644 index 000000000..4bf72e05b --- /dev/null +++ b/src/server/_limiter.py @@ -0,0 +1,148 @@ +from delphi.epidata.server.endpoints.covidcast_utils.dashboard_signals import DashboardSignals +from flask import Response, request, make_response, jsonify +from flask_limiter import Limiter, HEADERS +from redis import Redis +from werkzeug.exceptions import Unauthorized, TooManyRequests + +from ._common import app, 
get_real_ip_addr +from ._config import RATE_LIMIT, RATELIMIT_STORAGE_URL, REDIS_HOST, REDIS_PASSWORD +from ._exceptions import ValidationFailedException +from ._params import extract_dates, extract_integers, extract_strings +from ._security import _is_public_route, current_user, require_api_key, show_no_api_key_warning, resolve_auth_token, ERROR_MSG_RATE_LIMIT, ERROR_MSG_MULTIPLES + + +def deduct_on_success(response: Response) -> bool: + if response.status_code != 200: + return False + # check if we have the classic format + if not response.is_streamed and response.is_json: + out = response.json + if out and isinstance(out, dict) and out.get("result") == -1: + return False + return True + + +def get_multiples_count(request): + multiples = { + "articles": extract_strings, + "ccn": extract_strings, + "city": extract_strings, + "collection_weeks": extract_integers, + "dates": extract_integers, + "epiweeks": extract_integers, + "fips_code": extract_strings, + "flu_types": extract_strings, + "geo_value": extract_strings, + "geo_values": extract_strings, + "hospital_pks": extract_strings, + "issues": extract_integers, + "locations": extract_strings, + "names": extract_strings, + "publication_dates": extract_strings, + "regions": extract_strings, + "sensor_names": extract_strings, + "signal": extract_strings, + "signals": extract_strings, + "states": extract_strings, + "time_types": extract_strings, + "time_values": extract_dates, + "zip": extract_strings, + } + multiple_selection_allowed = 2 + if "window" in request.args.keys(): + multiple_selection_allowed -= 1 + for k, v in request.args.items(): + if v == "*": + multiple_selection_allowed -= 1 + try: + vals = multiples.get(k)(k) + if len(vals) >= 2: + multiple_selection_allowed -= 1 + elif len(vals) and isinstance(vals, list) and isinstance(vals[0], tuple): + # else we have one val which is a tuple, representing a range, and thus is a "multiple" + multiple_selection_allowed -= 1 + except ValidationFailedException: + 
continue + except TypeError: + continue + return multiple_selection_allowed + + +def check_signals_allowlist(request): + signals_allowlist = {":".join(ss_pair) for ss_pair in DashboardSignals().srcsig_list()} + request_signals = [] + if "signal" in request.args.keys(): + request_signals += extract_strings("signal") + if "signals" in request.args.keys(): + request_signals += extract_strings("signals") + if "data_source" in request.args: + request_signals = [f"{request.args['data_source']}:{request_signal}" for request_signal in request_signals] + if len(request_signals) == 0: + return False + return all([signal in signals_allowlist for signal in request_signals]) + + +def _resolve_tracking_key() -> str: + token = resolve_auth_token() + return token or get_real_ip_addr(request) + + +limiter = Limiter( + _resolve_tracking_key, + app=app, + storage_uri=RATELIMIT_STORAGE_URL, + request_identifier=lambda: "EpidataLimiter", + headers_enabled=True, + header_name_mapping={ + HEADERS.LIMIT: "X-My-Limit", + HEADERS.RESET: "X-My-Reset", + HEADERS.REMAINING: "X-My-Remaining", + }, +) + +apply_limit = limiter.limit(RATE_LIMIT, deduct_when=deduct_on_success) + + +@app.errorhandler(429) +def ratelimit_handler(e): + return TooManyRequests(ERROR_MSG_RATE_LIMIT) + + +def requests_left(): + r = Redis(host=REDIS_HOST, password=REDIS_PASSWORD) + allowed_count, period = RATE_LIMIT.split("/") + try: + remaining_count = int(allowed_count) - int( + r.get(f"LIMITER/{_resolve_tracking_key()}/EpidataLimiter/{allowed_count}/1/{period}") + ) + except TypeError: + return 1 + return remaining_count + + +@limiter.request_filter +def _no_rate_limit() -> bool: + if show_no_api_key_warning(): + # no rate limit in phase 0 + return True + if _is_public_route(): + # no rate limit for public routes + return True + if current_user: + # no rate limit if user is registered + return True + + if not require_api_key(): + # we are in phase 1 or 2 + if requests_left() > 0: + # ...and user is below rate limit, we 
still want to record this query for the rate computation... + return False + # ...otherwise, they have exceeded the limit, but we still want to allow them through + return True + + # phase 3 (full api-keys behavior) + multiples = get_multiples_count(request) + if multiples < 0: + # too many multiples + raise Unauthorized(ERROR_MSG_MULTIPLES) + return check_signals_allowlist(request) diff --git a/src/server/_printer.py b/src/server/_printer.py index 162ba2e36..6e32d7d43 100644 --- a/src/server/_printer.py +++ b/src/server/_printer.py @@ -2,12 +2,15 @@ from io import StringIO from typing import Any, Dict, Iterable, List, Optional, Union -from flask import Response, jsonify, stream_with_context +from flask import Response, jsonify, stream_with_context, request from flask.json import dumps import orjson from ._config import MAX_RESULTS, MAX_COMPATIBILITY_RESULTS -from ._common import is_compatibility_mode +# TODO: remove warnings after once we are past the API_KEY_REQUIRED_STARTING_AT date +from ._security import show_hard_api_key_warning, show_soft_api_key_warning, ROLLOUT_WARNING_RATE_LIMIT, ROLLOUT_WARNING_MULTIPLES, _ROLLOUT_WARNING_AD_FRAGMENT, PHASE_1_2_STOPGAP +from ._common import is_compatibility_mode, log_info_with_request +from ._limiter import requests_left, get_multiples_count from delphi.epidata.common.logger import get_structured_logger @@ -22,7 +25,15 @@ def print_non_standard(format: str, data): message = "no results" result = -2 else: - message = "success" + warning = "" + if show_hard_api_key_warning(): + if requests_left() == 0: + warning = f"{ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + message = warning.strip() or "success" result = 1 if result == -1 and is_compatibility_mode(): return jsonify(dict(result=result, message=message)) @@ 
-69,6 +80,7 @@ def gen(): yield r r = self._end() + log_info_with_request("APrinter finished processing rows", count=self.count) if r is not None: yield r @@ -90,6 +102,8 @@ def _print_row(self, row: Dict) -> Optional[Union[str, bytes]]: first = self.count == 0 if self.count >= self._max_results: # hit the limit + # TODO: consider making this a WARN-level log event + log_info_with_request("Max result limit reached", count=self.count) self.result = 2 return None if first: @@ -112,21 +126,40 @@ class ClassicPrinter(APrinter): """ def _begin(self): - if is_compatibility_mode(): + if is_compatibility_mode() and not show_hard_api_key_warning(): return "{ " - return '{ "epidata": [' + r = '{ "epidata": [' + if show_hard_api_key_warning(): + warning = "" + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + if warning != "": + return f'{r} "{warning.strip()}",' + return r def _format_row(self, first: bool, row: Dict): - if first and is_compatibility_mode(): + if first and is_compatibility_mode() and not show_hard_api_key_warning(): sep = b'"epidata": [' else: sep = b"," if not first else b"" return sep + orjson.dumps(row) def _end(self): - message = "success" + warning = "" + if show_soft_api_key_warning(): + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + message = warning.strip() or "success" prefix = "], " - if self.count == 0 and is_compatibility_mode(): + if self.count == 0 and is_compatibility_mode() and not show_hard_api_key_warning(): # no array to 
end prefix = "" @@ -160,7 +193,7 @@ def _format_row(self, first: bool, row: Dict): self._tree[group].append(row) else: self._tree[group] = [row] - if first and is_compatibility_mode(): + if first and is_compatibility_mode() and not show_hard_api_key_warning(): return b'"epidata": [' return None @@ -171,7 +204,10 @@ def _end(self): tree = orjson.dumps(self._tree) self._tree = dict() r = super(ClassicTreePrinter, self)._end() - return tree + r + r = tree + r + if show_hard_api_key_warning() and (requests_left() == 0 or get_multiples_count(request) < 0): + r = b", " + r + return r class CSVPrinter(APrinter): @@ -200,8 +236,20 @@ def _error(self, error: Exception) -> str: def _format_row(self, first: bool, row: Dict): if first: - self._writer = DictWriter(self._stream, list(row.keys()), lineterminator="\n") + columns = list(row.keys()) + self._writer = DictWriter(self._stream, columns, lineterminator="\n") self._writer.writeheader() + if show_hard_api_key_warning(): + warning = "" + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + if warning.strip() != "": + self._writer.writerow({columns[0]: warning}) + self._writer.writerow(row) # remove the stream content to print just one line at a time @@ -222,7 +270,18 @@ class JSONPrinter(APrinter): """ def _begin(self): - return b"[" + r = b"[" + if show_hard_api_key_warning(): + warning = "" + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + if warning.strip() != "": + r = b'["' + bytes(warning, "utf-8") + 
b'",' + return r def _format_row(self, first: bool, row: Dict): sep = b"," if not first else b"" @@ -240,6 +299,19 @@ class JSONLPrinter(APrinter): def make_response(self, gen): return Response(gen, mimetype=" text/plain; charset=utf8") + def _begin(self): + if show_hard_api_key_warning(): + warning = "" + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + if warning.strip() != "": + return bytes(warning, "utf-8") + b"\n" + return None + def _format_row(self, first: bool, row: Dict): # each line is a JSON file with a new line to separate them return orjson.dumps(row, option=orjson.OPT_APPEND_NEWLINE) diff --git a/src/server/_security.py b/src/server/_security.py new file mode 100644 index 000000000..36fb1d93b --- /dev/null +++ b/src/server/_security.py @@ -0,0 +1,128 @@ +from datetime import date, datetime, timedelta +from functools import wraps +from typing import Optional, cast + +import redis +from delphi.epidata.common.logger import get_structured_logger +from flask import g, request +from werkzeug.exceptions import Unauthorized +from werkzeug.local import LocalProxy + +from ._config import ( + API_KEY_REQUIRED_STARTING_AT, + REDIS_HOST, + REDIS_PASSWORD, + API_KEY_REGISTRATION_FORM_LINK_LOCAL, + TEMPORARY_API_KEY, + URL_PREFIX, +) +from .admin.models import User, UserRole + +API_KEY_HARD_WARNING = API_KEY_REQUIRED_STARTING_AT - timedelta(days=14) +API_KEY_SOFT_WARNING = API_KEY_HARD_WARNING - timedelta(days=14) + +# rollout warning messages +ROLLOUT_WARNING_RATE_LIMIT = "This request exceeded the rate limit on anonymous requests, which will be enforced starting {}.".format(API_KEY_REQUIRED_STARTING_AT) +ROLLOUT_WARNING_MULTIPLES = "This request exceeded the anonymous limit on selected multiples, which 
will be enforced starting {}.".format(API_KEY_REQUIRED_STARTING_AT) +_ROLLOUT_WARNING_AD_FRAGMENT = "To be exempt from this limit, authenticate your requests with a free API key, now available at {}.".format(API_KEY_REGISTRATION_FORM_LINK_LOCAL) + +PHASE_1_2_STOPGAP = ( + "A temporary public key `{}` is available for use between now and {} to give you time to register or adapt your requests without this message continuing to break your systems." +).format(TEMPORARY_API_KEY, (API_KEY_REQUIRED_STARTING_AT + timedelta(days=7))) + + +# steady-state error messages +ERROR_MSG_RATE_LIMIT = "Rate limit exceeded for anonymous queries. To remove this limit, register a free API key at {}".format(API_KEY_REGISTRATION_FORM_LINK_LOCAL) +ERROR_MSG_MULTIPLES = "Requested too many multiples for anonymous queries. To remove this limit, register a free API key at {}".format(API_KEY_REGISTRATION_FORM_LINK_LOCAL) +ERROR_MSG_INVALID_KEY = ( + "API key does not exist. Register a new key at {} or contact delphi-support+privacy@andrew.cmu.edu to troubleshoot".format(API_KEY_REGISTRATION_FORM_LINK_LOCAL) +) +ERROR_MSG_INVALID_ROLE = "Provided API key does not have access to this endpoint. Please contact delphi-support+privacy@andrew.cmu.edu." 
+ + +def resolve_auth_token() -> Optional[str]: + for n in ("auth", "api_key", "token"): + if n in request.values: + return request.values[n] + # username password + if request.authorization and request.authorization.username == "epidata": + return request.authorization.password + # bearer token authentication + auth_header = request.headers.get("Authorization") + if auth_header and auth_header.startswith("Bearer "): + return auth_header[len("Bearer ") :] + return None + + +def show_no_api_key_warning() -> bool: + # aka "phase 0" + n = date.today() + return not current_user and n < API_KEY_SOFT_WARNING + + +def show_soft_api_key_warning() -> bool: + # aka "phase 1" + n = date.today() + return not current_user and API_KEY_SOFT_WARNING <= n < API_KEY_HARD_WARNING + + +def show_hard_api_key_warning() -> bool: + # aka "phase 2" + n = date.today() + return not current_user and API_KEY_HARD_WARNING <= n < API_KEY_REQUIRED_STARTING_AT + + +def require_api_key() -> bool: + # aka "phase 3" + n = date.today() + return API_KEY_REQUIRED_STARTING_AT <= n + + +def _get_current_user(): + if "user" not in g: + api_key = resolve_auth_token() + g.user = User.find_user(api_key=api_key) + return g.user + + +current_user: User = cast(User, LocalProxy(_get_current_user)) + + +def register_user_role(role_name: str) -> None: + UserRole.create_role(role_name) + + +def _is_public_route() -> bool: + public_routes_list = ["lib", "admin", "version"] + for route in public_routes_list: + if request.path.startswith(f"{URL_PREFIX}/{route}"): + return True + return False + + +def require_role(required_role: str): + def decorator_wrapper(f): + if not required_role: + return f + + @wraps(f) + def decorated_function(*args, **kwargs): + if not current_user or not current_user.has_role(required_role): + get_structured_logger("api_security").info( + ERROR_MSG_INVALID_ROLE, + role=required_role, + api_key=(current_user and current_user.api_key), + ) + raise Unauthorized(ERROR_MSG_INVALID_ROLE) + return 
f(*args, **kwargs) + + return decorated_function + + return decorator_wrapper + + +def update_key_last_time_used(user): + if user: + # update last usage for this user's api key to "now()" + r = redis.Redis(host=REDIS_HOST, password=REDIS_PASSWORD) + r.set(f"LAST_USED/{user.api_key}", datetime.strftime(datetime.now(), "%Y-%m-%d")) diff --git a/src/server/_validate.py b/src/server/_validate.py index 957bee09d..37adc470f 100644 --- a/src/server/_validate.py +++ b/src/server/_validate.py @@ -1,37 +1,6 @@ -from typing import Optional - from flask import Request -from ._exceptions import UnAuthenticatedException, ValidationFailedException - - -def resolve_auth_token(request: Request) -> Optional[str]: - # auth request param - if "auth" in request.values: - return request.values["auth"] - # user name password - if request.authorization and request.authorization.username == "epidata": - return request.authorization.password - # bearer token authentication - auth_header = request.headers.get("Authorization") - if auth_header and auth_header.startswith("Bearer "): - return auth_header[len("Bearer ") :] - return None - - -def check_auth_token(request: Request, token: str, optional=False) -> bool: - value = resolve_auth_token(request) - - if value is None: - if optional: - return False - else: - raise ValidationFailedException(f"missing parameter: auth") - - valid_token = value == token - if not valid_token and not optional: - raise UnAuthenticatedException() - return valid_token +from ._exceptions import ValidationFailedException def require_all(request: Request, *values: str) -> bool: diff --git a/src/server/admin/__init__.py b/src/server/admin/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/server/admin/api_key_form_script.js b/src/server/admin/api_key_form_script.js new file mode 100644 index 000000000..2a48d1459 --- /dev/null +++ b/src/server/admin/api_key_form_script.js @@ -0,0 +1,66 @@ +// post-processing script for API key registration form 
submissions +// currently located at: +// https://script.google.com/u/1/home/projects/1hpgZcxqbeyfJLVEaipNqCJ7ItdkjNu2NsX2IWqpjOd1wZwhCBeKzlCAa/edit + + +var POST_URL = "https://api.delphi.cmu.edu/epidata/admin/register"; +var WEBHOOK_SECRET = "abc"; + +function onSubmit(e) { + + + var form = FormApp.getActiveForm(); + var allResponses = form.getResponses(); + var latestResponse = allResponses[allResponses.length - 1]; + + var user_api_key = Math.random().toString(16).substr(2, 18); + var user_email = latestResponse.getRespondentEmail(); + + var payload = { + 'token': WEBHOOK_SECRET, + 'user_api_key': user_api_key, + 'user_email': user_email, + }; + + var options = { + "method": "post", + "contentType": "application/json", + "muteHttpExceptions": true, + "payload": JSON.stringify(payload) + }; + + Logger.log('Sending registration webhook request.') + var result = UrlFetchApp.fetch(POST_URL, options); + console.log(result.getResponseCode()); + + if (result.getResponseCode() == 200) { + Logger.log('Registration successful, sending email'); + MailApp.sendEmail({ + to: user_email, + subject: "Delphi Epidata API Registration", + noReply: true, + body: `Thank you for registering with the Delphi Epidata API. + + Your API key is: ${user_api_key} + + For usage information, see the API Keys section of the documentation: https://cmu-delphi.github.io/delphi-epidata/api/api_keys.html + + Best, + Delphi Team` + }); + } else if (result.getResponseCode() == 409) { + Logger.log('Registration was not successful, %s %s', result.getContentText("UTF-8"), result.GetResponseCode); + MailApp.sendEmail({ + to: user_email, + subject: "Delphi Epidata API Registration", + noReply: true, + body: ` + API Key was not generated. + + This email address is already registered. Please contact us if you believe this to be in error. 
+ + Best, + Delphi Team` + }); + } +}; diff --git a/src/server/admin/models.py b/src/server/admin/models.py new file mode 100644 index 000000000..3755a517b --- /dev/null +++ b/src/server/admin/models.py @@ -0,0 +1,154 @@ +from sqlalchemy import Table, ForeignKey, Column, Integer, String, Date, delete, update +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import relationship +from copy import deepcopy + +from .._db import Session +from delphi.epidata.common.logger import get_structured_logger + +from typing import Set, Optional, List +from datetime import datetime as dtime + + +Base = declarative_base() + +association_table = Table( + "user_role_link", + Base.metadata, + Column("user_id", ForeignKey("api_user.id")), + Column("role_id", ForeignKey("user_role.id")), +) + + +class User(Base): + __tablename__ = "api_user" + id = Column(Integer, primary_key=True, autoincrement=True) + roles = relationship("UserRole", secondary=association_table) + api_key = Column(String(50), unique=True, nullable=False) + email = Column(String(320), unique=True, nullable=False) + created = Column(Date, default=dtime.strftime(dtime.now(), "%Y-%m-%d")) + last_time_used = Column(Date, default=dtime.strftime(dtime.now(), "%Y-%m-%d")) + + def __init__(self, api_key: str, email: str = None) -> None: + self.api_key = api_key + self.email = email + + @staticmethod + def list_users() -> List["User"]: + with Session() as session: + return session.query(User).all() + + @property + def as_dict(self): + return { + "id": self.id, + "api_key": self.api_key, + "email": self.email, + "roles": User.get_user_roles(self.id), + "created": self.created, + "last_time_used": self.last_time_used + } + + @staticmethod + def get_user_roles(user_id: int) -> Set[str]: + with Session() as session: + user = session.query(User).filter(User.id == user_id).first() + return set([role.name for role in user.roles]) + + def has_role(self, required_role: str) -> bool: + return required_role in 
User.get_user_roles(self.id) + + @staticmethod + def _assign_roles(user: "User", roles: Optional[Set[str]], session) -> None: + # NOTE: this uses a borrowed/existing `session`, and thus does not do a `session.commit()`... + # that is the responsibility of the caller! + get_structured_logger("api_user_models").info("setting roles", roles=roles, user_id=user.id, api_key=user.api_key) + db_user = session.query(User).filter(User.id == user.id).first() + # TODO: would it be sufficient to use the passed-in `user` instead of looking up this `db_user`? + if roles: + roles_to_assign = session.query(UserRole).filter(UserRole.name.in_(roles)).all() + db_user.roles = roles_to_assign + else: + db_user.roles = [] + + @staticmethod + def find_user(*, # asterisk forces explicit naming of all arguments when calling this method + user_id: Optional[int] = None, api_key: Optional[str] = None, user_email: Optional[str] = None + ) -> "User": + # NOTE: be careful, using multiple arguments could match multiple users, but this will return only one! + with Session() as session: + user = ( + session.query(User) + .filter((User.id == user_id) | (User.api_key == api_key) | (User.email == user_email)) + .first() + ) + return user if user else None + + @staticmethod + def create_user(api_key: str, email: str, user_roles: Optional[Set[str]] = None) -> "User": + get_structured_logger("api_user_models").info("creating user", api_key=api_key) + with Session() as session: + new_user = User(api_key=api_key, email=email) + # TODO: we may need to populate 'created' field/column here, if the default + # specified above gets bound to the time of when that line of python was evaluated. 
+ session.add(new_user) + session.commit() + User._assign_roles(new_user, user_roles, session) + session.commit() + return new_user + + @staticmethod + def update_user( + user: "User", + email: Optional[str], + api_key: Optional[str], + roles: Optional[Set[str]] + ) -> "User": + get_structured_logger("api_user_models").info("updating user", user_id=user.id, new_api_key=api_key) + with Session() as session: + user = User.find_user(user_id=user.id) + if user: + update_stmt = ( + update(User) + .where(User.id == user.id) + .values(api_key=api_key, email=email) + ) + session.execute(update_stmt) + User._assign_roles(user, roles, session) + session.commit() + return user + + @staticmethod + def delete_user(user_id: int) -> None: + get_structured_logger("api_user_models").info("deleting user", user_id=user_id) + with Session() as session: + session.execute(delete(User).where(User.id == user_id)) + session.commit() + + +class UserRole(Base): + __tablename__ = "user_role" + id = Column(Integer, primary_key=True, autoincrement=True) + name = Column(String(50), unique=True) + + @staticmethod + def create_role(name: str) -> None: + get_structured_logger("api_user_models").info("creating user role", role=name) + with Session() as session: + session.execute( + f""" + INSERT INTO user_role (name) + SELECT '{name}' + WHERE NOT EXISTS + (SELECT * + FROM user_role + WHERE name='{name}') + """ + ) + session.commit() + + @staticmethod + def list_all_roles(): + with Session() as session: + roles = session.query(UserRole).all() + return [role.name for role in roles] diff --git a/src/server/admin/templates/index.html b/src/server/admin/templates/index.html new file mode 100644 index 000000000..518211199 --- /dev/null +++ b/src/server/admin/templates/index.html @@ -0,0 +1,81 @@ + + + + + + API Keys + + + + +
+

+ API Key Admin Interface +

+ {% if flags.banner %} + + {% endif %} + {% if mode == 'overview' %} +

Registered Users

+ + + + + + + + + + {% for user in users %} + + + + + + + + {% endfor %} + +
IDAPI KeyEmailRolesActions
{{ user.id }}{{ user.api_key }}{{ user.email }}{{ ','.join(user.roles) }} + Edit + Delete +
+

Register New User

+ + {% else %} +

+ < Back + Edit User {{user.id}} +

+ {% endif %} +
+ +
+ + +
+
+ + +
+
+ + {% for role in roles %} + + {% endfor %} +
+ {% if mode == 'overview' %} + + {% else %} + + + {% endif %} +
+
+ + \ No newline at end of file diff --git a/src/server/endpoints/admin.py b/src/server/endpoints/admin.py new file mode 100644 index 000000000..d352e473b --- /dev/null +++ b/src/server/endpoints/admin.py @@ -0,0 +1,121 @@ +from pathlib import Path +from typing import Dict, List, Set + +from flask import Blueprint, make_response, render_template_string, request +from werkzeug.exceptions import NotFound, Unauthorized +from werkzeug.utils import redirect + +from .._config import ADMIN_PASSWORD, API_KEY_REGISTRATION_FORM_LINK, API_KEY_REMOVAL_REQUEST_LINK, REGISTER_WEBHOOK_TOKEN +from .._security import resolve_auth_token +from ..admin.models import User, UserRole + +self_dir = Path(__file__).parent +# first argument is the endpoint name +bp = Blueprint("admin", __name__) + +templates_dir = Path(__file__).parent.parent / "admin" / "templates" + + +def enable_admin() -> bool: + # only enable admin endpoint if we have a password for it, so it is not exposed to the world + return bool(ADMIN_PASSWORD) + + +def _require_admin(): + token = resolve_auth_token() + if token is None or token != ADMIN_PASSWORD: + raise Unauthorized() + return token + + +def _parse_roles(roles: List[str]) -> Set[str]: + return set(roles) + + +def _render(mode: str, token: str, flags: Dict, **kwargs): + template = (templates_dir / "index.html").read_text("utf8") + return render_template_string( + template, mode=mode, token=token, flags=flags, roles=UserRole.list_all_roles(), **kwargs + ) + + +def user_exists(user_email: str = None, api_key: str = None): + user = User.find_user(user_email=user_email, api_key=api_key) + return True if user else False + + +@bp.route("/", methods=["GET", "POST"]) +def _index(): + token = _require_admin() + flags = dict() + if request.method == "POST": + # register a new user + if not user_exists(user_email=request.values["email"], api_key=request.values["api_key"]): + User.create_user( + request.values["api_key"], + request.values["email"], + 
_parse_roles(request.values.getlist("roles")), + ) + flags["banner"] = "Successfully Added" + else: + flags["banner"] = "User with such email and/or api key already exists." + users = [user.as_dict for user in User.list_users()] + return _render("overview", token, flags, users=users, user=dict()) + + +@bp.route("/", methods=["GET", "PUT", "POST", "DELETE"]) +def _detail(user_id: int): + token = _require_admin() + user = User.find_user(user_id=user_id) + if not user: + raise NotFound() + if request.method == "DELETE" or "delete" in request.values: + User.delete_user(user.id) + return redirect(f"./?auth={token}") + flags = dict() + if request.method in ["PUT", "POST"]: + user_check = User.find_user(api_key=request.values["api_key"], user_email=request.values["email"]) + if user_check and user_check.id != user.id: + flags["banner"] = "Could not update user; same api_key and/or email already exists." + else: + user = user.update_user( + user=user, + api_key=request.values["api_key"], + email=request.values["email"], + roles=_parse_roles(request.values.getlist("roles")), + ) + flags["banner"] = "Successfully Saved" + return _render("detail", token, flags, user=user.as_dict) + + +def register_new_key(api_key: str, email: str) -> str: + User.create_user(api_key=api_key, email=email) + return api_key + + +@bp.route("/registration_form", methods=["GET"]) +def registration_form_redirect(): + # TODO: replace this with our own hosted registration form instead of external + return redirect(API_KEY_REGISTRATION_FORM_LINK, code=302) + +@bp.route("/removal_request", methods=["GET"]) +def removal_request_redirect(): + # TODO: replace this with our own hosted form instead of external + return redirect(API_KEY_REMOVAL_REQUEST_LINK, code=302) + +@bp.route("/register", methods=["POST"]) +def _register(): + body = request.get_json() + token = body.get("token") + if token is None or token != REGISTER_WEBHOOK_TOKEN: + raise Unauthorized() + + user_api_key = body["user_api_key"] + 
user_email = body["user_email"] + if user_exists(user_email=user_email, api_key=user_api_key): + return make_response( + "User with email and/or API Key already exists, use different parameters or contact us for help", + 409, + ) + api_key = register_new_key(user_api_key, user_email) + return make_response(f"Successfully registered API key '{api_key}'", 200) diff --git a/src/server/endpoints/afhsb.py b/src/server/endpoints/afhsb.py index 92cee145c..a006defac 100644 --- a/src/server/endpoints/afhsb.py +++ b/src/server/endpoints/afhsb.py @@ -2,10 +2,10 @@ from flask import Blueprint, request -from .._config import AUTH from .._params import extract_integers, extract_strings from .._query import execute_queries, filter_integers, filter_strings -from .._validate import check_auth_token, require_all +from .._validate import require_all +from .._security import require_role # first argument is the endpoint name bp = Blueprint("afhsb", __name__) @@ -53,8 +53,8 @@ def _split_flu_types(flu_types: List[str]): @bp.route("/", methods=("GET", "POST")) +@require_role("afhsb") def handle(): - check_auth_token(request, AUTH["afhsb"]) require_all(request, "locations", "epiweeks", "flu_types") locations = extract_strings("locations") diff --git a/src/server/endpoints/cdc.py b/src/server/endpoints/cdc.py index e89eb94fb..1ba79c9e8 100644 --- a/src/server/endpoints/cdc.py +++ b/src/server/endpoints/cdc.py @@ -1,9 +1,10 @@ from flask import Blueprint, request -from .._config import AUTH, NATION_REGION, REGION_TO_STATE +from .._config import NATION_REGION, REGION_TO_STATE from .._params import extract_strings, extract_integers from .._query import filter_strings, execute_queries, filter_integers -from .._validate import require_all, check_auth_token +from .._validate import require_all +from .._security import require_role # first argument is the endpoint name bp = Blueprint("cdc", __name__) @@ -11,8 +12,8 @@ @bp.route("/", methods=("GET", "POST")) +@require_role("cdc") def handle(): - 
check_auth_token(request, AUTH["cdc"]) require_all(request, "locations", "epiweeks") # parse the request diff --git a/src/server/endpoints/covidcast_utils/dashboard_signals.py b/src/server/endpoints/covidcast_utils/dashboard_signals.py new file mode 100644 index 000000000..42f3d612d --- /dev/null +++ b/src/server/endpoints/covidcast_utils/dashboard_signals.py @@ -0,0 +1,49 @@ +# Provides a list of `(source, signal)` tuples that enumerate those used by the +# COVIDcast Dashboard, found at https://delphi.cmu.edu/covidcast/ + +# This module uses two files in this current directory as a resource: `descriptions.raw.txt` & `questions.raw.txt`. +# These files are sourced from (and should be kept in sync with) files of the same name from the `www-covidcast` +# repository, found at https://github.com/cmu-delphi/www-covidcast/blob/dev/src/stores/ . As part of the +# "Create Release" github action for `www-covidcast`, a pull request is created in this repository (`delphi-epidata`) +# with any updates to these files (see: +# https://github.com/cmu-delphi/www-covidcast/blob/dev/.github/workflows/create_release.yml for more details). 
+ +from pathlib import Path +import yaml + + +class DashboardSignals: + + # we use a singleton pattern to prevent reloading files every time this is used + _instance = None + + def __new__(cls): + if cls._instance is None: + # create a singular instance + cls._instance = cls._instance = super(DashboardSignals, cls).__new__(cls) + + # load sources + signals list from files + # NOTE: yaml's `load_all` is needed as these txt files are formatted to technically contain "multiple documents" + srcsigs = [] + module_dir = Path(__file__).parent + + with open(module_dir / 'descriptions.raw.txt', 'r') as f: + for desc in yaml.safe_load_all(f): + srcsigs.append( (desc['Id'], desc['Signal']) ) + + source_id = None + with open(module_dir / 'questions.raw.txt', 'r') as f: + for ques in yaml.safe_load_all(f): + if source_id is None: + # the first entry contains the source 'Id' but doesnt have a 'Signal' key + source_id = ques['Id'] # NOTE: this should always be 'fb-survey' + else: + srcsigs.append( (source_id, ques['Signal']) ) + + cls._instance._srcsigs = srcsigs + + return cls._instance + + + def srcsig_list(self): + return self._srcsigs diff --git a/src/server/endpoints/covidcast_utils/descriptions.raw.txt b/src/server/endpoints/covidcast_utils/descriptions.raw.txt new file mode 100644 index 000000000..85cfb1365 --- /dev/null +++ b/src/server/endpoints/covidcast_utils/descriptions.raw.txt @@ -0,0 +1,125 @@ +--- +Name: Symptom Searches (Smell and Taste) on Google +Id: google-symptoms +Signal: s05_smoothed_search +Unit: scaled search volume +UnitShort: +noMaps: true + +SignalTooltip: Google search volume of COVID-related symptom searches about smell and taste + +Description: Using Google Symptoms Searches, Delphi obtains the average of Google search volumes for searches related to anosmia (loss of smell), ageusia (loss of taste), and dysgeusia (altered or impaired taste) in each area, since they emerged as unusual symptoms that can be indicative of COVID-19. 
Note that the average of the three symptom search volumes is not equivalent to the search volume of their union. According to Google, the estimates are not comparable across regions since the values are normalized by population and scaled by region-specific maximum popularity (and for this reason, we omit beehive grids and choropleth maps of this signal on the dashboard). +--- +Name: Symptom Searches (Common Cold) on Google +Id: google-symptoms +Signal: s02_smoothed_search +Unit: scaled search volume +UnitShort: +noMaps: true + +SignalTooltip: Google search volume of COVID-related symptom searches about common cold + +Description: Using Google Symptoms Searches, Delphi obtains the average of Google search volumes for searches related to nasal congestion, post nasal drip, rhinorrhea, sinusitis, rhinitis, and common cold in each area. These symptoms have shown positive correlation with reported COVID cases, especially since Omicron was declared a variant of concern by the World Health Organization. Note that the average search volume of these symptoms is not equivalent to the search volume of their union. According to Google, the estimates are not comparable across regions since the values are normalized by population and scaled by region-specific maximum popularity (and for this reason, we omit beehive grids and choropleth maps of this signal on the dashboard). +--- +Name: COVID-Related Doctor Visits +Id: doctor-visits +Signal: smoothed_adj_cli +Unit: per 100 visits + + +SignalTooltip: Percentage of daily doctor visits that are due to COVID-like symptoms + + +Description: Delphi receives aggregated statistics from health system partners on COVID-related outpatient doctor visits, derived from ICD codes found in insurance claims. Using this data, we estimate the percentage of daily doctor’s visits in each area that are due to COVID-like illness. Note that these estimates are based only on visits by patients whose data is accessible to our partners. 
+--- +Name: Lab-Confirmed Flu in Doctor Visits +Id: chng +Signal: smoothed_adj_outpatient_flu +Unit: per 100 visits + + +SignalTooltip: Percentage of daily doctor visits that are due to lab-confirmed influenza + + +Description: Delphi receives aggregated statistics from Change Healthcare, Inc. on lab-confirmed influenza outpatient doctor visits, derived from ICD codes found in insurance claims. Using this data, we estimate the percentage of daily doctor’s visits in each area that resulted in a diagnosis of influenza. Note that these estimates are based only on visits by patients whose insurance claims are accessible to Change Healthcare. +--- +Name: COVID Antigen Test Positivity +Id: quidel-covid-ag +Signal: covid_ag_smoothed_pct_positive +Unit: per 100 tests + + +SignalTooltip: Positivity rate of COVID-19 antigen tests, based on data provided by Quidel, Inc. + + +Description: Delphi receives data from COVID-19 antigen tests conducted by Quidel, a national provider of networked lab testing devices (covering doctors' offices, clinics, and hospitals). Antigen tests can detect parts of the virus that are present during an active infection. We report the percentage of COVID antigen tests that are positive. Note that this signal only includes Quidel’s antigen tests, not those run by other test providers.
+ + +AgeStratifications: + - Name: 0-4 + Signal: covid_ag_smoothed_pct_positive_age_0_4 + - Name: 5-17 + Signal: covid_ag_smoothed_pct_positive_age_5_17 + - Name: 18-49 + Signal: covid_ag_smoothed_pct_positive_age_18_49 + - Name: 50-64 + Signal: covid_ag_smoothed_pct_positive_age_50_64 + - Name: 65+ + Signal: covid_ag_smoothed_pct_positive_age_65plus + + +--- +Name: COVID Cases +Id: jhu-csse +Signal: confirmed_7dav_incidence_prop +Highlight: [default] +ExtendedColorScale: true + + +SignalTooltip: Newly reported COVID-19 cases per 100,000 people, based on data from Johns Hopkins University + + +Description: This data shows the number of COVID-19 confirmed cases newly reported each day. It reflects only cases reported by state and local health authorities. It is based on case counts compiled and made public by [a team at Johns Hopkins University](https://systems.jhu.edu/research/public-health/ncov/). The signal may not be directly comparable across regions with vastly different testing capacity or reporting criteria. +--- +Name: COVID Hospital Admissions +Id: hhs +Signal: confirmed_admissions_covid_1d_prop_7dav +ExtendedColorScale: true +# override with additional county +Levels: [nation, state, county] +Overrides: + County: + Id: dsew-cpr + Signal: confirmed_admissions_covid_1d_prop_7dav + + + + +SignalTooltip: Confirmed COVID-19 hospital admissions per 100,000 people + + +Description: This data shows the number of hospital admissions with lab-confirmed COVID-19 each day. At the state level and above, we show data from the Report on Patient Impact and Hospital Capacity published by the US Department of Health & Human Services (HHS). At the county and metro level, we show data from the Community Profile Report published by the Data Strategy Execution Workgroup. 
+--- +Name: Flu Hospital Admissions +Id: hhs +Signal: confirmed_admissions_influenza_1d_prop_7dav +ExtendedColorScale: true +Levels: [nation, state] + + + + +SignalTooltip: Confirmed influenza hospital admissions per 100,000 people + + +Description: This data shows the number of hospital admissions with lab-confirmed influenza each day. We source this data from the Report on Patient Impact and Hospital Capacity published by the US Department of Health & Human Services (HHS). +--- +Name: COVID Deaths +Id: jhu-csse +Signal: deaths_7dav_incidence_prop + + +SignalTooltip: Newly reported COVID-19 deaths per 100,000 people, based on data from Johns Hopkins University + + +Description: This data shows the number of COVID-19 deaths newly reported each day. The signal is based on COVID-19 death counts compiled and made public by [a team at Johns Hopkins University](https://systems.jhu.edu/research/public-health/ncov/). \ No newline at end of file diff --git a/src/server/endpoints/covidcast_utils/questions.raw.txt b/src/server/endpoints/covidcast_utils/questions.raw.txt new file mode 100644 index 000000000..764812b27 --- /dev/null +++ b/src/server/endpoints/covidcast_utils/questions.raw.txt @@ -0,0 +1,248 @@ +--- +Overview: | + In collaboration with Facebook Data for Good, along with a consortium of universities and public health officials, the Delphi Group at Carnegie Mellon University conducts the COVID-19 Trends and Impact Survey to monitor the spread and impact of the COVID-19 pandemic in the United States. This survey is advertised through Facebook. It has run continuously since early April 2020. [More about the survey...](https://delphi.cmu.edu/covidcast/surveys) + + + This dashboard provides summary results from selected survey questions. **Note:** The question text below is simplified for this dashboard. 
+FullSurveyLink: https://cmu-delphi.github.io/delphi-epidata/symptom-survey/coding.html +DataAccessLink: https://cmu-delphi.github.io/delphi-epidata/symptom-survey/data-access.html +ReferenceRawNationSignal: raw_cli +Waves: [2020-04-06, 2020-04-15, 2020-05-21, 2020-09-08, 2020-11-24, 2020-12-19, 2021-01-12, 2021-02-08, 2021-03-02, 2021-03-02, 2021-05-20, 2022-01-30] +Levels: [county, msa, state] # levels which the survey should be pickable + + +# common properties for all questions +Id: fb-survey +Unit: per 100 people + + +EndOfSurveyWarning: | + The US COVID-19 Trends and Impact Survey will stop inviting new respondents to complete the survey on June 25, 2022. All survey data will remain available for research. For more information, see our [end-of-survey notice](https://cmu-delphi.github.io/delphi-epidata/symptom-survey/end-of-survey.html). +EndOfSurveyNotice: | + The US COVID-19 Trends and Impact Survey stopped inviting new respondents to complete the survey on June 25, 2022. All survey data remains available for research. For more information, see our [end-of-survey notice](https://cmu-delphi.github.io/delphi-epidata/symptom-survey/end-of-survey.html). + + +--- +Name: COVID-Like Symptoms +Category: Symptoms +Question: | + In the past 24 hours, have you or anyone in your household experienced a fever, along with cough, shortness of breath, or difficulty breathing? +Signal: smoothed_wcli +AddedInWave: 1 +--- + + +Name: COVID-Like Symptoms in Community +Category: Symptoms +Question: | + Do you personally know someone in your local community who has COVID-like symptoms? +Signal: smoothed_whh_cmnty_cli +AddedInWave: 2 + + +--- +Name: Self-Reported Mask Use +Category: Mask Use +Question: | + In the past 7 days, did you wear a mask **most or all of the time** in public? +Signal: smoothed_wwearing_mask_7d +AddedInWave: 8 +OldRevisions: + - Change: This version asked about the past 5 days instead of 7. This was changed in Wave 8 to match other questions in this survey. 
+ Signal: smoothed_wwearing_mask + AddedInWave: 4 + +--- +Name: Other People Wearing Masks +Category: Mask Use +Question: | + In the past 7 days, when you were out in public, did **most or all other people** wear masks? +Signal: smoothed_wothers_masked_public +AddedInWave: 11 +OldRevisions: + - Change: This version asked about mask-wearing in public places where social distancing is not possible. This wording was changed in Wave 11. + Signal: smoothed_wothers_masked + AddedInWave: 5 + + +--- + + +Name: COVID Vaccine Acceptance +Category: Vaccines +Question: | + Have you already received a COVID vaccine, do you have an appointment to receive a COVID vaccine, or if a vaccine were offered to you today, would you **definitely** or **probably** choose to get vaccinated? +Signal: smoothed_wcovid_vaccinated_appointment_or_accept +AddedInWave: 11 +OldRevisions: + - Change: This version asked whether respondents received a vaccine or would definitely or probably choose to get vaccinated. Appointments were not included in the calculation because only vaccine-accepting respondents were asked whether they have an appointment to get vaccinated. This was changed in Wave 11 to ask all non-vaccinated respondents whether they have an appointment to get vaccinated. + Signal: smoothed_wcovid_vaccinated_or_accept + AddedInWave: 6 + + +--- +Name: In-Person Schooling +Category: Social Distancing +Question: | + Does the oldest child in your household (pre-K–grade 12) go to in-person classes full-time? +Signal: smoothed_winperson_school_fulltime_oldest +AddedInWave: 12 +Unit: per 100 people with school-age children +OldRevisions: + - Change: This version asked if any children in the household attended in-person classes. Wave 12 changed this item to ask only about the oldest child. 
+ Signal: smoothed_winperson_school_fulltime + AddedInWave: 5 +--- +Name: Went to Essential Store +Category: Social Distancing +Question: | + In the past 24 hours, did you go to an indoor market, grocery store, or pharmacy? +Signal: smoothed_wshop_indoors_1d +AddedInWave: 10 +OldRevisions: + - Change: This version did not include “indoor” in the prompt. Wave 10 changed this item to focus on indoor activities. + Signal: smoothed_wshop_1d + AddedInWave: 4 + + +--- +Name: Used Public Transit +Category: Social Distancing +Question: | + In the past 24 hours, did you use public transit? +Signal: smoothed_wpublic_transit_1d +AddedInWave: 4 + + +--- +Name: Went to Work or School +Category: Social Distancing +Question: | + In the past 24 hours, did you work or go to school indoors outside the place where you are currently staying? +Signal: smoothed_wwork_outside_home_indoors_1d +AddedInWave: 10 +OldRevisions: + - Change: This version did not include “indoors” in the prompt. Wave 10 changed this item to focus on indoor work and school. + Signal: smoothed_wwork_outside_home_1d + AddedInWave: 4 + +--- +Name: Spent Time with Others +Category: Social Distancing +Question: | + In the past 24 hours, did you spend time indoors with someone who isn’t currently staying with you? +Signal: smoothed_wspent_time_indoors_1d +AddedInWave: 10 +OldRevisions: + - Change: This version did not include “indoors” in the prompt. Wave 10 changed this item to focus on indoor activities. + Signal: smoothed_wspent_time_1d + AddedInWave: 4 + + + +--- +Name: Visited Restaurant, Bar, or Cafe +Category: Social Distancing +Question: | + In the past 24 hours, did you have a meal or drink indoors at a bar, restaurant, or cafe? +Signal: smoothed_wrestaurant_indoors_1d +AddedInWave: 10 +OldRevisions: + - Change: This version did not include “indoors” in the prompt. Wave 10 changed this item to focus on drinking and dining indoors, rather than take-out or outdoor dining. 
+ AddedInWave: 4 + Signal: smoothed_wrestaurant_1d + +--- + + +Name: Attended Large Event +Category: Social Distancing +Question: | + In the past 24 hours, did you attend an indoor event with more than 10 people? +Signal: smoothed_wlarge_event_indoors_1d +AddedInWave: 10 +OldRevisions: + - Change: This version did not include “indoors” in the prompt. Wave 10 changed this item to focus on indoor events. + AddedInWave: 4 + Signal: smoothed_wlarge_event_1d + + + +--- +Name: Traveled Out of State +Category: Social Distancing +Question: | + In the past 7 days, have you traveled outside of your state? +Signal: smoothed_wtravel_outside_state_7d +AddedInWave: 10 +OldRevisions: + - Change: This version asked about the past 5 days, rather than 7. This was changed in Wave 10 to be consistent with other items on the survey. + Signal: smoothed_wtravel_outside_state_5d + AddedInWave: 1 + + + + + +--- +Name: Felt Anxious +Category: Mental Health +Question: | + In the past 7 days, did you feel anxious **most or all of the time**? +Signal: smoothed_wanxious_7d +AddedInWave: 10 +OldRevisions: + - Change: This version asked about the past 5 days, rather than 7. This was changed in Wave 10 to be consistent with other items on the survey. + Signal: smoothed_wanxious_5d + AddedInWave: 4 +--- +Name: Felt Depressed +Category: Mental Health +Question: | + In the past 7 days, did you feel depressed **most or all of the time**? +Signal: smoothed_wdepressed_7d +AddedInWave: 10 +OldRevisions: + - Change: This version asked about the past 5 days, rather than 7. This was changed in Wave 10 to be consistent with other items on the survey. + Signal: smoothed_wdepressed_5d + AddedInWave: 4 + + +--- +Name: Worried About Getting COVID +Category: Mental Health +Question: | + Do you worry a great deal or a moderate amount about catching COVID-19? 
+Signal: smoothed_wworried_catch_covid +AddedInWave: 11 + + +--- +Name: Worried About Finances +Category: Mental Health +Question: | + Do you feel **very or somewhat worried** about your household’s finances for the next month? +Signal: smoothed_wworried_finances +AddedInWave: 4 + + +--- + + +Name: Tested in the Last 14 Days +Category: Testing +Question: | + In the past 14 days, did you get tested for COVID-19? +Signal: smoothed_wtested_14d +AddedInWave: 4 + + +--- + + +Name: Test Positivity Rate +Category: Testing +Question: | + If you were tested for COVID-19 in the past 14 days, was the test positive? +Signal: smoothed_wtested_positive_14d +AddedInWave: 4 \ No newline at end of file diff --git a/src/server/endpoints/dengue_sensors.py b/src/server/endpoints/dengue_sensors.py index f8286eacd..6d24bd3bb 100644 --- a/src/server/endpoints/dengue_sensors.py +++ b/src/server/endpoints/dengue_sensors.py @@ -1,9 +1,9 @@ from flask import Blueprint, request -from .._config import AUTH from .._params import extract_integers, extract_strings from .._query import execute_query, QueryBuilder -from .._validate import check_auth_token, require_all +from .._validate import require_all +from .._security import require_role # first argument is the endpoint name bp = Blueprint("dengue_sensors", __name__) @@ -11,8 +11,8 @@ @bp.route("/", methods=("GET", "POST")) +@require_role("sensors") def handle(): - check_auth_token(request, AUTH["sensors"]) require_all(request, "names", "locations", "epiweeks") names = extract_strings("names") @@ -21,7 +21,7 @@ def handle(): # build query q = QueryBuilder("dengue_sensors", "s") - + fields_string = ["name", "location"] fields_int = ["epiweek"] fields_float = ["value"] diff --git a/src/server/endpoints/fluview.py b/src/server/endpoints/fluview.py index 262cbeb27..98b7c8014 100644 --- a/src/server/endpoints/fluview.py +++ b/src/server/endpoints/fluview.py @@ -2,17 +2,10 @@ from flask import Blueprint, request -from .._config import AUTH -from 
.._params import ( - extract_integer, - extract_integers, - extract_strings, -) +from .._params import extract_integer, extract_integers, extract_strings from .._query import execute_queries, filter_integers, filter_strings -from .._validate import ( - check_auth_token, - require_all, -) +from .._security import current_user +from .._validate import require_all # first argument is the endpoint name bp = Blueprint("fluview", __name__) @@ -21,7 +14,7 @@ @bp.route("/", methods=("GET", "POST")) def handle(): - authorized = check_auth_token(request, AUTH["fluview"], optional=True) + authorized = False if not current_user else current_user.has_role("fluview") require_all(request, "epiweeks", "regions") diff --git a/src/server/endpoints/ght.py b/src/server/endpoints/ght.py index 24ba84c23..b0a6e37cc 100644 --- a/src/server/endpoints/ght.py +++ b/src/server/endpoints/ght.py @@ -1,9 +1,9 @@ from flask import Blueprint, request -from .._config import AUTH from .._params import extract_integers, extract_strings from .._query import execute_query, QueryBuilder -from .._validate import check_auth_token, require_all +from .._validate import require_all +from .._security import require_role # first argument is the endpoint name bp = Blueprint("ght", __name__) @@ -11,8 +11,8 @@ @bp.route("/", methods=("GET", "POST")) +@require_role("ght") def handle(): - check_auth_token(request, AUTH["ght"]) require_all(request, "locations", "epiweeks", "query") locations = extract_strings("locations") diff --git a/src/server/endpoints/meta.py b/src/server/endpoints/meta.py index 8bb4c4890..2b4eb480e 100644 --- a/src/server/endpoints/meta.py +++ b/src/server/endpoints/meta.py @@ -10,7 +10,7 @@ def meta_twitter(): - query = "SELECT x.`date` `latest_update`, x.`table_rows`, count(distinct t.`state`) `num_states` FROM (SELECT max(`date`) `date`, count(1) `table_rows` FROM `twitter`) x JOIN `twitter` t ON t.`date` = x.`date`" + query = "SELECT x.`date` `latest_update`, x.`table_rows`, count(distinct 
t.`state`) `num_states` FROM (SELECT max(`date`) `date`, count(1) `table_rows` FROM `twitter`) x JOIN `twitter` t ON t.`date` = x.`date` GROUP BY x.`date`, x.`table_rows`" fields_string = ["latest_update"] fields_int = ["num_states", "table_rows"] return parse_result(query, {}, fields_string, fields_int, None) diff --git a/src/server/endpoints/meta_afhsb.py b/src/server/endpoints/meta_afhsb.py index 8a74b51ca..096ab58ec 100644 --- a/src/server/endpoints/meta_afhsb.py +++ b/src/server/endpoints/meta_afhsb.py @@ -1,9 +1,9 @@ from flask import Blueprint, request -from .._config import AUTH from .._printer import print_non_standard from .._query import parse_result -from .._validate import check_auth_token +from .._security import require_role + # first argument is the endpoint name bp = Blueprint("meta_afhsb", __name__) @@ -11,9 +11,8 @@ @bp.route("/", methods=("GET", "POST")) +@require_role("afhsb") def handle(): - check_auth_token(request, AUTH["afhsb"]) - # build query table1 = "afhsb_00to13_state" table2 = "afhsb_13to17_state" diff --git a/src/server/endpoints/meta_norostat.py b/src/server/endpoints/meta_norostat.py index 789b09021..ce24de6b4 100644 --- a/src/server/endpoints/meta_norostat.py +++ b/src/server/endpoints/meta_norostat.py @@ -1,9 +1,8 @@ from flask import Blueprint, request -from .._config import AUTH from .._printer import print_non_standard from .._query import parse_result -from .._validate import check_auth_token +from .._security import require_role # first argument is the endpoint name bp = Blueprint("meta_norostat", __name__) @@ -11,9 +10,8 @@ @bp.route("/", methods=("GET", "POST")) +@require_role("norostat") def handle(): - check_auth_token(request, AUTH["norostat"]) - # build query query = "SELECT DISTINCT `release_date` FROM `norostat_raw_datatable_version_list`" releases = parse_result(query, {}, ["release_date"]) diff --git a/src/server/endpoints/norostat.py b/src/server/endpoints/norostat.py index 7dc06d443..144c48cd9 100644 --- 
a/src/server/endpoints/norostat.py +++ b/src/server/endpoints/norostat.py @@ -1,9 +1,9 @@ from flask import Blueprint, request -from .._config import AUTH from .._params import extract_integers from .._query import execute_query, filter_integers, filter_strings -from .._validate import check_auth_token, require_all +from .._validate import require_all +from .._security import require_role # first argument is the endpoint name bp = Blueprint("norostat", __name__) @@ -11,8 +11,8 @@ @bp.route("/", methods=("GET", "POST")) +@require_role("norostat") def handle(): - check_auth_token(request, AUTH["norostat"]) require_all(request, "location", "epiweeks") location = request.values["location"] diff --git a/src/server/endpoints/quidel.py b/src/server/endpoints/quidel.py index 6de9205b8..7486df8bf 100644 --- a/src/server/endpoints/quidel.py +++ b/src/server/endpoints/quidel.py @@ -1,9 +1,9 @@ from flask import Blueprint, request -from .._config import AUTH from .._params import extract_integers, extract_strings from .._query import execute_query, QueryBuilder -from .._validate import check_auth_token, require_all +from .._validate import require_all +from .._security import require_role # first argument is the endpoint name bp = Blueprint("quidel", __name__) @@ -11,8 +11,8 @@ @bp.route("/", methods=("GET", "POST")) +@require_role("quidel") def handle(): - check_auth_token(request, AUTH["quidel"]) require_all(request, "locations", "epiweeks") locations = extract_strings("locations") diff --git a/src/server/endpoints/sensors.py b/src/server/endpoints/sensors.py index cd76ca4d8..1c76f47ac 100644 --- a/src/server/endpoints/sensors.py +++ b/src/server/endpoints/sensors.py @@ -1,104 +1,62 @@ -from flask import Blueprint, Request, request +from flask import Blueprint, request -from .._config import AUTH, GRANULAR_SENSOR_AUTH_TOKENS, OPEN_SENSORS from .._exceptions import EpiDataException from .._params import ( extract_strings, extract_integers, ) +from .._security import 
current_user from .._query import filter_strings, execute_query, filter_integers -from .._validate import ( - require_all, - resolve_auth_token, -) +from .._validate import require_all from typing import List +# sensor names that require auth tokens to access them; +# excludes the global auth key "sensors" that works for all sensors: +GRANULAR_SENSORS = { + "twtr", + "gft", + "ght", + "ghtj", + "cdc", + "quid", + "wiki", +} + +# A set of sensors that do not require an auth key to access: +OPEN_SENSORS = { + "sar3", + "epic", + "arch", +} + # first argument is the endpoint name bp = Blueprint("sensors", __name__) alias = "signals" -# Limits on the number of effective auth token equality checks performed per sensor query; generate auth tokens with appropriate levels of entropy according to the limits below: -MAX_GLOBAL_AUTH_CHECKS_PER_SENSOR_QUERY = 1 # (but imagine is larger to futureproof) -MAX_GRANULAR_AUTH_CHECKS_PER_SENSOR_QUERY = 30 # (but imagine is larger to futureproof) -# A (currently redundant) limit on the number of auth tokens that can be provided: -MAX_AUTH_KEYS_PROVIDED_PER_SENSOR_QUERY = 1 -# end sensor query authentication configuration - -PHP_INT_MAX = 2147483647 - - -def _authenticate(req: Request, names: List[str]): - auth_tokens_presented = (resolve_auth_token(req) or "").split(",") - - names = extract_strings("names") - n_names = len(names) - n_auth_tokens_presented = len(auth_tokens_presented) - max_valid_granular_tokens_per_name = max( - len(v) for v in GRANULAR_SENSOR_AUTH_TOKENS.values() - ) - - # The number of valid granular tokens is related to the number of auth token checks that a single query could perform. Use the max number of valid granular auth tokens per name in the check below as a way to prevent leakage of sensor names (but revealing the number of sensor names) via this interface. Treat all sensors as non-open for convenience of calculation. 
- if n_names == 0: - # Check whether no names were provided to prevent edge-case issues in error message below, and in case surrounding behavior changes in the future: +def _authenticate(names: List[str]): + if len(names) == 0: raise EpiDataException("no sensor names provided") - if n_auth_tokens_presented > 1: - raise EpiDataException( - "currently, only a single auth token is allowed to be presented at a time; please issue a separate query for each sensor name using only the corresponding token" - ) - - # Check whether max number of presented-vs.-acceptable token comparisons that would be performed is over the set limits, avoiding calculation of numbers > PHP_INT_MAX/100: - # Global auth token comparison limit check: - # Granular auth token comparison limit check: - if ( - n_auth_tokens_presented > MAX_GLOBAL_AUTH_CHECKS_PER_SENSOR_QUERY - or n_names - > int((PHP_INT_MAX / 100 - 1) / max(1, max_valid_granular_tokens_per_name)) - or n_auth_tokens_presented - > int(PHP_INT_MAX / 100 / max(1, n_names * max_valid_granular_tokens_per_name)) - or n_auth_tokens_presented * n_names * max_valid_granular_tokens_per_name - > MAX_GRANULAR_AUTH_CHECKS_PER_SENSOR_QUERY - ): - raise EpiDataException( - "too many sensors requested and/or auth tokens presented; please divide sensors into batches and/or use only the tokens needed for the sensors requested" - ) - if len(auth_tokens_presented) > MAX_AUTH_KEYS_PROVIDED_PER_SENSOR_QUERY: - # this check should be redundant with >1 check as well as global check above - raise EpiDataException("too many auth tokens presented") + # whether has access to all sensors: + sensor_authenticated_globally = (current_user and current_user.has_role("sensors")) unauthenticated_or_nonexistent_sensors = [] for name in names: - sensor_is_open = name in OPEN_SENSORS - # test whether they provided the "global" auth token that works for all sensors: - sensor_authenticated_globally = AUTH["sensors"] in auth_tokens_presented - # test whether they provided a 
"granular" auth token for one of the - # sensor_subsets containing this sensor (if any): - sensor_authenticated_granularly = False - if name in GRANULAR_SENSOR_AUTH_TOKENS: - acceptable_granular_tokens_for_sensor = GRANULAR_SENSOR_AUTH_TOKENS[name] - # check for nonempty intersection between provided and acceptable - # granular auth tokens: - for acceptable_granular_token in acceptable_granular_tokens_for_sensor: - if acceptable_granular_token in auth_tokens_presented: - sensor_authenticated_granularly = True - break - # (else: there are no granular tokens for this sensor; can't authenticate granularly) - - if ( - not sensor_is_open - and not sensor_authenticated_globally - and not sensor_authenticated_granularly - ): - # authentication failed for this sensor; append to list: + if name in OPEN_SENSORS: + # no auth needed + continue + if name in GRANULAR_SENSORS and current_user and current_user.has_role(name): + # user has permissions for this sensor + continue + if not sensor_authenticated_globally: + # non-existant sensor or auth failed; append to list: unauthenticated_or_nonexistent_sensors.append(name) if unauthenticated_or_nonexistent_sensors: raise EpiDataException( f"unauthenticated/nonexistent sensor(s): {','.join(unauthenticated_or_nonexistent_sensors)}" ) - # # Alternative message that may enable shorter tokens: - # $data['message'] = 'some/all sensors requested were unauthenticated/nonexistent'; @bp.route("/", methods=("GET", "POST")) @@ -106,7 +64,7 @@ def handle(): require_all(request, "names", "locations", "epiweeks") names = extract_strings("names") or [] - _authenticate(request, names) + _authenticate(names) # parse the request locations = extract_strings("locations") diff --git a/src/server/endpoints/twitter.py b/src/server/endpoints/twitter.py index 84cbb2850..0a96adfd0 100644 --- a/src/server/endpoints/twitter.py +++ b/src/server/endpoints/twitter.py @@ -1,16 +1,16 @@ from flask import Blueprint, request -from .._config import AUTH, 
NATION_REGION, REGION_TO_STATE +from .._config import NATION_REGION, REGION_TO_STATE from .._params import ( extract_integers, extract_strings, ) from .._query import execute_queries, filter_dates, filter_integers, filter_strings from .._validate import ( - check_auth_token, require_all, require_any, ) +from .._security import require_role # first argument is the endpoint name bp = Blueprint("twitter", __name__) @@ -18,8 +18,8 @@ @bp.route("/", methods=("GET", "POST")) +@require_role("twitter") def handle(): - check_auth_token(request, AUTH["twitter"]) require_all(request, "locations") require_any(request, "dates", "epiweeks") diff --git a/src/server/main.py b/src/server/main.py index 7471a2491..c05b9d0d3 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -4,25 +4,42 @@ from flask import request, send_file, Response, send_from_directory, jsonify +from delphi.epidata.common.logger import get_structured_logger + from ._config import URL_PREFIX, VERSION from ._common import app, set_compatibility_mode +from ._db import metadata, engine from ._exceptions import MissingOrWrongSourceException from .endpoints import endpoints +from .endpoints.admin import bp as admin_bp, enable_admin +from ._security import register_user_role +from ._limiter import limiter, apply_limit __all__ = ["app"] +logger = get_structured_logger("webapp_main") + endpoint_map: Dict[str, Callable[[], Response]] = {} for endpoint in endpoints: - endpoint_map[endpoint.bp.name] = endpoint.handle + logger.info("registering endpoint", bp_name=endpoint.bp.name) + apply_limit(endpoint.bp) app.register_blueprint(endpoint.bp, url_prefix=f"{URL_PREFIX}/{endpoint.bp.name}") + endpoint_map[endpoint.bp.name] = endpoint.handle alias = getattr(endpoint, "alias", None) if alias: + logger.info("endpoint has alias", bp_name=endpoint.bp.name, alias=alias) endpoint_map[alias] = endpoint.handle +if enable_admin(): + logger.info("admin endpoint enabled") + limiter.exempt(admin_bp) + 
app.register_blueprint(admin_bp, url_prefix=f"{URL_PREFIX}/admin") + @app.route(f"{URL_PREFIX}/api.php", methods=["GET", "POST"]) +@apply_limit def handle_generic(): # mark as compatibility mode set_compatibility_mode() @@ -32,7 +49,6 @@ def handle_generic(): return endpoint_map[endpoint]() -@app.route(f"{URL_PREFIX}") @app.route(f"{URL_PREFIX}/") @app.route(f"{URL_PREFIX}/index.html") def send_index_file(): @@ -49,6 +65,8 @@ def send_lib_file(path: str): return send_from_directory(pathlib.Path(__file__).parent / "lib", path) +metadata.create_all(engine) + if __name__ == "__main__": app.run(host="0.0.0.0", port=5000) else: @@ -57,4 +75,6 @@ def send_lib_file(path: str): app.logger.handlers = gunicorn_logger.handlers app.logger.setLevel(gunicorn_logger.level) sqlalchemy_logger = logging.getLogger("sqlalchemy") + sqlalchemy_logger.handlers = gunicorn_logger.handlers sqlalchemy_logger.setLevel(logging.WARN) + # WAS: sqlalchemy_logger.setLevel(gunicorn_logger.level) diff --git a/tests/server/test_security.py b/tests/server/test_security.py new file mode 100644 index 000000000..e209d9342 --- /dev/null +++ b/tests/server/test_security.py @@ -0,0 +1,47 @@ +"""Unit tests for granular sensor authentication in api.php.""" + +# standard library +import unittest +import base64 + +# from flask.testing import FlaskClient +from delphi.epidata.server._common import app +from delphi.epidata.server._security import ( + resolve_auth_token, +) + +# py3tester coverage target +__test_target__ = "delphi.epidata.server._security" + + +class UnitTests(unittest.TestCase): + """Basic unit tests.""" + + # app: FlaskClient + + def setUp(self): + app.config["TESTING"] = True + app.config["WTF_CSRF_ENABLED"] = False + app.config["DEBUG"] = False + + def test_resolve_auth_token(self): + with self.subTest("no auth"): + with app.test_request_context("/"): + self.assertIsNone(resolve_auth_token()) + + with self.subTest("param"): + with app.test_request_context("/?auth=abc"): + 
self.assertEqual(resolve_auth_token(), "abc") + + with self.subTest("param2"): + with app.test_request_context("/?api_key=abc"): + self.assertEqual(resolve_auth_token(), "abc") + + with self.subTest("bearer token"): + with app.test_request_context("/", headers={"Authorization": "Bearer abc"}): + self.assertEqual(resolve_auth_token(), "abc") + + with self.subTest("basic token"): + userpass = base64.b64encode(b"epidata:abc").decode("utf-8") + with app.test_request_context("/", headers={"Authorization": f"Basic {userpass}"}): + self.assertEqual(resolve_auth_token(), "abc") diff --git a/tests/server/test_validate.py b/tests/server/test_validate.py index 22a4f153c..27ce28672 100644 --- a/tests/server/test_validate.py +++ b/tests/server/test_validate.py @@ -2,21 +2,17 @@ # standard library import unittest -import base64 from flask import request # from flask.testing import FlaskClient from delphi.epidata.server._common import app from delphi.epidata.server._validate import ( - resolve_auth_token, - check_auth_token, require_all, require_any, ) from delphi.epidata.server._exceptions import ( ValidationFailedException, - UnAuthenticatedException, ) # py3tester coverage target @@ -33,47 +29,6 @@ def setUp(self): app.config["WTF_CSRF_ENABLED"] = False app.config["DEBUG"] = False - def test_resolve_auth_token(self): - with self.subTest("no auth"): - with app.test_request_context("/"): - self.assertIsNone(resolve_auth_token(request)) - - with self.subTest("param"): - with app.test_request_context("/?auth=abc"): - self.assertEqual(resolve_auth_token(request), "abc") - - with self.subTest("bearer token"): - with app.test_request_context("/", headers={"Authorization": "Bearer abc"}): - self.assertEqual(resolve_auth_token(request), "abc") - - with self.subTest("basic token"): - userpass = base64.b64encode(b"epidata:abc").decode("utf-8") - with app.test_request_context( - "/", headers={"Authorization": f"Basic {userpass}"} - ): - self.assertEqual(resolve_auth_token(request), "abc") 
- - def test_check_auth_token(self): - with self.subTest("no auth but optional"): - with app.test_request_context("/"): - self.assertFalse(check_auth_token(request, "abc", True)) - with self.subTest("no auth but required"): - with app.test_request_context("/"): - self.assertRaises( - ValidationFailedException, lambda: check_auth_token(request, "abc") - ) - with self.subTest("auth and required"): - with app.test_request_context("/?auth=abc"): - self.assertTrue(check_auth_token(request, "abc")) - with self.subTest("auth and required but wrong"): - with app.test_request_context("/?auth=abc"): - self.assertRaises( - UnAuthenticatedException, lambda: check_auth_token(request, "def") - ) - with self.subTest("auth and required but wrong but optional"): - with app.test_request_context("/?auth=abc"): - self.assertFalse(check_auth_token(request, "def", True)) - def test_require_all(self): with self.subTest("all given"): with app.test_request_context("/"):