diff --git a/.dockerignore b/.dockerignore index 058bb7939..820d11c43 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,7 @@ /delphi-epidata -/.mypy_cache +**/.mypy_cache /.github /docs -__pycache__ -/node_modules \ No newline at end of file +**/__pycache__ +**/.pytest_cache +**/node_modules \ No newline at end of file diff --git a/.env.example b/.env.example index 212706e08..51cac1ae4 100644 --- a/.env.example +++ b/.env.example @@ -1,18 +1,6 @@ FLASK_DEBUG=True SQLALCHEMY_DATABASE_URI=sqlite:///test.db FLASK_SECRET=abc -SECRET_TWITTER=abc -SECRET_GHT=abc -SECRET_FLUVIEW=abc -SECRET_CDC=abc -SECRET_SENSORS=abc -SECRET_SENSOR_TWTR=abc -SECRET_SENSOR_GFT=abc -SECRET_SENSOR_GHT=abc -SECRET_SENSOR_GHTJ=abc -SECRET_SENSOR_CDC=abc -SECRET_SENSOR_QUID=abc -SECRET_SENSOR_WIKI=abc -SECRET_QUIDEL=abc -SECRET_NOROSTAT=abc -SECRET_AFHSB=abc \ No newline at end of file +#API_KEY_REQUIRED_STARTING_AT=2021-07-30 +API_KEY_ADMIN_PASSWORD=abc +API_KEY_REGISTER_WEBHOOK_TOKEN=abc diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 438a805fd..1f7664b46 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -53,24 +53,31 @@ jobs: run: | docker build -t delphi_database_epidata -f ./repos/delphi/delphi-epidata/dev/docker/database/epidata/Dockerfile . docker build -t delphi_web_python -f repos/delphi/delphi-epidata/dev/docker/python/Dockerfile . + sudo docker build -t delphi_redis -f repos/delphi/delphi-epidata/dev/docker/redis/Dockerfile . cd ./repos/delphi/delphi-epidata docker build -t delphi_web_epidata -f ./devops/Dockerfile . cd ../../../ # MODULE_NAME specifies the location of the `app` variable, the actual WSGI application object to run. 
# see https://github.com/tiangolo/meinheld-gunicorn-docker#module_name - - name: Start services + - name: Start database and Redis services run: | docker network create --driver bridge delphi-net docker run --rm -d -p 13306:3306 --network delphi-net --name delphi_database_epidata --cap-add=sys_nice delphi_database_epidata - docker run --rm -d -p 10080:80 --env "MODULE_NAME=delphi.epidata.server.main" --env "SQLALCHEMY_DATABASE_URI=mysql+mysqldb://user:pass@delphi_database_epidata:3306/epidata" --env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --network delphi-net --name delphi_web_epidata delphi_web_epidata - docker ps + docker run --rm -d -p 6379:6379 --network delphi-net --env "REDIS_PASSWORD=1234" --name delphi_redis delphi_redis + - run: | wget https://raw.githubusercontent.com/eficode/wait-for/master/wait-for chmod +x wait-for ./wait-for localhost:13306 -- echo 'ready' sleep 10s + + - name: Start delphi_web_epidata + run: | + docker run --rm -d -p 10080:80 --env "MODULE_NAME=delphi.epidata.server.main" --env "SQLALCHEMY_DATABASE_URI=mysql+mysqldb://user:pass@delphi_database_epidata:3306/epidata" --env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --env "REDIS_HOST=delphi_redis" --env "REDIS_PASSWORD=1234" --env "API_KEY_REGISTER_WEBHOOK_TOKEN=abc" --env "API_KEY_ADMIN_PASSWORD=test_admin_password" --network delphi-net --name delphi_web_epidata delphi_web_epidata + docker ps + - name: Run Unit Tests run: | docker run --rm --network delphi-net --env "SQLALCHEMY_DATABASE_URI=mysql+mysqldb://user:pass@delphi_database_epidata:3306/epidata" --env "FLASK_SECRET=abc" delphi_web_python python -m pytest --import-mode importlib repos/delphi/delphi-epidata/tests @@ -81,7 +88,7 @@ jobs: - name: Clean Up run: | - docker stop delphi_database_epidata delphi_web_epidata + docker stop delphi_database_epidata delphi_web_epidata delphi_redis docker network remove delphi-net build_js_client: @@ -108,7 +115,8 @@ jobs: image: needs: build # only on main and dev 
branch - if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev' + # TODO: #1112 Remove `|| github.ref == 'refs/heads/api-keys'` after transition to production status. + if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/api-keys' runs-on: ubuntu-latest steps: diff --git a/dev/docker/python/Dockerfile b/dev/docker/python/Dockerfile index ffce16b0f..1836e4a5b 100644 --- a/dev/docker/python/Dockerfile +++ b/dev/docker/python/Dockerfile @@ -1,5 +1,7 @@ FROM python:3.8-buster +RUN apt-get update && apt-get install -y r-base && Rscript -e "install.packages(c('httr','xml2'))" + WORKDIR /usr/src/app COPY repos repos diff --git a/dev/docker/redis/Dockerfile b/dev/docker/redis/Dockerfile new file mode 100644 index 000000000..3470ed39f --- /dev/null +++ b/dev/docker/redis/Dockerfile @@ -0,0 +1,5 @@ +FROM redis + +ENV REDIS_PASSWORD=$REDIS_PASSWORD + +CMD ["sh", "-c", "exec redis-server --requirepass \"$REDIS_PASSWORD\""] \ No newline at end of file diff --git a/dev/local/Makefile b/dev/local/Makefile index 55210d790..e7e896aa6 100644 --- a/dev/local/Makefile +++ b/dev/local/Makefile @@ -64,8 +64,10 @@ CWD:=$(dir $(abspath $(lastword $(MAKEFILE_LIST)))) NOW:=$(shell date "+%Y-%m-%d") LOG_WEB:=delphi_web_epidata_$(NOW).log LOG_DB:=delphi_database_epidata_$(NOW).log +LOG_REDIS:=delphi_redis_instance_$(NOW).log WEB_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_web_epidata') DATABASE_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_database_epidata') +REDIS_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_redis') M1= ifeq ($(shell uname -smp), Darwin arm64 arm) @@ -98,6 +100,10 @@ web: --env "MODULE_NAME=delphi.epidata.server.main" \ --env "SQLALCHEMY_DATABASE_URI=$(sqlalchemy_uri)" \ --env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --env "LOG_DEBUG" \ + --env "REDIS_HOST=delphi_redis" \ + --env "REDIS_PASSWORD=1234" \ + --env "API_KEY_ADMIN_PASSWORD=test_admin_password" \ + --env 
"API_KEY_REGISTER_WEBHOOK_TOKEN=abc" \ --network delphi-net --name delphi_web_epidata \ delphi_web_epidata >$(LOG_WEB) 2>&1 & @@ -136,8 +142,25 @@ py: $(M1) \ -f repos/delphi/delphi-epidata/dev/docker/python/Dockerfile . +.PHONY=redis +redis: + @# Stop container if running + @if [ $(REDIS_CONTAINER_ID) ]; then\ + docker stop $(REDIS_CONTAINER_ID);\ + fi + + @docker build -t delphi_redis \ + $(M1) \ + -f repos/delphi/delphi-epidata/dev/docker/redis/Dockerfile . + + @docker run --rm -d -p 127.0.0.1:6379:6379 \ + $(M1) \ + --network delphi-net \ + --env "REDIS_PASSWORD=1234" \ + --name delphi_redis delphi_redis >$(LOG_REDIS) 2>&1 & + .PHONY=all -all: db web py +all: db web py redis .PHONY=test test: @@ -149,6 +172,17 @@ test: --env "FLASK_SECRET=abc" \ delphi_web_python python -m pytest --import-mode importlib $(pdb) $(test) | tee test_output_$(NOW).log +.PHONY=r-test +r-test: + @docker run -i --rm --network delphi-net \ + $(M1) \ + --mount type=bind,source=$(CWD)repos/delphi/delphi-epidata,target=/usr/src/app/repos/delphi/delphi-epidata,readonly \ + --mount type=bind,source=$(CWD)repos/delphi/delphi-epidata/src,target=/usr/src/app/delphi/epidata,readonly \ + --env "SQLALCHEMY_DATABASE_URI=$(sqlalchemy_uri)" \ + --env "FLASK_SECRET=abc" \ + delphi_web_python Rscript repos/delphi/delphi-epidata/integrations/client/test_delphi_epidata.R | tee r-test_output_$(NOW).log + + .PHONY=bash bash: @docker run -it --rm --network delphi-net \ diff --git a/dev/local/setup.cfg b/dev/local/setup.cfg index 374009e0f..8bcff9681 100644 --- a/dev/local/setup.cfg +++ b/dev/local/setup.cfg @@ -28,6 +28,8 @@ packages = delphi.epidata.acquisition.wiki delphi.epidata.client delphi.epidata.server + delphi.epidata.server.admin + delphi.epidata.server.admin.templates delphi.epidata.server.covidcast_issues_migration delphi.epidata.server.endpoints delphi.epidata.server.endpoints.covidcast_utils diff --git a/docs/api/api_keys.md b/docs/api/api_keys.md index 4b2080324..80f74101c 100644 --- 
a/docs/api/api_keys.md +++ b/docs/api/api_keys.md @@ -16,11 +16,11 @@ change as we learn more about their impact: For example, a query for three signals on one date across all counties can be submitted anonymously, but a query for three signals on a period of four weeks -across all counties requires an API key. +across all counties requires an API key. An API key is a pseudonymous access token that grants privileged access to the -Epidata API. You can request an API key by -[registering with us](https://forms.gle/hkBr5SfQgxguAfEt7). +Epidata API. You can request an API key by +[registering with us](https://api.delphi.cmu.edu/epidata/admin/registration_form). Privileges of registration may include: 1. no rate limit @@ -36,7 +36,8 @@ store the information you provide us at registration time, see our ## Usage -If you choose to [register for an API key](https://forms.gle/hkBr5SfQgxguAfEt7), +If you choose to +[register for an API key](https://api.delphi.cmu.edu/epidata/admin/registration_form), there are several ways to use your key to authenticate your requests: ### Via request parameter diff --git a/docs/api/privacy_statement.md b/docs/api/privacy_statement.md index 002367931..eccaec14d 100644 --- a/docs/api/privacy_statement.md +++ b/docs/api/privacy_statement.md @@ -29,7 +29,7 @@ purposes: * to identify excessive or abnormal usage patterns which may harm our system The logs are only available to members of our operations team, and are expunged -at or before they reach five years in age. +at or before they reach five years in age. If you provide us with your email address, we will only use it to contact you in the following scenarios: @@ -47,9 +47,9 @@ security practices to help us keep your information secure. We only retrieve this mapping to resolve cases of excessive or abnormal usage. We automatically disassociate an email address from its API key when the API key has not been used for six months, or upon user request. 
You can request that your -email address be removed from our records by filling out a -[deletion request](https://forms.gle/GucFmZHTMgEFjH197). +email address be removed from our records by filling out a +[deletion request](https://api.delphi.cmu.edu/epidata/admin/removal_request). -For more information, see +For more information, see [Carnegie Mellon’s privacy notice](https://www.cmu.edu/legal/privacy-notice.html). Further questions can be directed to delphi-support+privacy@andrew.cmu.edu. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 1ddce45dc..931434984 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,14 +14,14 @@ group](https://delphi.cmu.edu/). The Epidata API includes: quick access to COVID data are available. - [Data about other diseases](api/README.md), including influenza, dengue, and other diseases tracked by Delphi through various data streams. - + Anyone may access the Epidata API anonymously without providing any personal data. Anonymous API access is currently rate-limited and restricted to public datasets with a maximum of two of the requested parameters having multiple selections (signals, dates, versions, regions, etc). To request access with no rate limit and unlimited multiple -selections, you can [request a registered API key](https://forms.gle/hkBr5SfQgxguAfEt7). +selections, you can [request a registered API key](https://api.delphi.cmu.edu/epidata/admin/registration_form). For policy and usage details, consult the [Epidata API keys documentation](api/api_keys.md). If you regularly or frequently use our system, please consider using an API key @@ -30,10 +30,11 @@ us understand who and how others are using our Delphi Epidata API, which may in turn inform our future research, data partnerships, and funding. For more information about how we use the data you provide us through your -registration and API request activity, see our -[Privacy Statement](api/privacy_statement.md). 
At any time, you may submit a -[Deletion Request](https://forms.gle/GucFmZHTMgEFjH197) to have us deactivate your key and destroy all -information associating that key with your identity. +registration and API request activity, see our +[Privacy Statement](api/privacy_statement.md). At any time, you may submit a +[Deletion Request](https://api.delphi.cmu.edu/epidata/admin/removal_request) to +have us deactivate your key and destroy all information associating that key +with your identity. The Delphi group is extremely grateful to Pedrito Maynard-Zhang for all his help with the Epidata API [documentation](api/README.md). diff --git a/integrations/acquisition/covid_hosp/facility/test_scenarios.py b/integrations/acquisition/covid_hosp/facility/test_scenarios.py index aaa3c5e3b..c6c51e2f5 100644 --- a/integrations/acquisition/covid_hosp/facility/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/facility/test_scenarios.py @@ -29,6 +29,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' @@ -40,6 +41,8 @@ def setUp(self): cur.execute('truncate table covid_hosp_facility') cur.execute('truncate table covid_hosp_facility_key') cur.execute('truncate table covid_hosp_meta') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') @freeze_time("2021-03-16") def test_acquire_dataset(self): diff --git a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py index e55bc8ca6..2054d19c8 100644 --- a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py @@ -33,6 +33,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 
'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' @@ -43,6 +44,8 @@ def setUp(self): with db.new_cursor() as cur: cur.execute('truncate table covid_hosp_state_timeseries') cur.execute('truncate table covid_hosp_meta') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values("key", "email")') @freeze_time("2021-03-16") def test_acquire_dataset(self): diff --git a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py index 5d13ccbb0..8565b8e7f 100644 --- a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py @@ -29,6 +29,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' @@ -39,6 +40,8 @@ def setUp(self): with db.new_cursor() as cur: cur.execute('truncate table covid_hosp_state_timeseries') cur.execute('truncate table covid_hosp_meta') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') @freeze_time("2021-03-17") def test_acquire_dataset(self): diff --git a/integrations/acquisition/covidcast/test_covidcast_meta_caching.py b/integrations/acquisition/covidcast/test_covidcast_meta_caching.py index 99008a0f1..e746c4ef1 100644 --- a/integrations/acquisition/covidcast/test_covidcast_meta_caching.py +++ b/integrations/acquisition/covidcast/test_covidcast_meta_caching.py @@ -60,12 +60,20 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = BASE_URL + Epidata.auth = ('epidata', 'key') def tearDown(self): """Perform per-test teardown.""" 
self.cur.close() self.cnx.close() + @staticmethod + def _make_request(): + params = {'endpoint': 'covidcast_meta', 'cached': 'true'} + response = requests.get(Epidata.BASE_URL, params=params, auth=Epidata.auth) + response.raise_for_status() + return response.json() + def test_caching(self): """Populate, query, cache, query, and verify the cache.""" @@ -147,10 +155,7 @@ def test_caching(self): self.cnx.commit() # fetch the cached version (manually) - params = {'endpoint': 'covidcast_meta', 'cached': 'true'} - response = requests.get(BASE_URL, params=params) - response.raise_for_status() - epidata4 = response.json() + epidata4 = self._make_request() # make sure the cache was actually served self.assertEqual(epidata4, { @@ -170,10 +175,7 @@ def test_caching(self): self.cnx.commit() # fetch the cached version (manually) - params = {'endpoint': 'covidcast_meta', 'cached': 'true'} - response = requests.get(BASE_URL, params=params) - response.raise_for_status() - epidata5 = response.json() + epidata5 = self._make_request() # make sure the cache was returned anyhow self.assertEqual(epidata4, epidata5) diff --git a/integrations/acquisition/covidcast/test_csv_uploading.py b/integrations/acquisition/covidcast/test_csv_uploading.py index 040eb5f1a..e4c9d881e 100644 --- a/integrations/acquisition/covidcast/test_csv_uploading.py +++ b/integrations/acquisition/covidcast/test_csv_uploading.py @@ -57,6 +57,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') def tearDown(self): """Perform per-test teardown.""" diff --git a/integrations/acquisition/covidcast_nowcast/test_csv_uploading.py b/integrations/acquisition/covidcast_nowcast/test_csv_uploading.py index 9dc163a2b..1299c6144 100644 --- a/integrations/acquisition/covidcast_nowcast/test_csv_uploading.py +++ b/integrations/acquisition/covidcast_nowcast/test_csv_uploading.py @@ -41,6 +41,8 @@ def setUp(self): 
database='epidata') cur = cnx.cursor() cur.execute('truncate table covidcast_nowcast') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') cnx.commit() cur.close() @@ -54,6 +56,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') def tearDown(self): """Perform per-test teardown.""" diff --git a/integrations/client/test_delphi_epidata.R b/integrations/client/test_delphi_epidata.R new file mode 100644 index 000000000..3f05386f5 --- /dev/null +++ b/integrations/client/test_delphi_epidata.R @@ -0,0 +1,106 @@ +#!/usr/bin/env Rscript + +# disgusting hack to set up cwd +initial_options <- commandArgs(trailingOnly = FALSE) +file_arg_name <- "--file=" +basename <- dirname(sub(file_arg_name, "", initial_options[grep(file_arg_name, initial_options)])) +setwd(basename) + +options(epidata.url="http://delphi_web_epidata/epidata/") +source("../../src/client/delphi_epidata.R") + +out <- function(res) { + cat(paste(res$result, res$message, length(res$epidata), "\n")) +} + +check <- function(res, exp_result, exp_message, exp_length) { + stopifnot(res$result == exp_result) + stopifnot(res$message == exp_message) + stopifnot(length(res$epidata) == exp_length) +} + +call_region_epiweeks <- function(fn_name) { + fn <- Epidata[[fn_name]] + cat(paste(fn_name,"\n")) + fn(list('nat'), list(201440)) +} + +cat("server_version\n") +res <- Epidata$server_version() +stopifnot("version" %in% names(res)) +cat("fluview\n") +res <- Epidata$fluview(list('nat'), list(201440, Epidata$range(201501, 201510)), auth="test") +check(res, -2, "no results", 0) +cat("fluview_meta\n") +res <- Epidata$fluview_meta() +check(res, 1, "success", 1) +res <- call_region_epiweeks("fluview_clinical") +check(res, -2, "no results", 0) +res <- call_region_epiweeks("flusurv") +check(res, -2, "no results", 0) +res <- call_region_epiweeks("ecdc_ili") 
+check(res, -2, "no results", 0) +res <- call_region_epiweeks("kcdc_ili") +check(res, -2, "no results", 0) +res <- call_region_epiweeks("gft") +check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$ght("abc", list("nat"), list(201440), "cough") +#check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$twitter("abc", list("nat"), epiweeks=list(201440)) +#check(res, -2, "no results", 0) +cat("wiki\n") +res <- Epidata$wiki(list("abc"), list(20141201)) +check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$cdc("abc", list(201440), list("nat")) +#check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$quidel("abc", list(201440), list("nat")) +#check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$norostat("abc", "nat", list(201440)) +#check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$meta_norostat("abc") +#out(res) +res <- call_region_epiweeks("nidss.flu") +check(res, -2, "no results", 0) +res <- call_region_epiweeks("nidss.dengue") +check(res, -2, "no results", 0) +cat("delphi\n") +res <- Epidata$delphi("xyz", 201440) +check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$sensors("abc", list("def"), list("nat"), list(201440)) +#check(res, -2, "no results", 0) +## pre-pandemic auth endpoint; no test credentials plumbing for that yet +#res <- Epidata$dengue_sensors("abc", list("def"), list("nat"), list(201440)) +#check(res, -2, "no results", 0) +res <- call_region_epiweeks("nowcast") +check(res, -2, "no results", 0) +## Fails with database error: +#res <- call_region_epiweeks("dengue_nowcast") 
+#out(res) +#check(res, -2, "no results", 0) +cat("meta\n") +res <- Epidata$meta() +check(res, 1, "success", 1) +cat("covidcast\n") +res <- Epidata$covidcast("abc", "def", "day", "nation", list(20200401), "us") +check(res, -2, "no results", 0) +cat("covidcast_meta\n") +res <- Epidata$covidcast_meta() +check(res, 1, "success", 16) +cat("covid_hosp\n") +res <- Epidata$covid_hosp("pa", 20201201) +check(res, -2, "no results", 0) +cat("covid_hosp_facility\n") +res <- Epidata$covid_hosp_facility(list("abc"), list(202050)) +check(res, -2, "no results", 0) +cat("covid_hosp_facility_lookup\n") +res <- Epidata$covid_hosp_facility_lookup("pa") +check(res, 1, "success", 1) + + diff --git a/integrations/client/test_delphi_epidata.py b/integrations/client/test_delphi_epidata.py index f5ce462f2..9698d1274 100644 --- a/integrations/client/test_delphi_epidata.py +++ b/integrations/client/test_delphi_epidata.py @@ -40,6 +40,7 @@ def localSetUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' diff --git a/integrations/client/test_nowcast.py b/integrations/client/test_nowcast.py index dc1a20794..f5124e021 100644 --- a/integrations/client/test_nowcast.py +++ b/integrations/client/test_nowcast.py @@ -28,6 +28,8 @@ def setUp(self): cur = cnx.cursor() cur.execute('truncate table covidcast_nowcast') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') cnx.commit() cur.close() @@ -38,6 +40,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' diff --git a/integrations/server/test_covid_hosp.py b/integrations/server/test_covid_hosp.py 
index 16538b82d..37aa77363 100644 --- a/integrations/server/test_covid_hosp.py +++ b/integrations/server/test_covid_hosp.py @@ -17,6 +17,7 @@ def setUp(self): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') # use the local instance of the epidata database secrets.db.host = 'delphi_database_epidata' @@ -27,6 +28,8 @@ def setUp(self): with db.new_cursor() as cur: cur.execute('truncate table covid_hosp_state_timeseries') cur.execute('truncate table covid_hosp_meta') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') def insert_issue(self, cur, issue, value, record_type): diff --git a/integrations/server/test_covidcast.py b/integrations/server/test_covidcast.py index 01d81bf29..73787d664 100644 --- a/integrations/server/test_covidcast.py +++ b/integrations/server/test_covidcast.py @@ -6,15 +6,12 @@ # third party import mysql.connector -import requests # first party from delphi_utils import Nans from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow, FIPS, MSA from delphi.epidata.client.delphi_epidata import Epidata -# use the local instance of the Epidata API -BASE_URL = 'http://delphi_web_epidata/epidata/api.php' class CovidcastTests(CovidcastBase): """Tests the `covidcast` endpoint.""" @@ -25,7 +22,9 @@ def localSetUp(self): def request_based_on_row(self, row: CovidcastTestRow, **kwargs): params = self.params_from_row(row, endpoint='covidcast', **kwargs) - Epidata.BASE_URL = BASE_URL + # use the local instance of the Epidata API + Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') response = Epidata.covidcast(**params) return response diff --git a/integrations/server/test_covidcast_endpoints.py b/integrations/server/test_covidcast_endpoints.py index 1f7e7ade5..41b74ac03 100644 --- 
a/integrations/server/test_covidcast_endpoints.py +++ b/integrations/server/test_covidcast_endpoints.py @@ -15,6 +15,7 @@ # use the local instance of the Epidata API BASE_URL = "http://delphi_web_epidata/epidata/covidcast" BASE_URL_OLD = "http://delphi_web_epidata/epidata/api.php" +AUTH = ('epidata', 'key') class CovidcastEndpointTests(CovidcastBase): @@ -36,7 +37,7 @@ def _fetch(self, endpoint="/", is_compatibility=False, **params): params.setdefault("data_source", params.get("source")) else: url = f"{BASE_URL}{endpoint}" - response = requests.get(url, params=params) + response = requests.get(url, params=params, auth=AUTH) response.raise_for_status() return response.json() diff --git a/integrations/server/test_covidcast_meta.py b/integrations/server/test_covidcast_meta.py index d0aef6fe5..95a51e354 100644 --- a/integrations/server/test_covidcast_meta.py +++ b/integrations/server/test_covidcast_meta.py @@ -14,6 +14,7 @@ # use the local instance of the Epidata API BASE_URL = 'http://delphi_web_epidata/epidata/api.php' +AUTH = ('epidata', 'key') class CovidcastMetaTests(unittest.TestCase): @@ -135,6 +136,14 @@ def insert_placeholder_data(self): def _get_id(self): self.id_counter += 1 return self.id_counter + + @staticmethod + def _fetch(**kwargs): + params = kwargs.copy() + params['endpoint'] = 'covidcast_meta' + response = requests.get(BASE_URL, params=params, auth=AUTH) + response.raise_for_status() + return response.json() def test_round_trip(self): """Make a simple round-trip with some sample data.""" @@ -143,9 +152,7 @@ def test_round_trip(self): expected = self.insert_placeholder_data() # make the request - response = requests.get(BASE_URL, params={'endpoint': 'covidcast_meta'}) - response.raise_for_status() - response = response.json() + response = self._fetch() # assert that the right data came back self.assertEqual(response, { @@ -160,71 +167,63 @@ def test_filter(self): # insert placeholder data and accumulate expected results (in sort order) expected = 
self.insert_placeholder_data() - def fetch(**kwargs): - # make the request - params = kwargs.copy() - params['endpoint'] = 'covidcast_meta' - response = requests.get(BASE_URL, params=params) - response.raise_for_status() - return response.json() - - res = fetch() + res = self._fetch() self.assertEqual(res['result'], 1) self.assertEqual(len(res['epidata']), len(expected)) # time types - res = fetch(time_types='day') + res = self._fetch(time_types='day') self.assertEqual(res['result'], 1) self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['time_type'] == 'day'])) - res = fetch(time_types='day,week') + res = self._fetch(time_types='day,week') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), len(expected)) - res = fetch(time_types='sec') + res = self._fetch(time_types='sec') self.assertEqual(res['result'], -2) # geo types - res = fetch(geo_types='hrr') + res = self._fetch(geo_types='hrr') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['geo_type'] == 'hrr'])) - res = fetch(geo_types='hrr,msa') + res = self._fetch(geo_types='hrr,msa') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), len(expected)) - res = fetch(geo_types='state') + res = self._fetch(geo_types='state') self.assertEqual(res['result'], -2) # signals - res = fetch(signals='src1:sig1') + res = self._fetch(signals='src1:sig1') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['data_source'] == 'src1' and s['signal'] == 'sig1'])) - res = fetch(signals='src1') + res = self._fetch(signals='src1') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), sum([1 for s in expected if 
s['data_source'] == 'src1'])) - res = fetch(signals='src1:*') + res = self._fetch(signals='src1:*') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['data_source'] == 'src1'])) - res = fetch(signals='src1:src4') + res = self._fetch(signals='src1:src4') self.assertEqual(res['result'], -2) - res = fetch(signals='src1:*,src2:*') + res = self._fetch(signals='src1:*,src2:*') self.assertEqual(res['result'], 1) self.assertTrue(isinstance(res['epidata'], list)) self.assertEqual(len(res['epidata']), len(expected)) # filter fields - res = fetch(fields='data_source,min_time') + res = self._fetch(fields='data_source,min_time') self.assertEqual(res['result'], 1) self.assertEqual(len(res['epidata']), len(expected)) self.assertTrue('data_source' in res['epidata'][0]) @@ -232,7 +231,7 @@ def fetch(**kwargs): self.assertFalse('max_time' in res['epidata'][0]) self.assertFalse('signal' in res['epidata'][0]) - res = fetch(fields='xx') + res = self._fetch(fields='xx') self.assertEqual(res['result'], 1) self.assertEqual(len(res['epidata']), len(expected)) self.assertEqual(res['epidata'][0], {}) diff --git a/integrations/server/test_covidcast_nowcast.py b/integrations/server/test_covidcast_nowcast.py index 7df695038..889d962dd 100644 --- a/integrations/server/test_covidcast_nowcast.py +++ b/integrations/server/test_covidcast_nowcast.py @@ -10,6 +10,7 @@ # use the local instance of the Epidata API BASE_URL = 'http://delphi_web_epidata/epidata/api.php' +AUTH = ('epidata', 'key') class CovidcastTests(unittest.TestCase): @@ -26,6 +27,8 @@ def setUp(self): database='epidata') cur = cnx.cursor() cur.execute('truncate table covidcast_nowcast') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values("key", "email")') cnx.commit() cur.close() @@ -38,6 +41,12 @@ def tearDown(self): self.cur.close() self.cnx.close() + @staticmethod + def 
_make_request(params: dict): + response = requests.get(BASE_URL, params=params, auth=AUTH) + response.raise_for_status() + return response.json() + def test_query(self): """Query nowcasts using default and specified issue.""" @@ -49,7 +58,7 @@ def test_query(self): self.cnx.commit() # make the request with specified issue date - response = requests.get(BASE_URL, params={ + params={ 'source': 'covidcast_nowcast', 'data_source': 'src', 'signals': 'sig', @@ -59,9 +68,8 @@ def test_query(self): 'time_values': 20200101, 'geo_value': '01001', 'issues': 20200101 - }) - response.raise_for_status() - response = response.json() + } + response = self._make_request(params=params) self.assertEqual(response, { 'result': 1, 'epidata': [{ @@ -76,7 +84,7 @@ def test_query(self): }) # make request without specific issue date - response = requests.get(BASE_URL, params={ + params={ 'source': 'covidcast_nowcast', 'data_source': 'src', 'signals': 'sig', @@ -85,9 +93,8 @@ def test_query(self): 'geo_type': 'county', 'time_values': 20200101, 'geo_value': '01001', - }) - response.raise_for_status() - response = response.json() + } + response = self._make_request(params=params) self.assertEqual(response, { 'result': 1, @@ -102,7 +109,7 @@ def test_query(self): 'message': 'success', }) - response = requests.get(BASE_URL, params={ + params={ 'source': 'covidcast_nowcast', 'data_source': 'src', 'signals': 'sig', @@ -112,9 +119,8 @@ def test_query(self): 'time_values': 20200101, 'geo_value': '01001', 'as_of': 20200101 - }) - response.raise_for_status() - response = response.json() + } + response = self._make_request(params=params) self.assertEqual(response, { 'result': 1, diff --git a/integrations/server/test_fluview.py b/integrations/server/test_fluview.py index 8bfc18376..c192da637 100644 --- a/integrations/server/test_fluview.py +++ b/integrations/server/test_fluview.py @@ -19,6 +19,7 @@ def setUpClass(cls): # use the local instance of the Epidata API Epidata.BASE_URL = 
'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') def setUp(self): """Perform per-test setup.""" @@ -31,6 +32,8 @@ def setUp(self): database='epidata') cur = cnx.cursor() cur.execute('truncate table fluview') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') cnx.commit() cur.close() diff --git a/integrations/server/test_fluview_meta.py b/integrations/server/test_fluview_meta.py index 137e9464a..1e2cf73e3 100644 --- a/integrations/server/test_fluview_meta.py +++ b/integrations/server/test_fluview_meta.py @@ -19,6 +19,7 @@ def setUpClass(cls): # use the local instance of the Epidata API Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php' + Epidata.auth = ('epidata', 'key') def setUp(self): """Perform per-test setup.""" @@ -31,6 +32,8 @@ def setUp(self): database='epidata') cur = cnx.cursor() cur.execute('truncate table fluview') + cur.execute('delete from api_user') + cur.execute('insert into api_user(api_key, email) values ("key", "email")') cnx.commit() cur.close() diff --git a/requirements.api.txt b/requirements.api.txt index 5e31804b8..c7de90997 100644 --- a/requirements.api.txt +++ b/requirements.api.txt @@ -1,6 +1,7 @@ delphi_utils==0.3.15 epiweeks==2.1.2 Flask==2.2.2 +Flask-Limiter==3.3.0 itsdangerous<2.1 jinja2==3.0.3 more_itertools==8.4.0 @@ -9,6 +10,9 @@ newrelic orjson==3.4.7 pandas==1.2.3 python-dotenv==0.15.0 +pyyaml +redis==3.5.3 +requests==2.28.1 scipy==1.6.2 SQLAlchemy==1.4.40 structlog==22.1.0 diff --git a/requirements.dev.txt b/requirements.dev.txt index 0bd175eeb..6b7b6b5db 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -16,10 +16,8 @@ pycountry==22.3.5 pymysql==1.0.2 pytest==7.2.0 pytest-check==1.3.0 -requests==2.28.1 sas7bdat==2.2.3 selenium==4.7.2 sqlalchemy-stubs>=0.3 -structlog==22.1.0 tenacity==7.0.0 xlrd==2.0.1 diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R index 4f72b5f91..8c179fffd 100644 --- 
a/src/client/delphi_epidata.R +++ b/src/client/delphi_epidata.R @@ -13,10 +13,14 @@ library(httr) Epidata <- (function() { # API base url - BASE_URL <- 'https://api.delphi.cmu.edu/epidata/api.php' + BASE_URL <- getOption('epidata.url', default = 'https://api.delphi.cmu.edu/epidata/') client_version <- '0.4.12' + auth <- getOption("epidata.auth", default = NA) + + client_user_agent <- user_agent(paste("delphi_epidata/", client_version, " (R)", sep="")) + # Helper function to cast values and/or ranges to strings .listitem <- function(value) { if(is.list(value) && 'from' %in% names(value) && 'to' %in% names(value)) { @@ -36,10 +40,31 @@ Epidata <- (function() { # Helper function to request and parse epidata .request <- function(params) { + headers <- add_headers(Authorization = ifelse(is.na(auth), "", paste("Bearer", auth))) + url <- paste(BASE_URL, params$endpoint, sep="") + params <- within(params, rm(endpoint)) # API call - res <- GET(BASE_URL, query=params) + res <- GET(url, + client_user_agent, + headers, + query=params) if (res$status_code == 414) { - res <- POST(BASE_URL, body=params, encode='form') + res <- POST(url, + client_user_agent, + headers, + body=params, encode='form') + } + if (res$status_code != 200) { + # 500, 429, 401 are possible + msg <- "fetch data from API" + if (http_type(res) == "text/html") { + # grab the error information out of the returned HTML document + msg <- paste(msg, ":", xml2::xml_text(xml2::xml_find_all( + xml2::read_html(content(res, 'text')), + "//p" + ))) + } + stop_for_status(res, task = msg) } return(content(res, 'parsed')) } @@ -563,7 +588,7 @@ Epidata <- (function() { } # Set up request params <- list( - endpoint = 'covid_hosp', + endpoint = 'covid_hosp_state_timeseries', states = .list(states), dates = .list(dates) ) @@ -582,7 +607,7 @@ Epidata <- (function() { } # Set up request params <- list( - source = 'covid_hosp_facility', + endpoint = 'covid_hosp_facility', hospital_pks = .list(hospital_pks), collection_weeks = 
.list(collection_weeks) ) @@ -596,7 +621,7 @@ Epidata <- (function() { # Lookup COVID hospitalization facility identifiers covid_hosp_facility_lookup <- function(state, ccn, city, zip, fips_code) { # Set up request - params <- list(source = 'covid_hosp_facility_lookup') + params <- list(endpoint = 'covid_hosp_facility_lookup') if(!missing(state)) { params$state <- state } else if(!missing(ccn)) { @@ -614,14 +639,21 @@ Epidata <- (function() { return(.request(params)) } + server_version <- function() { + return(.request(list(endpoint = 'version'))) + } + # Export the public methods return(list( range = range, client_version = client_version, + server_version = server_version, fluview = fluview, fluview_meta = fluview_meta, fluview_clinical = fluview_clinical, flusurv = flusurv, + ecdc_ili = ecdc_ili, + kcdc_ili = kcdc_ili, gft = gft, ght = ght, twitter = twitter, diff --git a/src/client/delphi_epidata.py b/src/client/delphi_epidata.py index 61b5e41eb..a56527357 100644 --- a/src/client/delphi_epidata.py +++ b/src/client/delphi_epidata.py @@ -13,7 +13,7 @@ import asyncio from tenacity import retry, stop_after_attempt -from aiohttp import ClientSession, TCPConnector +from aiohttp import ClientSession, TCPConnector, BasicAuth from pkg_resources import get_distribution, DistributionNotFound # Obtain package version for the user-agent. 
Uses the installed version by @@ -25,7 +25,7 @@ _version = "0.script" _HEADERS = { - "user-agent": "delphi_epidata/" + _version + "user-agent": "delphi_epidata/" + _version + " (Python)" } # Because the API is stateless, the Epidata class only contains static methods @@ -34,6 +34,7 @@ class Epidata: # API base url BASE_URL = 'https://api.delphi.cmu.edu/epidata/api.php' + auth = None client_version = _version @@ -58,9 +59,11 @@ def _list(values): @retry(reraise=True, stop=stop_after_attempt(2)) def _request_with_retry(params): """Make request with a retry if an exception is thrown.""" - req = requests.get(Epidata.BASE_URL, params, headers=_HEADERS) + req = requests.get(Epidata.BASE_URL, params, auth=Epidata.auth, headers=_HEADERS) if req.status_code == 414: - req = requests.post(Epidata.BASE_URL, params, headers=_HEADERS) + req = requests.post(Epidata.BASE_URL, params, auth=Epidata.auth, headers=_HEADERS) + # handle 401 and 429 + req.raise_for_status() return req @staticmethod @@ -73,12 +76,15 @@ def _request(params): """ try: result = Epidata._request_with_retry(params) - if params is not None and "format" in params and params["format"]=="csv": - return result.text - else: - return result.json() except Exception as e: return {'result': 0, 'message': 'error: ' + str(e)} + if params is not None and "format" in params and params["format"]=="csv": + return result.text + else: + try: + return result.json() + except requests.exceptions.JSONDecodeError: + return {'result': 0, 'message': 'error decoding json: ' + result.text} # Raise an Exception on error, otherwise return epidata @staticmethod @@ -736,7 +742,11 @@ async def async_make_calls(param_combos): """Helper function to asynchronously make and aggregate Epidata GET requests.""" tasks = [] connector = TCPConnector(limit=batch_size) - async with ClientSession(connector=connector, headers=_HEADERS) as session: + if isinstance(Epidata.auth, tuple): + auth = BasicAuth(login=Epidata.auth[0], password=Epidata.auth[1], 
encoding='utf-8') + else: + auth = Epidata.auth + async with ClientSession(connector=connector, headers=_HEADERS, auth=auth) as session: for param in param_combos: task = asyncio.ensure_future(async_get(param, session)) tasks.append(task) diff --git a/src/ddl/api_analytics.sql b/src/ddl/api_analytics.sql deleted file mode 100644 index 7b8aa0279..000000000 --- a/src/ddl/api_analytics.sql +++ /dev/null @@ -1,32 +0,0 @@ -USE epidata; -/* -`api_analytics` logs API usage, which Delphi uses to improve the API. - -This data is private to Delphi. - -+----------+---------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+---------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| datetime | datetime | NO | MUL | NULL | | -| ip | varchar(15) | NO | MUL | NULL | | -| ua | varchar(1024) | NO | | NULL | | -| source | varchar(32) | NO | MUL | NULL | | -| result | int(11) | NO | | NULL | | -| num_rows | int(11) | NO | | NULL | | -+----------+---------------+------+-----+---------+----------------+ -*/ - -CREATE TABLE `api_analytics` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `datetime` datetime NOT NULL, - `ip` varchar(15) NOT NULL, - `ua` varchar(1024) NOT NULL, - `source` varchar(32) NOT NULL, - `result` int(11) NOT NULL, - `num_rows` int(11) NOT NULL, - PRIMARY KEY (`id`), - KEY `datetime` (`datetime`), - KEY `ip` (`ip`), - KEY `source` (`source`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; diff --git a/src/ddl/api_user.sql b/src/ddl/api_user.sql new file mode 100644 index 000000000..90e56539f --- /dev/null +++ b/src/ddl/api_user.sql @@ -0,0 +1,31 @@ +USE epidata; + + +-- `api_user` API key and user management + +CREATE TABLE IF NOT EXISTS `api_user` ( + `id` int(11) UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + `api_key` varchar(50) UNIQUE NOT NULL, + `email` varchar(320) UNIQUE NOT NULL, + `created` date, + `last_time_used` date, + UNIQUE KEY `api_user` (`api_key`, 
`email`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + + +-- `user_role` User roles + +CREATE TABLE IF NOT EXISTS `user_role` ( + `id` int(11) UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + `name` varchar(50) NOT NULL, + UNIQUE KEY `name` (`name`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + + +-- `user_role_link` User roles link table + +CREATE TABLE IF NOT EXISTS `user_role_link` ( + `user_id` int(11) UNSIGNED NOT NULL, + `role_id` int(11) UNSIGNED NOT NULL, + PRIMARY KEY (`user_id`, `role_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; diff --git a/src/server/_common.py b/src/server/_common.py index 2d2d3059f..6c424326e 100644 --- a/src/server/_common.py +++ b/src/server/_common.py @@ -2,15 +2,17 @@ import time from flask import Flask, g, request -from sqlalchemy import create_engine, event -from sqlalchemy.engine import Connection, Engine +from sqlalchemy import event +from sqlalchemy.engine import Connection +from werkzeug.exceptions import Unauthorized from werkzeug.local import LocalProxy from delphi.epidata.common.logger import get_structured_logger -from ._config import SECRET, SQLALCHEMY_DATABASE_URI, SQLALCHEMY_ENGINE_OPTIONS +from ._config import SECRET, REVERSE_PROXIED +from ._db import engine from ._exceptions import DatabaseErrorException, EpiDataException +from ._security import current_user, _is_public_route, resolve_auth_token, show_no_api_key_warning, update_key_last_time_used, ERROR_MSG_INVALID_KEY -engine: Engine = create_engine(SQLALCHEMY_DATABASE_URI, **SQLALCHEMY_ENGINE_OPTIONS) app = Flask("EpiData", static_url_path="") app.config["SECRET"] = SECRET @@ -22,12 +24,52 @@ def _get_db() -> Connection: g.db = conn return g.db - """ access to the SQL Alchemy connection for this request """ db: Connection = cast(Connection, LocalProxy(_get_db)) + +def get_real_ip_addr(req): # `req` should be a Flask.request object + if REVERSE_PROXIED: + # NOTE: ONLY trust these headers if reverse proxied!!! 
+ if "X-Forwarded-For" in req.headers: + return req.headers["X-Forwarded-For"].split(",")[ + 0 + ] # take the first (or only) address from the comma-sep list + if "X-Real-Ip" in req.headers: + return req.headers["X-Real-Ip"] + return req.remote_addr + + +def log_info_with_request(message, **kwargs): + # TODO: make log level an option and check for key conflicts in kwargs + get_structured_logger("server_api").info( + message, + method=request.method, + url=request.url, + form_args=request.form, + req_length=request.content_length, + remote_addr=request.remote_addr, + real_remote_addr=get_real_ip_addr(request), + user_agent=request.user_agent.string, + api_key=resolve_auth_token(), + user_id=(current_user and current_user.id), + **kwargs + ) + +def log_info_with_request_and_response(message, response, **kwargs): + # TODO: make log level an option and check for key conflicts in kwargs + log_info_with_request( + message, + values=request.values.to_dict(flat=False), + blueprint=request.blueprint, + endpoint=request.endpoint, + response_status=response.status, + content_length=response.calculate_content_length(), + **kwargs + ) + @event.listens_for(engine, "before_cursor_execute") def before_cursor_execute(conn, cursor, statement, parameters, context, executemany): context._query_start_time = time.time() @@ -43,7 +85,9 @@ def after_cursor_execute(conn, cursor, statement, parameters, context, executema # Convert to milliseconds total_time *= 1000 - get_structured_logger('server_api').info("Executed SQL", statement=statement, params=parameters, elapsed_time_ms=total_time) + get_structured_logger("server_api").info( + "Executed SQL", statement=statement, params=parameters, elapsed_time_ms=total_time + ) @app.before_request @@ -51,16 +95,37 @@ def before_request_execute(): # Set timer for statement g._request_start_time = time.time() - # Log statement - get_structured_logger('server_api').info("Received API request", method=request.method, url=request.url, 
form_args=request.form, req_length=request.content_length, remote_addr=request.remote_addr, user_agent=request.user_agent.string) - - if request.path.startswith('/lib'): + user = current_user + api_key = resolve_auth_token() + + # TODO: replace this next call with: log_info_with_request("Received API request") + get_structured_logger("server_api").info( + "Received API request", + method=request.method, + url=request.url, + form_args=request.form, + req_length=request.content_length, + remote_addr=request.remote_addr, + real_remote_addr=get_real_ip_addr(request), + user_agent=request.user_agent.string, + api_key=api_key, + user_id=(user and user.id) + ) + + if not show_no_api_key_warning(): + if not _is_public_route() and api_key and not user: + # if this is a privleged endpoint, and an api key was given but it does not look up to a user, raise exception: + get_structured_logger("server_api").info("bad api key used", api_key=api_key) + raise Unauthorized(ERROR_MSG_INVALID_KEY) + + if request.path.startswith("/lib"): + # files served from 'lib' directory don't need the database, so we can exit this early... 
return # try to get the db try: _get_db() except Exception as e: - get_structured_logger('server_error').error('database connection error', exception=e) + get_structured_logger("server_error").error("database connection error", exception=e) raise DatabaseErrorException() @@ -69,9 +134,29 @@ def after_request_execute(response): total_time = time.time() - g._request_start_time # Convert to milliseconds total_time *= 1000 - get_structured_logger('server_api').info('Served API request', method=request.method, url=request.url, form_args=request.form, req_length=request.content_length, remote_addr=request.remote_addr, user_agent=request.user_agent.string, - values=request.values.to_dict(flat=False), blueprint=request.blueprint, endpoint=request.endpoint, - response_status=response.status, content_length=response.calculate_content_length(), elapsed_time_ms=total_time) + + api_key = resolve_auth_token() + + update_key_last_time_used(current_user) + + # TODO: replace this next call with: log_info_with_request_and_response("Served API request", response, elapsed_time_ms=total_time) + get_structured_logger("server_api").info( + "Served API request", + method=request.method, + url=request.url, + form_args=request.form, + req_length=request.content_length, + remote_addr=request.remote_addr, + real_remote_addr=get_real_ip_addr(request), + user_agent=request.user_agent.string, + api_key=api_key, + values=request.values.to_dict(flat=False), + blueprint=request.blueprint, + endpoint=request.endpoint, + response_status=response.status, + content_length=response.calculate_content_length(), + elapsed_time_ms=total_time, + ) return response @@ -88,9 +173,9 @@ def teardown_db(exception=None): def handle_exception(e): # Log error and pass through; EpiDataExceptions are HTTPExceptions which are valid WSGI responses (see https://werkzeug.palletsprojects.com/en/2.2.x/exceptions/ ) if isinstance(e, DatabaseErrorException): - get_structured_logger('server_error').error('Received 
DatabaseErrorException', exception=str(e), exc_info=True) + get_structured_logger("server_error").error("Received DatabaseErrorException", exception=str(e), exc_info=True) else: - get_structured_logger('server_error').warn('Encountered user-side error', exception=str(e)) + get_structured_logger("server_error").warn("Encountered user-side error", exception=str(e)) return e @@ -98,7 +183,7 @@ def is_compatibility_mode() -> bool: """ checks whether this request is in compatibility mode """ - return 'compatibility' in g and g.compatibility + return "compatibility" in g and g.compatibility def set_compatibility_mode(): diff --git a/src/server/_config.py b/src/server/_config.py index 073da564a..54433e411 100644 --- a/src/server/_config.py +++ b/src/server/_config.py @@ -1,5 +1,8 @@ import json import os +from datetime import date +from enum import Enum + from dotenv import load_dotenv load_dotenv() @@ -13,45 +16,23 @@ # defaults SQLALCHEMY_ENGINE_OPTIONS = { - "pool_pre_ping": True, # enable ping test for validity of recycled pool connections on connect() calls - "pool_recycle": 5 # seconds after which a recycled pool connection is considered invalid + "pool_pre_ping": True, # enable ping test for validity of recycled pool connections on connect() calls + "pool_recycle": 5, # seconds after which a recycled pool connection is considered invalid } # update with overrides of defaults or additions from external configs -SQLALCHEMY_ENGINE_OPTIONS.update( - json.loads(os.environ.get("SQLALCHEMY_ENGINE_OPTIONS", "{}"))) +SQLALCHEMY_ENGINE_OPTIONS.update(json.loads(os.environ.get("SQLALCHEMY_ENGINE_OPTIONS", "{}"))) SECRET = os.environ.get("FLASK_SECRET", "secret") -URL_PREFIX = os.environ.get("FLASK_PREFIX", "/") - -AUTH = { - "twitter": os.environ.get("SECRET_TWITTER"), - "ght": os.environ.get("SECRET_GHT"), - "fluview": os.environ.get("SECRET_FLUVIEW"), - "cdc": os.environ.get("SECRET_CDC"), - "sensors": os.environ.get("SECRET_SENSORS"), - "quidel": 
os.environ.get("SECRET_QUIDEL"), - "norostat": os.environ.get("SECRET_NOROSTAT"), - "afhsb": os.environ.get("SECRET_AFHSB"), -} +URL_PREFIX = os.environ.get("FLASK_PREFIX", "") # if not empty, this value should begin but not end in a slash ('/') -# begin sensor query authentication configuration -# A multimap of sensor names to the "granular" auth tokens that can be used to access them; excludes the "global" sensor auth key that works for all sensors: -GRANULAR_SENSOR_AUTH_TOKENS = { - "twtr": os.environ.get("SECRET_SENSOR_TWTR", "").split(","), - "gft": os.environ.get("SECRET_SENSOR_GFT", "").split(","), - "ght": os.environ.get("SECRET_SENSOR_GHT", "").split(","), - "ghtj": os.environ.get("SECRET_SENSOR_GHTJ", "").split(","), - "cdc": os.environ.get("SECRET_SENSOR_CDC", "").split(","), - "quid": os.environ.get("SECRET_SENSOR_QUID", "").split(","), - "wiki": os.environ.get("SECRET_SENSOR_WIKI", "").split(","), -} - -# A set of sensors that do not require an auth key to access: -OPEN_SENSORS = [ - "sar3", - "epic", - "arch", -] +# REVERSE_PROXIED is a boolean value that indicates whether or not this server instance +# is running behind a reverse proxy (like nginx). +# in dev and testing, it is fine (or even preferable) for this variable to be set to 'TRUE' +# even if it is not actually the case. in prod, it is very important that this is set accurately -- +# it should _only_ be set to 'TRUE' if it really is behind a reverse proxy, as remote addresses can be "spoofed" +# which can carry security/identity implications. conversely, if this is set to 'FALSE' when in fact +# running behind a reverse proxy, it can hinder logging accuracy. it defaults to 'FALSE' for safety. 
+REVERSE_PROXIED = os.environ.get("REVERSE_PROXIED", "FALSE").upper() == "TRUE" REGION_TO_STATE = { "hhs1": ["VT", "CT", "ME", "MA", "NH", "RI"], @@ -75,3 +56,32 @@ "cen9": ["AK", "CA", "HI", "OR", "WA"], } NATION_REGION = "nat" + +API_KEY_REQUIRED_STARTING_AT = date.fromisoformat(os.environ.get("API_KEY_REQUIRED_STARTING_AT", "2023-06-21")) +TEMPORARY_API_KEY = os.environ.get("TEMPORARY_API_KEY", "TEMP-API-KEY-EXPIRES-2023-06-28") +# password needed for the admin application if not set the admin routes won't be available +ADMIN_PASSWORD = os.environ.get("API_KEY_ADMIN_PASSWORD", "abc") +# secret for the google form to give to the admin/register endpoint +REGISTER_WEBHOOK_TOKEN = os.environ.get("API_KEY_REGISTER_WEBHOOK_TOKEN") + +REDIS_HOST = os.environ.get("REDIS_HOST", "delphi_redis") +REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "1234") + +# https://flask-limiter.readthedocs.io/en/stable/#rate-limit-string-notation +RATE_LIMIT = os.environ.get("RATE_LIMIT", "60/hour") +# fixed-window, fixed-window-elastic-expiry, or moving-window +# see also https://flask-limiter.readthedocs.io/en/stable/#rate-limiting-strategies +RATELIMIT_STRATEGY = os.environ.get("RATELIMIT_STRATEGY", "fixed-window") + +# see https://flask-limiter.readthedocs.io/en/stable/#configuration +RATELIMIT_STORAGE_URL = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:6379" + +API_KEY_REGISTRATION_FORM_LINK = "https://forms.gle/hkBr5SfQgxguAfEt7" +# ^ shortcut to "https://docs.google.com/forms/d/e/1FAIpQLSe5i-lgb9hcMVepntMIeEo8LUZUMTUnQD3hbrQI3vSteGsl4w/viewform?usp=sf_link" +API_KEY_REGISTRATION_FORM_LINK_LOCAL = "https://api.delphi.cmu.edu/epidata/admin/registration_form" +# ^ redirects to API_KEY_REGISTRATION_FORM_LINK + +API_KEY_REMOVAL_REQUEST_LINK = "https://forms.gle/GucFmZHTMgEFjH197" +# ^ shortcut to "https://docs.google.com/forms/d/e/1FAIpQLSff30tsq4xwPCoUbvaIygLSMs_Mt8eDhHA0rifBoIrjo8J5lw/viewform" +API_KEY_REMOVAL_REQUEST_LINK_LOCAL = 
"https://api.delphi.cmu.edu/epidata/admin/removal_request" +# ^ redirects to API_KEY_REMOVAL_REQUEST_LINK diff --git a/src/server/_db.py b/src/server/_db.py new file mode 100644 index 000000000..9057b8181 --- /dev/null +++ b/src/server/_db.py @@ -0,0 +1,18 @@ +from sqlalchemy import create_engine, MetaData +from sqlalchemy.engine import Engine +from sqlalchemy.orm import sessionmaker + +from ._config import SQLALCHEMY_DATABASE_URI, SQLALCHEMY_ENGINE_OPTIONS + + +# _db.py exists so that we dont have a circular dependency: +# previously `_common` imported from `_security` which imported from `admin.models`, which imported (back again) from `_common` for database connection objects + + +engine: Engine = create_engine(SQLALCHEMY_DATABASE_URI, **SQLALCHEMY_ENGINE_OPTIONS) + +metadata = MetaData(bind=engine) + +Session = sessionmaker(bind=engine) + + diff --git a/src/server/_exceptions.py b/src/server/_exceptions.py index 835bfc118..86ef028c1 100644 --- a/src/server/_exceptions.py +++ b/src/server/_exceptions.py @@ -25,11 +25,6 @@ def __init__(self, endpoints: Iterable[str]): super(MissingOrWrongSourceException, self).__init__(f"no data source specified, possible values: {','.join(endpoints)}", 400) -class UnAuthenticatedException(EpiDataException): - def __init__(self): - super(UnAuthenticatedException, self).__init__("unauthenticated", 401) - - class ValidationFailedException(EpiDataException): def __init__(self, message: str): super(ValidationFailedException, self).__init__(message, 400) diff --git a/src/server/_limiter.py b/src/server/_limiter.py new file mode 100644 index 000000000..4bf72e05b --- /dev/null +++ b/src/server/_limiter.py @@ -0,0 +1,148 @@ +from delphi.epidata.server.endpoints.covidcast_utils.dashboard_signals import DashboardSignals +from flask import Response, request, make_response, jsonify +from flask_limiter import Limiter, HEADERS +from redis import Redis +from werkzeug.exceptions import Unauthorized, TooManyRequests + +from ._common import app, 
get_real_ip_addr +from ._config import RATE_LIMIT, RATELIMIT_STORAGE_URL, REDIS_HOST, REDIS_PASSWORD +from ._exceptions import ValidationFailedException +from ._params import extract_dates, extract_integers, extract_strings +from ._security import _is_public_route, current_user, require_api_key, show_no_api_key_warning, resolve_auth_token, ERROR_MSG_RATE_LIMIT, ERROR_MSG_MULTIPLES + + +def deduct_on_success(response: Response) -> bool: + if response.status_code != 200: + return False + # check if we have the classic format + if not response.is_streamed and response.is_json: + out = response.json + if out and isinstance(out, dict) and out.get("result") == -1: + return False + return True + + +def get_multiples_count(request): + multiples = { + "articles": extract_strings, + "ccn": extract_strings, + "city": extract_strings, + "collection_weeks": extract_integers, + "dates": extract_integers, + "epiweeks": extract_integers, + "fips_code": extract_strings, + "flu_types": extract_strings, + "geo_value": extract_strings, + "geo_values": extract_strings, + "hospital_pks": extract_strings, + "issues": extract_integers, + "locations": extract_strings, + "names": extract_strings, + "publication_dates": extract_strings, + "regions": extract_strings, + "sensor_names": extract_strings, + "signal": extract_strings, + "signals": extract_strings, + "states": extract_strings, + "time_types": extract_strings, + "time_values": extract_dates, + "zip": extract_strings, + } + multiple_selection_allowed = 2 + if "window" in request.args.keys(): + multiple_selection_allowed -= 1 + for k, v in request.args.items(): + if v == "*": + multiple_selection_allowed -= 1 + try: + vals = multiples.get(k)(k) + if len(vals) >= 2: + multiple_selection_allowed -= 1 + elif len(vals) and isinstance(vals, list) and isinstance(vals[0], tuple): + # else we have one val which is a tuple, representing a range, and thus is a "multiple" + multiple_selection_allowed -= 1 + except ValidationFailedException: + 
continue + except TypeError: + continue + return multiple_selection_allowed + + +def check_signals_allowlist(request): + signals_allowlist = {":".join(ss_pair) for ss_pair in DashboardSignals().srcsig_list()} + request_signals = [] + if "signal" in request.args.keys(): + request_signals += extract_strings("signal") + if "signals" in request.args.keys(): + request_signals += extract_strings("signals") + if "data_source" in request.args: + request_signals = [f"{request.args['data_source']}:{request_signal}" for request_signal in request_signals] + if len(request_signals) == 0: + return False + return all([signal in signals_allowlist for signal in request_signals]) + + +def _resolve_tracking_key() -> str: + token = resolve_auth_token() + return token or get_real_ip_addr(request) + + +limiter = Limiter( + _resolve_tracking_key, + app=app, + storage_uri=RATELIMIT_STORAGE_URL, + request_identifier=lambda: "EpidataLimiter", + headers_enabled=True, + header_name_mapping={ + HEADERS.LIMIT: "X-My-Limit", + HEADERS.RESET: "X-My-Reset", + HEADERS.REMAINING: "X-My-Remaining", + }, +) + +apply_limit = limiter.limit(RATE_LIMIT, deduct_when=deduct_on_success) + + +@app.errorhandler(429) +def ratelimit_handler(e): + return TooManyRequests(ERROR_MSG_RATE_LIMIT) + + +def requests_left(): + r = Redis(host=REDIS_HOST, password=REDIS_PASSWORD) + allowed_count, period = RATE_LIMIT.split("/") + try: + remaining_count = int(allowed_count) - int( + r.get(f"LIMITER/{_resolve_tracking_key()}/EpidataLimiter/{allowed_count}/1/{period}") + ) + except TypeError: + return 1 + return remaining_count + + +@limiter.request_filter +def _no_rate_limit() -> bool: + if show_no_api_key_warning(): + # no rate limit in phase 0 + return True + if _is_public_route(): + # no rate limit for public routes + return True + if current_user: + # no rate limit if user is registered + return True + + if not require_api_key(): + # we are in phase 1 or 2 + if requests_left() > 0: + # ...and user is below rate limit, we 
still want to record this query for the rate computation... + return False + # ...otherwise, they have exceeded the limit, but we still want to allow them through + return True + + # phase 3 (full api-keys behavior) + multiples = get_multiples_count(request) + if multiples < 0: + # too many multiples + raise Unauthorized(ERROR_MSG_MULTIPLES) + return check_signals_allowlist(request) diff --git a/src/server/_printer.py b/src/server/_printer.py index 162ba2e36..6e32d7d43 100644 --- a/src/server/_printer.py +++ b/src/server/_printer.py @@ -2,12 +2,15 @@ from io import StringIO from typing import Any, Dict, Iterable, List, Optional, Union -from flask import Response, jsonify, stream_with_context +from flask import Response, jsonify, stream_with_context, request from flask.json import dumps import orjson from ._config import MAX_RESULTS, MAX_COMPATIBILITY_RESULTS -from ._common import is_compatibility_mode +# TODO: remove warnings after once we are past the API_KEY_REQUIRED_STARTING_AT date +from ._security import show_hard_api_key_warning, show_soft_api_key_warning, ROLLOUT_WARNING_RATE_LIMIT, ROLLOUT_WARNING_MULTIPLES, _ROLLOUT_WARNING_AD_FRAGMENT, PHASE_1_2_STOPGAP +from ._common import is_compatibility_mode, log_info_with_request +from ._limiter import requests_left, get_multiples_count from delphi.epidata.common.logger import get_structured_logger @@ -22,7 +25,15 @@ def print_non_standard(format: str, data): message = "no results" result = -2 else: - message = "success" + warning = "" + if show_hard_api_key_warning(): + if requests_left() == 0: + warning = f"{ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + message = warning.strip() or "success" result = 1 if result == -1 and is_compatibility_mode(): return jsonify(dict(result=result, message=message)) @@ 
-69,6 +80,7 @@ def gen(): yield r r = self._end() + log_info_with_request("APrinter finished processing rows", count=self.count) if r is not None: yield r @@ -90,6 +102,8 @@ def _print_row(self, row: Dict) -> Optional[Union[str, bytes]]: first = self.count == 0 if self.count >= self._max_results: # hit the limit + # TODO: consider making this a WARN-level log event + log_info_with_request("Max result limit reached", count=self.count) self.result = 2 return None if first: @@ -112,21 +126,40 @@ class ClassicPrinter(APrinter): """ def _begin(self): - if is_compatibility_mode(): + if is_compatibility_mode() and not show_hard_api_key_warning(): return "{ " - return '{ "epidata": [' + r = '{ "epidata": [' + if show_hard_api_key_warning(): + warning = "" + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + if warning != "": + return f'{r} "{warning.strip()}",' + return r def _format_row(self, first: bool, row: Dict): - if first and is_compatibility_mode(): + if first and is_compatibility_mode() and not show_hard_api_key_warning(): sep = b'"epidata": [' else: sep = b"," if not first else b"" return sep + orjson.dumps(row) def _end(self): - message = "success" + warning = "" + if show_soft_api_key_warning(): + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + message = warning.strip() or "success" prefix = "], " - if self.count == 0 and is_compatibility_mode(): + if self.count == 0 and is_compatibility_mode() and not show_hard_api_key_warning(): # no array to 
end prefix = "" @@ -160,7 +193,7 @@ def _format_row(self, first: bool, row: Dict): self._tree[group].append(row) else: self._tree[group] = [row] - if first and is_compatibility_mode(): + if first and is_compatibility_mode() and not show_hard_api_key_warning(): return b'"epidata": [' return None @@ -171,7 +204,10 @@ def _end(self): tree = orjson.dumps(self._tree) self._tree = dict() r = super(ClassicTreePrinter, self)._end() - return tree + r + r = tree + r + if show_hard_api_key_warning() and (requests_left() == 0 or get_multiples_count(request) < 0): + r = b", " + r + return r class CSVPrinter(APrinter): @@ -200,8 +236,20 @@ def _error(self, error: Exception) -> str: def _format_row(self, first: bool, row: Dict): if first: - self._writer = DictWriter(self._stream, list(row.keys()), lineterminator="\n") + columns = list(row.keys()) + self._writer = DictWriter(self._stream, columns, lineterminator="\n") self._writer.writeheader() + if show_hard_api_key_warning(): + warning = "" + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + if warning.strip() != "": + self._writer.writerow({columns[0]: warning}) + self._writer.writerow(row) # remove the stream content to print just one line at a time @@ -222,7 +270,18 @@ class JSONPrinter(APrinter): """ def _begin(self): - return b"[" + r = b"[" + if show_hard_api_key_warning(): + warning = "" + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + if warning.strip() != "": + r = b'["' + bytes(warning, "utf-8") + 
b'",' + return r def _format_row(self, first: bool, row: Dict): sep = b"," if not first else b"" @@ -240,6 +299,19 @@ class JSONLPrinter(APrinter): def make_response(self, gen): return Response(gen, mimetype=" text/plain; charset=utf8") + def _begin(self): + if show_hard_api_key_warning(): + warning = "" + if requests_left() == 0: + warning = f"{warning} {ROLLOUT_WARNING_RATE_LIMIT}" + if get_multiples_count(request) < 0: + warning = f"{warning} {ROLLOUT_WARNING_MULTIPLES}" + if requests_left() == 0 or get_multiples_count(request) < 0: + warning = f"{warning} {_ROLLOUT_WARNING_AD_FRAGMENT} {PHASE_1_2_STOPGAP}" + if warning.strip() != "": + return bytes(warning, "utf-8") + b"\n" + return None + def _format_row(self, first: bool, row: Dict): # each line is a JSON file with a new line to separate them return orjson.dumps(row, option=orjson.OPT_APPEND_NEWLINE) diff --git a/src/server/_security.py b/src/server/_security.py new file mode 100644 index 000000000..36fb1d93b --- /dev/null +++ b/src/server/_security.py @@ -0,0 +1,128 @@ +from datetime import date, datetime, timedelta +from functools import wraps +from typing import Optional, cast + +import redis +from delphi.epidata.common.logger import get_structured_logger +from flask import g, request +from werkzeug.exceptions import Unauthorized +from werkzeug.local import LocalProxy + +from ._config import ( + API_KEY_REQUIRED_STARTING_AT, + REDIS_HOST, + REDIS_PASSWORD, + API_KEY_REGISTRATION_FORM_LINK_LOCAL, + TEMPORARY_API_KEY, + URL_PREFIX, +) +from .admin.models import User, UserRole + +API_KEY_HARD_WARNING = API_KEY_REQUIRED_STARTING_AT - timedelta(days=14) +API_KEY_SOFT_WARNING = API_KEY_HARD_WARNING - timedelta(days=14) + +# rollout warning messages +ROLLOUT_WARNING_RATE_LIMIT = "This request exceeded the rate limit on anonymous requests, which will be enforced starting {}.".format(API_KEY_REQUIRED_STARTING_AT) +ROLLOUT_WARNING_MULTIPLES = "This request exceeded the anonymous limit on selected multiples, which 
will be enforced starting {}.".format(API_KEY_REQUIRED_STARTING_AT) +_ROLLOUT_WARNING_AD_FRAGMENT = "To be exempt from this limit, authenticate your requests with a free API key, now available at {}.".format(API_KEY_REGISTRATION_FORM_LINK_LOCAL) + +PHASE_1_2_STOPGAP = ( + "A temporary public key `{}` is available for use between now and {} to give you time to register or adapt your requests without this message continuing to break your systems." +).format(TEMPORARY_API_KEY, (API_KEY_REQUIRED_STARTING_AT + timedelta(days=7))) + + +# steady-state error messages +ERROR_MSG_RATE_LIMIT = "Rate limit exceeded for anonymous queries. To remove this limit, register a free API key at {}".format(API_KEY_REGISTRATION_FORM_LINK_LOCAL) +ERROR_MSG_MULTIPLES = "Requested too many multiples for anonymous queries. To remove this limit, register a free API key at {}".format(API_KEY_REGISTRATION_FORM_LINK_LOCAL) +ERROR_MSG_INVALID_KEY = ( + "API key does not exist. Register a new key at {} or contact delphi-support+privacy@andrew.cmu.edu to troubleshoot".format(API_KEY_REGISTRATION_FORM_LINK_LOCAL) +) +ERROR_MSG_INVALID_ROLE = "Provided API key does not have access to this endpoint. Please contact delphi-support+privacy@andrew.cmu.edu." 
+ + +def resolve_auth_token() -> Optional[str]: + for n in ("auth", "api_key", "token"): + if n in request.values: + return request.values[n] + # username password + if request.authorization and request.authorization.username == "epidata": + return request.authorization.password + # bearer token authentication + auth_header = request.headers.get("Authorization") + if auth_header and auth_header.startswith("Bearer "): + return auth_header[len("Bearer ") :] + return None + + +def show_no_api_key_warning() -> bool: + # aka "phase 0" + n = date.today() + return not current_user and n < API_KEY_SOFT_WARNING + + +def show_soft_api_key_warning() -> bool: + # aka "phase 1" + n = date.today() + return not current_user and API_KEY_SOFT_WARNING <= n < API_KEY_HARD_WARNING + + +def show_hard_api_key_warning() -> bool: + # aka "phase 2" + n = date.today() + return not current_user and API_KEY_HARD_WARNING <= n < API_KEY_REQUIRED_STARTING_AT + + +def require_api_key() -> bool: + # aka "phase 3" + n = date.today() + return API_KEY_REQUIRED_STARTING_AT <= n + + +def _get_current_user(): + if "user" not in g: + api_key = resolve_auth_token() + g.user = User.find_user(api_key=api_key) + return g.user + + +current_user: User = cast(User, LocalProxy(_get_current_user)) + + +def register_user_role(role_name: str) -> None: + UserRole.create_role(role_name) + + +def _is_public_route() -> bool: + public_routes_list = ["lib", "admin", "version"] + for route in public_routes_list: + if request.path.startswith(f"{URL_PREFIX}/{route}"): + return True + return False + + +def require_role(required_role: str): + def decorator_wrapper(f): + if not required_role: + return f + + @wraps(f) + def decorated_function(*args, **kwargs): + if not current_user or not current_user.has_role(required_role): + get_structured_logger("api_security").info( + ERROR_MSG_INVALID_ROLE, + role=required_role, + api_key=(current_user and current_user.api_key), + ) + raise Unauthorized(ERROR_MSG_INVALID_ROLE) + return 
f(*args, **kwargs) + + return decorated_function + + return decorator_wrapper + + +def update_key_last_time_used(user): + if user: + # update last usage for this user's api key to "now()" + r = redis.Redis(host=REDIS_HOST, password=REDIS_PASSWORD) + r.set(f"LAST_USED/{user.api_key}", datetime.strftime(datetime.now(), "%Y-%m-%d")) diff --git a/src/server/_validate.py b/src/server/_validate.py index 957bee09d..37adc470f 100644 --- a/src/server/_validate.py +++ b/src/server/_validate.py @@ -1,37 +1,6 @@ -from typing import Optional - from flask import Request -from ._exceptions import UnAuthenticatedException, ValidationFailedException - - -def resolve_auth_token(request: Request) -> Optional[str]: - # auth request param - if "auth" in request.values: - return request.values["auth"] - # user name password - if request.authorization and request.authorization.username == "epidata": - return request.authorization.password - # bearer token authentication - auth_header = request.headers.get("Authorization") - if auth_header and auth_header.startswith("Bearer "): - return auth_header[len("Bearer ") :] - return None - - -def check_auth_token(request: Request, token: str, optional=False) -> bool: - value = resolve_auth_token(request) - - if value is None: - if optional: - return False - else: - raise ValidationFailedException(f"missing parameter: auth") - - valid_token = value == token - if not valid_token and not optional: - raise UnAuthenticatedException() - return valid_token +from ._exceptions import ValidationFailedException def require_all(request: Request, *values: str) -> bool: diff --git a/src/server/admin/__init__.py b/src/server/admin/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/server/admin/api_key_form_script.js b/src/server/admin/api_key_form_script.js new file mode 100644 index 000000000..2a48d1459 --- /dev/null +++ b/src/server/admin/api_key_form_script.js @@ -0,0 +1,66 @@ +// post-processing script for API key registration form 
submissions +// currently located at: +// https://script.google.com/u/1/home/projects/1hpgZcxqbeyfJLVEaipNqCJ7ItdkjNu2NsX2IWqpjOd1wZwhCBeKzlCAa/edit + + +var POST_URL = "https://api.delphi.cmu.edu/epidata/admin/register"; +var WEBHOOK_SECRET = "abc"; + +function onSubmit(e) { + + + var form = FormApp.getActiveForm(); + var allResponses = form.getResponses(); + var latestResponse = allResponses[allResponses.length - 1]; + + var user_api_key = Math.random().toString(16).substr(2, 18); + var user_email = latestResponse.getRespondentEmail(); + + var payload = { + 'token': WEBHOOK_SECRET, + 'user_api_key': user_api_key, + 'user_email': user_email, + }; + + var options = { + "method": "post", + "contentType": "application/json", + "muteHttpExceptions": true, + "payload": JSON.stringify(payload) + }; + + Logger.log('Sending registration webhook request.') + var result = UrlFetchApp.fetch(POST_URL, options); + console.log(result.getResponseCode()); + + if (result.getResponseCode() == 200) { + Logger.log('Registration successful, sending email'); + MailApp.sendEmail({ + to: user_email, + subject: "Delphi Epidata API Registration", + noReply: true, + body: `Thank you for registering with the Delphi Epidata API. + + Your API key is: ${user_api_key} + + For usage information, see the API Keys section of the documentation: https://cmu-delphi.github.io/delphi-epidata/api/api_keys.html + + Best, + Delphi Team` + }); + } else if (result.getResponseCode() == 409) { + Logger.log('Registration was not successful, %s %s', result.getContentText("UTF-8"), result.GetResponseCode); + MailApp.sendEmail({ + to: user_email, + subject: "Delphi Epidata API Registration", + noReply: true, + body: ` + API Key was not generated. + + This email address is already registered. Please contact us if you believe this to be in error. 
+ + Best, + Delphi Team` + }); + } +}; diff --git a/src/server/admin/models.py b/src/server/admin/models.py new file mode 100644 index 000000000..3755a517b --- /dev/null +++ b/src/server/admin/models.py @@ -0,0 +1,154 @@ +from sqlalchemy import Table, ForeignKey, Column, Integer, String, Date, delete, update +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import relationship +from copy import deepcopy + +from .._db import Session +from delphi.epidata.common.logger import get_structured_logger + +from typing import Set, Optional, List +from datetime import datetime as dtime + + +Base = declarative_base() + +association_table = Table( + "user_role_link", + Base.metadata, + Column("user_id", ForeignKey("api_user.id")), + Column("role_id", ForeignKey("user_role.id")), +) + + +class User(Base): + __tablename__ = "api_user" + id = Column(Integer, primary_key=True, autoincrement=True) + roles = relationship("UserRole", secondary=association_table) + api_key = Column(String(50), unique=True, nullable=False) + email = Column(String(320), unique=True, nullable=False) + created = Column(Date, default=dtime.strftime(dtime.now(), "%Y-%m-%d")) + last_time_used = Column(Date, default=dtime.strftime(dtime.now(), "%Y-%m-%d")) + + def __init__(self, api_key: str, email: str = None) -> None: + self.api_key = api_key + self.email = email + + @staticmethod + def list_users() -> List["User"]: + with Session() as session: + return session.query(User).all() + + @property + def as_dict(self): + return { + "id": self.id, + "api_key": self.api_key, + "email": self.email, + "roles": User.get_user_roles(self.id), + "created": self.created, + "last_time_used": self.last_time_used + } + + @staticmethod + def get_user_roles(user_id: int) -> Set[str]: + with Session() as session: + user = session.query(User).filter(User.id == user_id).first() + return set([role.name for role in user.roles]) + + def has_role(self, required_role: str) -> bool: + return required_role in 
User.get_user_roles(self.id) + + @staticmethod + def _assign_roles(user: "User", roles: Optional[Set[str]], session) -> None: + # NOTE: this uses a borrowed/existing `session`, and thus does not do a `session.commit()`... + # that is the responsibility of the caller! + get_structured_logger("api_user_models").info("setting roles", roles=roles, user_id=user.id, api_key=user.api_key) + db_user = session.query(User).filter(User.id == user.id).first() + # TODO: would it be sufficient to use the passed-in `user` instead of looking up this `db_user`? + if roles: + roles_to_assign = session.query(UserRole).filter(UserRole.name.in_(roles)).all() + db_user.roles = roles_to_assign + else: + db_user.roles = [] + + @staticmethod + def find_user(*, # asterisk forces explicit naming of all arguments when calling this method + user_id: Optional[int] = None, api_key: Optional[str] = None, user_email: Optional[str] = None + ) -> "User": + # NOTE: be careful, using multiple arguments could match multiple users, but this will return only one! + with Session() as session: + user = ( + session.query(User) + .filter((User.id == user_id) | (User.api_key == api_key) | (User.email == user_email)) + .first() + ) + return user if user else None + + @staticmethod + def create_user(api_key: str, email: str, user_roles: Optional[Set[str]] = None) -> "User": + get_structured_logger("api_user_models").info("creating user", api_key=api_key) + with Session() as session: + new_user = User(api_key=api_key, email=email) + # TODO: we may need to populate 'created' field/column here, if the default + # specified above gets bound to the time of when that line of python was evaluated. 
+ session.add(new_user) + session.commit() + User._assign_roles(new_user, user_roles, session) + session.commit() + return new_user + + @staticmethod + def update_user( + user: "User", + email: Optional[str], + api_key: Optional[str], + roles: Optional[Set[str]] + ) -> "User": + get_structured_logger("api_user_models").info("updating user", user_id=user.id, new_api_key=api_key) + with Session() as session: + user = User.find_user(user_id=user.id) + if user: + update_stmt = ( + update(User) + .where(User.id == user.id) + .values(api_key=api_key, email=email) + ) + session.execute(update_stmt) + User._assign_roles(user, roles, session) + session.commit() + return user + + @staticmethod + def delete_user(user_id: int) -> None: + get_structured_logger("api_user_models").info("deleting user", user_id=user_id) + with Session() as session: + session.execute(delete(User).where(User.id == user_id)) + session.commit() + + +class UserRole(Base): + __tablename__ = "user_role" + id = Column(Integer, primary_key=True, autoincrement=True) + name = Column(String(50), unique=True) + + @staticmethod + def create_role(name: str) -> None: + get_structured_logger("api_user_models").info("creating user role", role=name) + with Session() as session: + session.execute( + f""" + INSERT INTO user_role (name) + SELECT '{name}' + WHERE NOT EXISTS + (SELECT * + FROM user_role + WHERE name='{name}') + """ + ) + session.commit() + + @staticmethod + def list_all_roles(): + with Session() as session: + roles = session.query(UserRole).all() + return [role.name for role in roles] diff --git a/src/server/admin/templates/index.html b/src/server/admin/templates/index.html new file mode 100644 index 000000000..518211199 --- /dev/null +++ b/src/server/admin/templates/index.html @@ -0,0 +1,81 @@ + + +
+ + +ID | +API Key | +Roles | +Actions | + + + {% for user in users %} +|
---|---|---|---|---|
{{ user.id }} | +{{ user.api_key }} | +{{ user.email }} | +{{ ','.join(user.roles) }} | ++ Edit + Delete + | +