Skip to content

Release Delphi Epidata 0.4.3 #1039

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.2
current_version = 0.4.3
commit = False
tag = False

Expand Down
16 changes: 15 additions & 1 deletion dev/local/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ web:
@# Run the web server
@docker run --rm -p 127.0.0.1:10080:80 \
--env "SQLALCHEMY_DATABASE_URI=mysql+mysqldb://user:pass@delphi_database_epidata:3306/epidata" \
--env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" \
--env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --env "LOG_DEBUG" \
--network delphi-net --name delphi_web_epidata \
delphi_web_epidata >$(LOG_WEB) 2>&1 &

Expand Down Expand Up @@ -139,6 +139,20 @@ test:
--env "FLASK_SECRET=abc" \
delphi_web_python python -m pytest --import-mode importlib $(pdb) $(test) | tee test_output_$(NOW).log

.PHONY=bash
bash:
@docker run -it --rm --network delphi-net \
--mount type=bind,source=$(CWD)repos/delphi/delphi-epidata,target=/usr/src/app/repos/delphi/delphi-epidata,readonly \
--mount type=bind,source=$(CWD)repos/delphi/delphi-epidata/src,target=/usr/src/app/delphi/epidata,readonly \
--env "SQLALCHEMY_DATABASE_URI=mysql+mysqldb://user:pass@delphi_database_epidata:3306/epidata" \
--env "FLASK_SECRET=abc" \
delphi_web_python bash

.PHONY=sql
sql:
@docker run --rm -it --network delphi-net --cap-add=sys_nice \
percona mysql --user=user --password=pass --port 3306 --host delphi_database_epidata epidata

.PHONY=clean
clean:
@docker images -f "dangling=true" -q | xargs docker rmi >/dev/null 2>&1
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ tenacity==7.0.0
newrelic
epiweeks==2.1.2
typing-extensions
structlog==22.1.0
18 changes: 16 additions & 2 deletions src/acquisition/covidcast/logger.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Structured logger utility for creating JSON logs in Delphi pipelines."""
import logging
import os
import sys
import threading
import structlog


def handle_exceptions(logger):
"""Handle exceptions using the provided logger."""
def exception_handler(etype, value, traceback):
Expand Down Expand Up @@ -45,12 +45,24 @@ def get_structured_logger(name=__name__,
if filename:
handlers.append(logging.FileHandler(filename))

if "LOG_DEBUG" in os.environ:
log_level = logging.DEBUG
else:
log_level = logging.INFO

logging.basicConfig(
format="%(message)s",
level=logging.INFO,
level=log_level,
handlers=handlers
)

def add_pid(logger, method_name, event_dict):
"""
Add current PID to the event dict.
"""
event_dict["pid"] = os.getpid()
return event_dict

# Configure structlog. This uses many of the standard suggestions from
# the structlog documentation.
structlog.configure(
Expand All @@ -61,6 +73,8 @@ def get_structured_logger(name=__name__,
structlog.stdlib.add_logger_name,
# Include log level in output.
structlog.stdlib.add_log_level,
# Include PID in output.
add_pid,
# Allow formatting into arguments e.g., logger.info("Hello, %s",
# name)
structlog.stdlib.PositionalArgumentsFormatter(),
Expand Down
2 changes: 1 addition & 1 deletion src/client/delphi_epidata.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Epidata <- (function() {
# API base url
BASE_URL <- 'https://delphi.cmu.edu/epidata/api.php'

client_version <- '0.4.2'
client_version <- '0.4.3'

# Helper function to cast values and/or ranges to strings
.listitem <- function(value) {
Expand Down
2 changes: 1 addition & 1 deletion src/client/delphi_epidata.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
}
})(this, function (exports, fetchImpl, jQuery) {
const BASE_URL = "https://delphi.cmu.edu/epidata/";
const client_version = "0.4.2";
const client_version = "0.4.3";

// Helper function to cast values and/or ranges to strings
function _listitem(value) {
Expand Down
2 changes: 1 addition & 1 deletion src/client/packaging/npm/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "delphi_epidata",
"description": "Delphi Epidata API Client",
"authors": "Delphi Group",
"version": "0.4.2",
"version": "0.4.3",
"license": "MIT",
"homepage": "https://github.com/cmu-delphi/delphi-epidata",
"bugs": {
Expand Down
2 changes: 1 addition & 1 deletion src/client/packaging/pypi/delphi_epidata/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .delphi_epidata import Epidata

name = 'delphi_epidata'
__version__ = '0.4.2'
__version__ = '0.4.3'
2 changes: 1 addition & 1 deletion src/client/packaging/pypi/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="delphi_epidata",
version="0.4.2",
version="0.4.3",
author="David Farrow",
author_email="[email protected]",
description="A programmatic interface to Delphi's Epidata API.",
Expand Down
55 changes: 51 additions & 4 deletions src/server/_common.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from typing import cast
import time

from flask import Flask, g, request
from sqlalchemy import event
from sqlalchemy.engine import Connection
from werkzeug.local import LocalProxy

from .utils.logger import get_structured_logger
from ._config import SECRET
from ._db import engine
from ._exceptions import DatabaseErrorException
from ._exceptions import DatabaseErrorException, EpiDataException

app = Flask("EpiData", static_url_path="")
app.config["SECRET"] = SECRET
Expand All @@ -24,19 +27,53 @@ def _get_db() -> Connection:
"""
db: Connection = cast(Connection, LocalProxy(_get_db))

@event.listens_for(engine, "before_cursor_execute")
def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
context._query_start_time = time.time()


@event.listens_for(engine, "after_cursor_execute")
def after_cursor_execute(conn, cursor, statement, parameters, context, executemany):
# this timing info may be suspect, at least in terms of dbms cpu time...
# it is likely that it includes that time as well as any overhead that
# comes from throttling or flow control on the streamed data, as well as
# any row transform/processing time
total_time = time.time() - context._query_start_time

# Convert to milliseconds
total_time *= 1000
get_structured_logger('server_api').info("Executed SQL", statement=statement, params=parameters, elapsed_time_ms=total_time)


@app.before_request
def connect_db():
def before_request_execute():
# Set timer for statement
g._request_start_time = time.time()

# Log statement
get_structured_logger('server_api').info("Received API request", method=request.method, url=request.url, form_args=request.form, req_length=request.content_length, remote_addr=request.remote_addr, user_agent=request.user_agent.string)

if request.path.startswith('/lib'):
return
# try to get the db
try:
_get_db()
except:
app.logger.error('database connection error', exc_info=True)
except Exception as e:
get_structured_logger('server_error').error('database connection error', exception=e)
raise DatabaseErrorException()


@app.after_request
def after_request_execute(response):
total_time = time.time() - g._request_start_time
# Convert to milliseconds
total_time *= 1000
get_structured_logger('server_api').info('Served API request', method=request.method, url=request.url, form_args=request.form, req_length=request.content_length, remote_addr=request.remote_addr, user_agent=request.user_agent.string,
values=request.values.to_dict(flat=False), blueprint=request.blueprint, endpoint=request.endpoint,
response_status=response.status, content_length=response.calculate_content_length(), elapsed_time_ms=total_time)
return response


@app.teardown_appcontext
def teardown_db(exception=None):
# close the db connection
Expand All @@ -46,6 +83,16 @@ def teardown_db(exception=None):
db.close()


@app.errorhandler(EpiDataException)
def handle_exception(e):
# Log error and pass through; EpiDataExceptions are HTTPExceptions which are valid WSGI responses (see https://werkzeug.palletsprojects.com/en/2.2.x/exceptions/ )
if isinstance(e, DatabaseErrorException):
get_structured_logger('server_error').error('Received DatabaseErrorException', exception=str(e), exc_info=True)
else:
get_structured_logger('server_error').warn('Encountered user-side error', exception=str(e))
return e


def is_compatibility_mode() -> bool:
"""
checks whether this request is in compatibility mode
Expand Down
2 changes: 1 addition & 1 deletion src/server/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

load_dotenv()

VERSION = "0.4.2"
VERSION = "0.4.3"

MAX_RESULTS = int(10e6)
MAX_COMPATIBILITY_RESULTS = int(3650)
Expand Down
5 changes: 3 additions & 2 deletions src/server/_printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import orjson

from ._config import MAX_RESULTS, MAX_COMPATIBILITY_RESULTS
from ._common import app, is_compatibility_mode
from ._common import is_compatibility_mode
from .utils.logger import get_structured_logger


def print_non_standard(data):
Expand Down Expand Up @@ -58,7 +59,7 @@ def gen():
if r is not None:
yield r
except Exception as e:
app.logger.exception(f"error executing: {str(e)}")
get_structured_logger('server_error').error("Exception while executing printer", exception=e)
self.result = -1
yield self._error(e)

Expand Down
1 change: 0 additions & 1 deletion src/server/_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,6 @@ def run_query(p: APrinter, query_tuple: Tuple[str, Dict[str, Any]]):
query, params = query_tuple
# limit rows + 1 for detecting whether we would have more
full_query = text(limit_query(query, p.remaining_rows + 1))
app.logger.info("full_query: %s, params: %s", full_query, params)
return db.execution_options(stream_results=True).execute(full_query, **params)


Expand Down
6 changes: 4 additions & 2 deletions src/server/endpoints/covidcast_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from flask.json import loads
from sqlalchemy import text

from .._common import db, app
from .._common import db
from .._printer import create_printer
from .._query import filter_fields
from .._validate import extract_strings
from ..utils.logger import get_structured_logger

bp = Blueprint("covidcast_meta", __name__)

Expand Down Expand Up @@ -41,11 +42,12 @@ def fetch_data(
).fetchone()

if not row or not row["epidata"]:
get_structured_logger('server_api').warning("no data in covidcast_meta cache")
return

age = row["age"]
if age > max_age and row["epidata"]:
app.logger.warning("covidcast_meta cache is stale: %d", age)
get_structured_logger('server_api').warning("covidcast_meta cache is stale", cache_age=age)
pass

epidata = loads(row["epidata"])
Expand Down
3 changes: 1 addition & 2 deletions src/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,4 @@ def send_lib_file(path: str):
app.logger.handlers = gunicorn_logger.handlers
app.logger.setLevel(gunicorn_logger.level)
sqlalchemy_logger = logging.getLogger("sqlalchemy")
sqlalchemy_logger.handlers = gunicorn_logger.handlers
sqlalchemy_logger.setLevel(gunicorn_logger.level)
sqlalchemy_logger.setLevel(logging.WARN)
12 changes: 10 additions & 2 deletions src/server/utils/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
Tuple,
Union
)
from .logger import get_structured_logger
from datetime import date, timedelta
from epiweeks import Week, Year
from typing_extensions import TypeAlias
import logging

# Alias for a sequence of date ranges (int, int) or date integers
TimeValues: TypeAlias = Sequence[Union[Tuple[int, int], int]]
Expand Down Expand Up @@ -86,16 +86,22 @@ def time_values_to_ranges(values: Optional[TimeValues]) -> Optional[TimeValues]:
e.g. [20200101, 20200102, (20200101, 20200104), 20200106] -> [(20200101, 20200104), 20200106]
(the first two values of the original list are merged into a single range)
"""
logger = get_structured_logger('server_utils')
if not values or len(values) <= 1:
logger.info("List of dates looks like 0-1 elements, nothing to optimize", time_values=values)
return values

# determine whether the list is of days (YYYYMMDD) or weeks (YYYYWW) based on first element
first_element = values[0][0] if isinstance(values[0], tuple) else values[0]
if guess_time_value_is_day(first_element):
# TODO: reduce this and other date logging to DEBUG after prod metrics gathered
logger.info("Treating time value as day", time_value=first_element)
return days_to_ranges(values)
elif guess_time_value_is_week(first_element):
logger.info("Treating time value as week", time_value=first_element)
return weeks_to_ranges(values)
else:
logger.info("Time value unclear, not optimizing", time_value=first_element)
return values

def days_to_ranges(values: TimeValues) -> TimeValues:
Expand Down Expand Up @@ -138,7 +144,9 @@ def _to_ranges(values: TimeValues, value_to_date: Callable, date_to_value: Calla
else:
ranges.append((date_to_value(m[0]), date_to_value(m[1])))

get_structured_logger('server_utils').info("Optimized list of date values", original=values, optimized=ranges, original_length=len(values), optimized_length=len(ranges))

return ranges
except Exception as e:
logging.info('bad input to date ranges', input=values, exception=e)
get_structured_logger('server_utils').error('bad input to date ranges', time_values=values, exception=e)
return values
Loading