Skip to content

Commit c0efc6c

Browse files
authored
Merge pull request #806 from lee14257/lee14257/api_key
Implement API key scrubbing and structured logging
2 parents 2b02abe + 1b5bf2a commit c0efc6c

File tree

5 files changed

+136
-15
lines changed

5 files changed

+136
-15
lines changed

requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ newrelic==6.8.0.163
1010
requests==2.26.0
1111
epiweeks==2.1.2
1212
Flask-Limiter==1.4
13-
redis==3.5.3
13+
redis==3.5.3
14+
structlog

src/server/_logger.py

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""Structured logger utility for creating JSON logs in web server logs"""
2+
import logging
3+
import sys
4+
import threading
5+
import structlog
6+
7+
8+
def handle_exceptions(logger):
9+
"""Handle exceptions using the provided logger."""
10+
def exception_handler(etype, value, traceback):
11+
logger.exception("Top-level exception occurred",
12+
exc_info=(etype, value, traceback))
13+
14+
def multithread_exception_handler(args):
15+
exception_handler(args.exc_type, args.exc_value, args.exc_traceback)
16+
17+
sys.excepthook = exception_handler
18+
threading.excepthook = multithread_exception_handler
19+
20+
21+
def get_structured_logger(name=__name__,
22+
filename=None,
23+
log_exceptions=True):
24+
"""Create a new structlog logger.
25+
26+
Use the logger returned from this in server code using the standard
27+
wrapper calls, e.g.:
28+
29+
logger = get_structured_logger(__name__)
30+
logger.warning("Error", type="Signal too low").
31+
32+
The output will be rendered as JSON which can easily be consumed by logs
33+
processors.
34+
35+
See the structlog documentation for details.
36+
37+
Parameters
38+
---------
39+
name: Name to use for logger (included in log lines), __name__ from caller
40+
is a good choice.
41+
filename: An (optional) file to write log output.
42+
"""
43+
# Configure the underlying logging configuration
44+
handlers = [logging.StreamHandler()]
45+
if filename:
46+
handlers.append(logging.FileHandler(filename))
47+
48+
logging.basicConfig(
49+
format="%(message)s",
50+
level=logging.INFO,
51+
handlers=handlers
52+
)
53+
54+
55+
56+
# Configure structlog. This uses many of the standard suggestions from
57+
# the structlog documentation.
58+
structlog.configure(
59+
processors=[
60+
# Filter out log levels we are not tracking.
61+
structlog.stdlib.filter_by_level,
62+
# Include logger name in output.
63+
structlog.stdlib.add_logger_name,
64+
# Include log level in output.
65+
structlog.stdlib.add_log_level,
66+
# Allow formatting into arguments e.g., logger.info("Hello, %s",
67+
# name)
68+
structlog.stdlib.PositionalArgumentsFormatter(),
69+
# Add timestamps.
70+
structlog.processors.TimeStamper(fmt="iso"),
71+
# Match support for exception logging in the standard logger.
72+
structlog.processors.StackInfoRenderer(),
73+
structlog.processors.format_exc_info,
74+
# Decode unicode characters
75+
structlog.processors.UnicodeDecoder(),
76+
# Render as JSON
77+
structlog.processors.JSONRenderer()
78+
],
79+
# Use a dict class for keeping track of data.
80+
context_class=dict,
81+
# Use a standard logger for the actual log call.
82+
logger_factory=structlog.stdlib.LoggerFactory(),
83+
# Use a standard wrapper class for utilities like log.warning()
84+
wrapper_class=structlog.stdlib.BoundLogger,
85+
# Cache the logger
86+
cache_logger_on_first_use=True,
87+
)
88+
89+
logger = structlog.get_logger(name)
90+
91+
if log_exceptions:
92+
handle_exceptions(logger)
93+
94+
return logger

src/server/_query.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def run_query(p: APrinter, query_tuple: Tuple[str, Dict[str, Any]]):
235235
query, params = query_tuple
236236
# limit rows + 1 for detecting whether we would have more
237237
full_query = text(f"{query} LIMIT {p.remaining_rows + 1}")
238-
current_user.log_info("full_query: %s, params: %s", full_query, params)
238+
current_user.log_info("Running query", full_query=str(full_query), params=params)
239239
return db.execution_options(stream_results=True).execute(full_query, **params)
240240

241241

src/server/_security.py

+37-12
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
from ._common import app, request, db
1313
from ._exceptions import MissingAPIKeyException, UnAuthenticatedException
1414
from ._db import metadata, TABLE_OPTIONS
15+
from ._logger import get_structured_logger
16+
import re
1517

1618
API_KEY_HARD_WARNING = API_KEY_REQUIRED_STARTING_AT - timedelta(days=14)
1719
API_KEY_SOFT_WARNING = API_KEY_HARD_WARNING - timedelta(days=14)
@@ -130,8 +132,8 @@ class UserRole(str, Enum):
130132

131133
class User:
132134
api_key: str
133-
roles: Set[UserRole]
134135
authenticated: bool
136+
roles: Set[UserRole]
135137
tracking: bool = True
136138
registered: bool = True
137139

@@ -141,18 +143,31 @@ def __init__(self, api_key: str, authenticated: bool, roles: Set[UserRole], trac
141143
self.roles = roles
142144
self.tracking = tracking
143145
self.registered = registered
146+
147+
def get_apikey(self) -> str:
148+
return self.api_key
149+
150+
def is_authenticated(self) -> bool:
151+
return self.authenticated
144152

145153
def has_role(self, role: UserRole) -> bool:
146154
return role in self.roles
147155

148-
def log_info(self, msg: str, *args, **kwargs) -> None:
149-
if self.authenticated and self.tracking:
150-
app.logger.info(f"apikey: {self.api_key}, {msg}", *args, **kwargs)
151-
else:
152-
app.logger.info(msg, *args, **kwargs)
153-
154156
def is_rate_limited(self) -> bool:
155157
return not self.registered
158+
159+
def is_tracking(self) -> bool:
160+
return self.tracking
161+
162+
def log_info(self, msg: str, *args, **kwargs) -> None:
163+
logger = get_structured_logger("api_key_logs", filename="api_key_logs.log")
164+
if self.is_authenticated():
165+
if self.is_tracking():
166+
logger.info(msg, *args, **dict(kwargs, apikey=self.get_apikey()))
167+
else:
168+
logger.info(msg, *args, **dict(kwargs, apikey="*****"))
169+
else:
170+
logger.info(msg, *args, **kwargs)
156171

157172

158173
ANONYMOUS_USER = User("anonymous", False, set())
@@ -169,7 +184,6 @@ def _find_user(api_key: Optional[str]) -> User:
169184
return User(user.api_key, True, set(user.roles.split(",")), user.tracking, user.registered)
170185

171186
def resolve_auth_token() -> Optional[str]:
172-
# auth request param
173187
for name in ('auth', 'api_key', 'token'):
174188
if name in request.values:
175189
return request.values[name]
@@ -187,12 +201,23 @@ def _get_current_user() -> User:
187201
if "user" not in g:
188202
api_key = resolve_auth_token()
189203
user = _find_user(api_key)
190-
if not user.authenticated and require_api_key():
204+
request_path = request.full_path
205+
if not user.is_authenticated() and require_api_key():
191206
raise MissingAPIKeyException()
192-
user.log_info(request.full_path)
207+
# If the user configured no-track option, mask the API key
208+
if not user.is_tracking():
209+
request_path = mask_apikey(request_path)
210+
user.log_info("Get path", path=request_path)
193211
g.user = user
194212
return g.user
195213

214+
def mask_apikey(path: str) -> str:
215+
# Function to mask API key query string from a request path
216+
regexp = re.compile(r'[\\?&]api_key=([^&#]*)')
217+
if regexp.search(path):
218+
path = re.sub(regexp, "&api_key=*****", path)
219+
return path
220+
196221

197222
current_user: User = cast(User, LocalProxy(_get_current_user))
198223

@@ -204,12 +229,12 @@ def require_api_key() -> bool:
204229

205230
def show_soft_api_key_warning() -> bool:
206231
n = date.today()
207-
return not current_user.authenticated and not app.config.get('TESTING', False) and n > API_KEY_SOFT_WARNING and n < API_KEY_HARD_WARNING
232+
return not current_user.is_authenticated() and not app.config.get('TESTING', False) and n > API_KEY_SOFT_WARNING and n < API_KEY_HARD_WARNING
208233

209234

210235
def show_hard_api_key_warning() -> bool:
211236
n = date.today()
212-
return not current_user.authenticated and not app.config.get('TESTING', False) and n > API_KEY_HARD_WARNING
237+
return not current_user.is_authenticated() and not app.config.get('TESTING', False) and n > API_KEY_HARD_WARNING
213238

214239

215240
def _is_public_route() -> bool:

src/server/main.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,5 @@ def send_lib_file(path: str):
6969
app.logger.setLevel(gunicorn_logger.level)
7070
sqlalchemy_logger = logging.getLogger("sqlalchemy")
7171
sqlalchemy_logger.handlers = gunicorn_logger.handlers
72-
sqlalchemy_logger.setLevel(gunicorn_logger.level)
72+
# Change SQLAlchemy logging level to "ERROR" in order to prevent query details of API keys from being logged
73+
sqlalchemy_logger.setLevel(logging.ERROR)

0 commit comments

Comments
 (0)