From 3a4994329c914b88aaed14ba5aa92d4a618eb73b Mon Sep 17 00:00:00 2001 From: george haff Date: Wed, 7 Jun 2023 15:35:11 -0400 Subject: [PATCH 1/2] properly handle and interpret X-Forwarded-For header --- src/server/_common.py | 27 +++++++++++++++++++++------ src/server/_config.py | 39 +++++++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/src/server/_common.py b/src/server/_common.py index 8322f2da6..56d4c38ec 100644 --- a/src/server/_common.py +++ b/src/server/_common.py @@ -8,7 +8,7 @@ from werkzeug.local import LocalProxy from delphi.epidata.common.logger import get_structured_logger -from ._config import SECRET, REVERSE_PROXIED +from ._config import SECRET, REVERSE_PROXY_DEPTH from ._db import engine from ._exceptions import DatabaseErrorException, EpiDataException from ._security import current_user, _is_public_route, resolve_auth_token, show_no_api_key_warning, update_key_last_time_used, ERROR_MSG_INVALID_KEY @@ -31,14 +31,29 @@ def _get_db() -> Connection: def get_real_ip_addr(req): # `req` should be a Flask.request object - if REVERSE_PROXIED: - # NOTE: ONLY trust these headers if reverse proxied!!! + if REVERSE_PROXY_DEPTH: + # we only expect/trust (up to) "REVERSE_PROXY_DEPTH" number of proxies between this server and the outside world. + # a REVERSE_PROXY_DEPTH of 0 means not proxied, i.e. server is globally directly reachable. + # a negative proxy depth is a special case to trust the whole chain -- not generally recommended unless the + # most-external proxy is configured to disregard "X-Forwarded-For" from outside. + # really, ONLY trust the following headers if reverse proxied!!! if "X-Forwarded-For" in req.headers: - return req.headers["X-Forwarded-For"].split(",")[ - 0 - ] # take the first (or only) address from the comma-sep list + full_proxy_chain = req.headers["X-Forwarded-For"].split(",") + # eliminate any extra addresses at the front of this list, as they could be spoofed. + if REVERSE_PROXY_DEPTH > 0: + depth = REVERSE_PROXY_DEPTH + else: + # special case for -1/negative: setting `depth` to 0 will not strip any items from the chain + depth = 0 + trusted_proxy_chain = full_proxy_chain[-depth:] + # accept the first (or only) address in the remaining trusted part of the chain as the actual remote address + return trusted_proxy_chain[0].strip() + + # fall back to "X-Real-Ip" if "X-Forwarded-For" isnt present if "X-Real-Ip" in req.headers: return req.headers["X-Real-Ip"] + + # if we are not proxied (or we are proxied but the headers werent present and we fell through to here), just use the remote ip addr as the true client address return req.remote_addr diff --git a/src/server/_config.py b/src/server/_config.py index dadeaa99c..be5032b27 100644 --- a/src/server/_config.py +++ b/src/server/_config.py @@ -26,14 +26,37 @@ SECRET = os.environ.get("FLASK_SECRET", "secret") URL_PREFIX = os.environ.get("FLASK_PREFIX", "") # if not empty, this value should begin but not end in a slash ('/') -# REVERSE_PROXIED is a boolean value that indicates whether or not this server instance -# is running behind a reverse proxy (like nginx). -# in dev and testing, it is fine (or even preferable) for this variable to be set to 'TRUE' -# even if it is not actually the case. in prod, it is very important that this is set accurately -- -# it should _only_ be set to 'TRUE' if it really is behind a reverse proxy, as remote addresses can be "spoofed" -# which can carry security/identity implications. conversely, if this is set to 'FALSE' when in fact -# running behind a reverse proxy, it can hinder logging accuracy. it defaults to 'FALSE' for safety. -REVERSE_PROXIED = os.environ.get("REVERSE_PROXIED", "FALSE").upper() == "TRUE" + +""" +REVERSE_PROXY_DEPTH is an integer value that indicates how many "chained" and trusted reverse proxies (like nginx) this + server instance is running behind. "chained" refers to proxies forwarding to other proxies, and then ultimately + forwarding to the app server itself. each of these proxies appends the remote address of the request to the + "X-Forwarded-For" header. in many situations, the most externally facing proxy (the first in the chain, the one that + faces the "open internet") can and should be set to write its own "X-Forwarded-For" header, ignoring and replacing + (or creating anew, if it didnt exist) such a header from the client request -- thus preserving the chain of trusted + proxies under our control. + +however, in our typical production environment, the most externally facing "proxy" is the AWS application load balancer, + which seemingly cannot be configured to provide this trust boundary without losing the referring client address + (see: https://docs.aws.amazon.com/elasticloadbalancing/latest/application/x-forwarded-headers.html ). accordingly, in + our current typical production environment, REVERSE_PROXY_DEPTH should be set to "2": one for the AWS application load + balancer, and one for our own nginx/haproxy instance. thus "2" is our default value. + +it is important that REVERSE_PROXY_DEPTH is set accurately for two reasons... + +setting it too high (or to -1) will respect more of the entries in the "X-Forwarded-For" header than are appropriate. + this can allow remote addresses to be "spoofed" when a client fakes this header, carrying security/identity + implications. in dev and testing, it is not particularly dangerous for this variable to be set to -1 (special case + for an "infinite" depth, where any and all proxy hops will be trusted). + +setting it too low can hinder logging accuracy -- that can cause an intermediate proxy IP address to be used as the + "real" client IP address. + +setting REVERSE_PROXY_DEPTH to "0" essentially indicates there are no proxies between this server and the outside + world. in this case, the "X-Forwarded-For" header is ignored. +""" +REVERSE_PROXY_DEPTH = int(os.environ.get("PROXY_DEPTH", 2)) + REGION_TO_STATE = { "hhs1": ["VT", "CT", "ME", "MA", "NH", "RI"], From 8884f72a4f38a38dcf89e55e41f225b556334d8e Mon Sep 17 00:00:00 2001 From: melange396 Date: Wed, 7 Jun 2023 18:05:28 -0400 Subject: [PATCH 2/2] comment clarification Co-authored-by: Katie Mazaitis --- src/server/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/_config.py b/src/server/_config.py index be5032b27..5d807635c 100644 --- a/src/server/_config.py +++ b/src/server/_config.py @@ -50,7 +50,7 @@ for an "infinite" depth, where any and all proxy hops will be trusted). setting it too low can hinder logging accuracy -- that can cause an intermediate proxy IP address to be used as the - "real" client IP address. + "real" client IP address, which could cause requests to be rate-limited inappropriately. setting REVERSE_PROXY_DEPTH to "0" essentially indicates there are no proxies between this server and the outside world. in this case, the "X-Forwarded-For" header is ignored.