Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/cardinal_pythonlib/wsgi/reverse_proxied_mw.py : 26%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# cardinal_pythonlib/wsgi/reverse_proxied_mw.py
4"""
5===============================================================================
7 Original code copyright (C) 2009-2021 Rudolf Cardinal (rudolf@pobox.com).
9 This file is part of cardinal_pythonlib.
11 Licensed under the Apache License, Version 2.0 (the "License");
12 you may not use this file except in compliance with the License.
13 You may obtain a copy of the License at
15 https://www.apache.org/licenses/LICENSE-2.0
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
23===============================================================================
25**Middleware to set SCRIPT_NAME environment variable etc. when behind a
26reverse proxy.**
28"""
30from pprint import pformat
31from typing import List
33from cardinal_pythonlib.dicts import dict_diff, delete_keys
34from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler
35from cardinal_pythonlib.wsgi.constants import (
36 TYPE_WSGI_APP,
37 TYPE_WSGI_APP_RESULT,
38 TYPE_WSGI_ENVIRON,
39 TYPE_WSGI_START_RESPONSE,
40 WsgiEnvVar,
41)
43log = get_brace_style_log_with_null_handler(__name__)
46# =============================================================================
47# Helper functions for handling HTTP headers
48# =============================================================================
def ip_addresses_from_xff(value: str) -> List[str]:
    """
    Returns a list of IP addresses (as strings), given the value of an HTTP
    ``X-Forwarded-For`` (or ``WSGI HTTP_X_FORWARDED_FOR``) header.

    Args:
        value:
            the value of an HTTP ``X-Forwarded-For`` (or ``WSGI
            HTTP_X_FORWARDED_FOR``) header; an empty string or ``None`` is
            accepted and yields an empty list

    Returns:
        a list of IP addresses as strings, in header order (leftmost first),
        with surrounding whitespace removed and blank items discarded

    See:

    - https://en.wikipedia.org/wiki/X-Forwarded-For
    - https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For  # noqa
    - NOT THIS: https://tools.ietf.org/html/rfc7239
    """
    if not value:
        return []
    # The separator is nominally comma-space, but let's be liberal: split on
    # the comma alone and trim each item.
    stripped_items = (item.strip() for item in value.split(","))
    # Blank items (from stray, doubled or trailing commas, or a
    # whitespace-only header) are not addresses, so drop them rather than
    # returning '' entries.
    return [item for item in stripped_items if item]
def first_from_xff(value: str) -> str:
    """
    Picks out the leftmost IP address listed in an ``X-Forwarded-For``
    header; the header is parsed by :func:`ip_addresses_from_xff`.

    Args:
        value:
            the value of an HTTP ``X-Forwarded-For`` (or ``WSGI
            HTTP_X_FORWARDED_FOR``) header

    Returns:
        the first IP address in the header, as a string, or ``''`` if the
        header yields no addresses

    """
    candidates = ip_addresses_from_xff(value)
    # Leftmost entry wins; an empty parse result maps to the empty string.
    return candidates[0] if candidates else ''
94# =============================================================================
95# Middleware to set SCRIPT_NAME environment variable etc. when behind a
96# reverse proxy.
97# =============================================================================
99EXAMPLE_APACHE_REVERSE_PROXY_CONFIG = """
101 # =========================================================================
102 # Mount a WSGI application, using CamCOPS as an example
103 # =========================================================================
104 # This WSGI application is served by a SEPARATE web server (e.g. CherryPy);
105 # Apache just needs to pass information to and fro, and handle the HTTPS
106 # aspects.
108 # ---------------------------------------------------------------------
109 # 1. Proxy requests to the external server and back, and allow access.
110 # ---------------------------------------------------------------------
111 # ... either via port 8000
112 # ... or, better, socket /tmp/.camcops.sock
113 # NOTES
114 # - Don't specify trailing slashes.
115 # If you do, http://host/camcops will fail, though
116 # http://host/camcops/ will succeed.
117 # - Using a socket
118 # - this requires Apache 2.4.9, and passes after the '|' character a
119 # URL that determines the Host: value of the request; see
120 # https://httpd.apache.org/docs/trunk/mod/mod_proxy.html#proxypass
121 # - The Django debug toolbar will then require the bizarre entry in
122 # the Django settings: INTERNAL_IPS = ("b''", ) -- i.e. the string
123 # value of "b''", not an empty bytestring.
124 # - Ensure that you put the CORRECT PROTOCOL (e.g. https) in the rules
125 # below.
127 # (a) Proxy
129 # ... via a port
130 # Note the use of "http" (reflecting the backend), not https (like the
131 # front end).
132 ProxyPass /camcops http://127.0.0.1:8000 retry=0
133 ProxyPassReverse /camcops http://127.0.0.1:8000 retry=0
135 # ... or via a socket (Apache 2.4.9 and higher)
136 # ProxyPass /camcops unix:/tmp/.camcops.sock|https://localhost retry=0
137 # ProxyPassReverse /camcops unix:/tmp/.camcops.sock|https://localhost retry=0
139 # (b) Allow proxy over SSL.
140 # Without this, you will get errors like:
141 # ... SSL Proxy requested for wombat:443 but not enabled [Hint: SSLProxyEngine]
142 # ... failed to enable ssl support for 0.0.0.0:0 (httpd-UDS)
143 SSLProxyEngine on
145 <Location /camcops>
146 # (c) Allow access
147 Require all granted
149 # (d) Tell the proxied application that we are using HTTPS:
150 # ... https://stackoverflow.com/questions/16042647
151 # Enable mod_headers (e.g. "sudo a2enmod headers") and:
152 RequestHeader set X-Forwarded-Proto https
153 RequestHeader set X-Script-Name /camcops
154 </Location>
156 # ---------------------------------------------------------------------
157 # 2. Serve static files
158 # ---------------------------------------------------------------------
159 # a) offer them at the appropriate URL
160 # b) provide permission
162 Alias /camcops/static/ /usr/share/camcops/server/static/
164 # Change this: aim the alias at your own institutional logo.
165 # Alias /camcops/static/logo_local.png /usr/share/camcops/server/static/logo_local.png
167 <Directory /usr/share/camcops/server/static>
168 Require all granted
169 </Directory>
171""" # noqa
class ReverseProxiedConfig(object):
    """
    Settings describing how an application sits behind a reverse proxy, for
    use by :class:`ReverseProxiedMiddleware`.
    """
    def __init__(self,
                 trusted_proxy_headers: List[str] = None,
                 http_host: str = None,
                 remote_addr: str = None,
                 script_name: str = None,
                 server_name: str = None,
                 server_port: int = None,
                 url_scheme: str = None,
                 rewrite_path_info: bool = False) -> None:
        """
        Args:
            trusted_proxy_headers:
                names of headers that the middleware should trust and obey.
                Each name is upper-cased and kept only if it appears in
                :const:`ReverseProxiedMiddleware.ALL_CANDIDATES`; anything
                else is silently ignored here. Candidate headers that are
                not trusted get stripped by the middleware.

            http_host:
                Fixed value for the ``HTTP_HOST`` WSGI variable. If absent,
                a suitable trusted header (if any) is consulted instead.

            remote_addr:
                ... likewise for ``REMOTE_ADDR``

            script_name:
                ... likewise for ``SCRIPT_NAME``; trailing slashes are
                removed

            server_name:
                ... likewise for ``SERVER_NAME``

            server_port:
                ... likewise for ``SERVER_PORT``; stored as a string

            url_scheme:
                ... likewise for the URL scheme (e.g. ``"https"``); stored
                in lower case

            rewrite_path_info:
                If ``True``, then when the middleware sets ``SCRIPT_NAME``
                and ``PATH_INFO`` begins with ``SCRIPT_NAME``, that prefix
                is removed from the front of ``PATH_INFO``.

                Use this for front-end web servers that do not rewrite the
                incoming URL properly. (Do not use for Apache with
                ``ProxyPass``; ``ProxyPass`` rewrites the URLs properly for
                you.)

                ... as per e.g. http://flask.pocoo.org/snippets/35/
        """
        requested = trusted_proxy_headers or []
        # Normalize case, then retain only headers the middleware knows.
        self.trusted_proxy_headers = [
            name for name in (header.upper() for header in requested)
            if name in ReverseProxiedMiddleware.ALL_CANDIDATES
        ]  # type: List[str]

        # Stored exactly as supplied (may be None):
        self.http_host = http_host
        self.remote_addr = remote_addr
        self.server_name = server_name
        self.rewrite_path_info = rewrite_path_info

        # Normalized to strings, with "" meaning "not specified":
        if script_name:
            self.script_name = script_name.rstrip("/")
        else:
            self.script_name = ""
        if server_port is None:
            self.server_port = ""
        else:
            self.server_port = str(server_port)
        if url_scheme:
            self.url_scheme = url_scheme.lower()
        else:
            self.url_scheme = ""

    def necessary(self) -> bool:
        """
        Does this config call for any special handling (e.g. installing
        :class:`ReverseProxiedMiddleware`)? True if any setting is truthy.
        """
        settings = (
            self.trusted_proxy_headers,
            self.http_host,
            self.remote_addr,
            self.script_name,
            self.server_name,
            self.server_port,
            self.url_scheme,
            self.rewrite_path_info,
        )
        for setting in settings:
            if setting:
                return True
        return False
class ReverseProxiedMiddleware(object):
    """
    WSGI middleware to set the ``SCRIPT_NAME`` and ``PATH_INFO`` WSGI
    environment variables (etc.) correctly when behind a reverse proxy.

    Note that the WSGI environment variables ``HTTP_*`` are clones of HTTP
    headers; for example, ``X-Forwarded-For`` in HTTP becomes
    ``HTTP_X_FORWARDED_FOR`` in WSGI.

    See also:

    - http://flask.pocoo.org/snippets/35/
    - http://blog.macuyiko.com/post/2016/fixing-flask-url_for-when-behind-mod_proxy.html
    - http://alex.eftimie.ro/2013/03/21/how-to-run-flask-application-in-a-subpath-using-apache-mod_proxy/
    - http://modwsgi.readthedocs.io/en/develop/release-notes/version-4.4.9.html
    - https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers
    """  # noqa

    CANDIDATES_HTTP_HOST = [
        # These are variables that may contain a value for HTTP_HOST.
        WsgiEnvVar.HTTP_X_HOST,
        WsgiEnvVar.HTTP_X_FORWARDED_HOST
    ]
    CANDIDATES_SERVER_PORT = [
        # These are variables that may contain a value for SERVER_PORT.
        WsgiEnvVar.HTTP_X_FORWARDED_PORT
    ]
    CANDIDATES_REMOTE_ADDR = [
        # These are variables that may contain a value for REMOTE_ADDR.
        # However, they differ:
        WsgiEnvVar.HTTP_X_FORWARDED_FOR,  # may contain many values; first is taken  # noqa
        WsgiEnvVar.HTTP_X_REAL_IP  # may contain only one
    ]
    _CANDIDATES_URL_SCHEME_GIVING_PROTOCOL = [
        # These are variables whose values might be "http" or "https",
        # relevant to wsgi.url_scheme.
        WsgiEnvVar.HTTP_X_FORWARDED_PROTO,
        WsgiEnvVar.HTTP_X_FORWARDED_PROTOCOL,
        WsgiEnvVar.HTTP_X_FORWARDED_SCHEME,
        WsgiEnvVar.HTTP_X_SCHEME
    ]
    _CANDIDATES_URL_SCHEME_INDICATING_HTTPS = [
        # These are variables whose values might be "On", "True", or "1",
        # to indicate the use of HTTPS/SSL, relevant to wsgi.url_scheme.
        WsgiEnvVar.HTTP_X_FORWARDED_HTTPS,
        WsgiEnvVar.HTTP_X_FORWARDED_SSL,
        WsgiEnvVar.HTTP_X_HTTPS,
    ]
    CANDIDATES_URL_SCHEME = (
        # All variables that may inform wsgi.url_scheme
        _CANDIDATES_URL_SCHEME_GIVING_PROTOCOL +
        _CANDIDATES_URL_SCHEME_INDICATING_HTTPS
    )
    CANDIDATES_SCRIPT_NAME = [
        # These are variables that may contain a value for SCRIPT_NAME.
        WsgiEnvVar.HTTP_X_SCRIPT_NAME,
        WsgiEnvVar.HTTP_X_FORWARDED_SCRIPT_NAME
    ]
    CANDIDATES_SERVER_NAME = [
        # These are variables that may contain a value for SERVER_NAME.
        WsgiEnvVar.HTTP_X_FORWARDED_SERVER
    ]
    ALL_CANDIDATES = (
        # All variables of interest.
        CANDIDATES_HTTP_HOST +
        CANDIDATES_SERVER_PORT +
        CANDIDATES_REMOTE_ADDR +
        _CANDIDATES_URL_SCHEME_GIVING_PROTOCOL +
        _CANDIDATES_URL_SCHEME_INDICATING_HTTPS +
        CANDIDATES_SCRIPT_NAME +
        CANDIDATES_SERVER_NAME
    )
    SCHEME_HTTPS = 'https'
    TRUE_VALUES_LOWER_CASE = ["on", "true", "1"]

    def __init__(self,
                 app: TYPE_WSGI_APP,
                 config: ReverseProxiedConfig,
                 debug: bool = False) -> None:
        """
        Args:
            app:
                the WSGI application to wrap
            config:
                the :class:`ReverseProxiedConfig` governing which values are
                fixed and which proxy headers are trusted
            debug:
                if true, log (at debug level) the installed configuration
                and, per request, the WSGI environment before processing and
                the changes made to it
        """
        self.app = app
        self.config = config
        self.debug = debug

        # For each WSGI variable we may set, precompute the trusted subset
        # of its candidate source variables, preserving candidate order
        # (which is the priority order used per request).
        self.vars_host = [x for x in self.CANDIDATES_HTTP_HOST
                          if x in config.trusted_proxy_headers]
        self.vars_addr = [x for x in self.CANDIDATES_REMOTE_ADDR
                          if x in config.trusted_proxy_headers]
        self.vars_script = [x for x in self.CANDIDATES_SCRIPT_NAME
                            if x in config.trusted_proxy_headers]
        self.vars_server = [x for x in self.CANDIDATES_SERVER_NAME
                            if x in config.trusted_proxy_headers]
        self.vars_port = [x for x in self.CANDIDATES_SERVER_PORT
                          if x in config.trusted_proxy_headers]
        self.vars_scheme_a = [
            x for x in self._CANDIDATES_URL_SCHEME_GIVING_PROTOCOL
            if x in config.trusted_proxy_headers
        ]
        self.vars_scheme_b = [
            x for x in self._CANDIDATES_URL_SCHEME_INDICATING_HTTPS
            if x in config.trusted_proxy_headers
        ]

        if self.debug:
            log.debug("ReverseProxiedMiddleware installed")
            self._report(WsgiEnvVar.HTTP_HOST, config.http_host,
                         self.vars_host)
            self._report(WsgiEnvVar.REMOTE_ADDR, config.remote_addr,
                         self.vars_addr)
            self._report(WsgiEnvVar.SCRIPT_NAME, config.script_name,
                         self.vars_script)
            if config.script_name or self.vars_script:
                log.debug("... which will also affect WSGI environment "
                          "variable {}", WsgiEnvVar.PATH_INFO)
            self._report(WsgiEnvVar.SERVER_NAME, config.server_name,
                         self.vars_server)
            self._report(WsgiEnvVar.SERVER_PORT, config.server_port,
                         self.vars_port)
            self._report(WsgiEnvVar.WSGI_URL_SCHEME, config.url_scheme,
                         self.vars_scheme_a + self.vars_scheme_b)

    @staticmethod
    def _report(option: str, value: str, envvars: List[str]) -> None:
        """
        Logs (at debug level) how one WSGI environment variable will be
        treated: set to a fixed value, derived from incoming variables, or
        left alone.

        Args:
            option: name of the WSGI environment variable being described
            value: fixed value from the config, if any
            envvars: trusted variables consulted when there is no fixed value
        """
        if value:
            log.debug("... WSGI environment variable {} will be set to {}",
                      option, value)
        elif envvars:
            log.debug("... WSGI environment variable {} will be set according "
                      "to reflect environment variables {!r} in "
                      "incoming requests",
                      option, envvars)
        else:
            log.debug("... WSGI environment variable {} will not be changed",
                      option)

    @classmethod
    def _get_first(cls,
                   environ: TYPE_WSGI_ENVIRON,
                   envvars: List[str],
                   keys_to_keep: List[str],
                   as_remote_addr: bool = False) -> str:
        """
        Returns the value of the first variable in ``envvars`` that has a
        non-empty value in ``environ``, or ``''`` if none does.

        Args:
            environ: the WSGI environment
            envvars: variable names to try, in priority order
            keys_to_keep:
                list that is MODIFIED: the name of the variable used is
                appended, so that it is not deleted later
            as_remote_addr:
                if true, a value taken from ``HTTP_X_FORWARDED_FOR`` is
                reduced to its first (leftmost) address
        """
        for k in envvars:
            value = environ.get(k, '')
            if value:
                keys_to_keep.append(k)
                # Oddity for REMOTE_ADDR and X-Forwarded-For:
                if as_remote_addr and k == WsgiEnvVar.HTTP_X_FORWARDED_FOR:
                    value = first_from_xff(value)
                return value
        return ''

    @classmethod
    def _proto_if_one_true(cls,
                           environ: TYPE_WSGI_ENVIRON,
                           envvars: List[str],
                           keys_to_keep: List[str]) -> str:
        """
        Returns ``'https'`` if any variable in ``envvars`` has a value in
        ``environ`` that, compared case-insensitively, is one of
        :attr:`TRUE_VALUES_LOWER_CASE`; otherwise returns ``''``.

        Args:
            environ: the WSGI environment
            envvars: variable names to try, in priority order
            keys_to_keep:
                list that is MODIFIED: the name of the variable used is
                appended, so that it is not deleted later
        """
        for k in envvars:
            value = environ.get(k, '')
            if value.lower() in cls.TRUE_VALUES_LOWER_CASE:
                keys_to_keep.append(k)
                return cls.SCHEME_HTTPS
        return ''

    def __call__(self,
                 environ: TYPE_WSGI_ENVIRON,
                 start_response: TYPE_WSGI_START_RESPONSE) \
            -> TYPE_WSGI_APP_RESULT:
        """
        Rewrites the WSGI environment according to the config (fixed config
        values take priority over trusted incoming variables), deletes the
        candidate proxy variables that were not used, and then calls the
        wrapped application.

        -----------------------------------------------------------------------
        REWRITING THE HOST (setting HTTP_HOST):
        -----------------------------------------------------------------------

        If you don't rewrite the host, the Pyramid debug toolbar will get
        things a bit wrong. An example:
            http://127.0.0.1:80/camcops
        is proxied by Apache to
            http://127.0.0.1:8000/camcops

        In that situation, HTTP_HOST will be '127.0.0.1:8000', and so the
        Pyramid debug toolbar will start asking the web browser to go to
            http://127.0.0.1:8000/camcops/_debug_toolbar/...
        ... which is wrong (it's a reference to the "internal" site).

        If you allow the host to be rewritten, then you get a sensible
        reference e.g. to
            http://wombat/camcops/_debug_toolbar/...

        Should we be looking at HTTP_X_FORWARDED_HOST or
        HTTP_X_FORWARDED_SERVER?
        See https://github.com/omnigroup/Apache/blob/master/httpd/modules/proxy/mod_proxy_http.c  # noqa
        ... and let's follow mod_wsgi.

        -----------------------------------------------------------------------
        HTTP_HOST versus SERVER_NAME
        -----------------------------------------------------------------------
        https://stackoverflow.com/questions/2297403/what-is-the-difference-between-http-host-and-server-name-in-php  # noqa

        -----------------------------------------------------------------------
        REWRITING THE PROTOCOL
        -----------------------------------------------------------------------
        Consider how we get here. For example, we may have this sequence:

        .. code-block:: none

            user's web browser
            -> Apache front-end web server via HTTPS on port 443
            -> ProxyPass/ProxyPassReverse
            -> CherryPy server via HTTP on port 8000 or via a Unix socket
            -> ...
            -> cherrypy/wsgiserver/__init__.py,
                WSGIGateway_10.get_environ()
                ... which creates a WSGI environment from an HTTP request.

        So if you want to see what's coming by way of raw headers, put this
        in at the end of that get_environ() function:

        .. code-block:: python

            from pprint import pformat; import logging; log = logging.getLogger(__name__); log.critical("Request headers:\n" + pformat(req.inheaders))

        """  # noqa
        if self.debug:
            log.debug("Starting WSGI environment: \n{}", pformat(environ))
            oldenv = environ.copy()
        keys_to_keep = []  # type: List[str]
        config = self.config

        # ---------------------------------------------------------------------

        # HTTP_HOST
        http_host = (
            config.http_host or  # manually specified: top priority. Otherwise:
            self._get_first(environ, self.vars_host, keys_to_keep)
        )
        if http_host:
            environ[WsgiEnvVar.HTTP_HOST] = http_host

        # REMOTE_ADDR
        remote_addr = (
            config.remote_addr or
            self._get_first(environ, self.vars_addr, keys_to_keep,
                            as_remote_addr=True)
        )
        if remote_addr:
            environ[WsgiEnvVar.REMOTE_ADDR] = remote_addr

        # SCRIPT_NAME, PATH_INFO
        script_name = (
            config.script_name or
            self._get_first(environ, self.vars_script, keys_to_keep)
        )
        if script_name:
            environ[WsgiEnvVar.SCRIPT_NAME] = script_name
            if config.rewrite_path_info:
                # PEP 3333 lets a server omit PATH_INFO when it is empty, so
                # do not index the environment directly (KeyError).
                path_info = environ.get(WsgiEnvVar.PATH_INFO, '')
                # Only strip the prefix where it ends on a path-segment
                # boundary, so "/camcopsfoo" is not mistaken for something
                # under "/camcops", and a header-supplied "/camcops/" does
                # not leave PATH_INFO without its leading slash.
                prefix = script_name.rstrip("/")
                if prefix and (path_info == prefix or
                               path_info.startswith(prefix + "/")):
                    # "or '/'": e.g. trailing slash omitted from incoming path
                    environ[WsgiEnvVar.PATH_INFO] = (
                        path_info[len(prefix):] or "/"
                    )

        # SERVER_NAME
        server_name = (
            config.server_name or
            self._get_first(environ, self.vars_server, keys_to_keep)
        )
        if server_name:
            environ[WsgiEnvVar.SERVER_NAME] = server_name

        # SERVER_PORT
        server_port = (
            config.server_port or
            self._get_first(environ, self.vars_port, keys_to_keep)
        )
        if server_port:
            environ[WsgiEnvVar.SERVER_PORT] = server_port

        # wsgi.url_scheme
        url_scheme = (
            config.url_scheme or  # manually specified: top priority. Otherwise:
            self._get_first(environ, self.vars_scheme_a, keys_to_keep) or
            self._proto_if_one_true(environ, self.vars_scheme_b, keys_to_keep)
        )
        if url_scheme:
            url_scheme = url_scheme.lower()
            environ[WsgiEnvVar.WSGI_URL_SCHEME] = url_scheme

        # ---------------------------------------------------------------------

        # As per mod_wsgi, we delete unused and untrusted keys.
        delete_keys(environ,
                    keys_to_delete=self.ALL_CANDIDATES,
                    keys_to_keep=keys_to_keep)
        if self.debug:
            # oldenv is bound above under the same self.debug condition.
            # noinspection PyUnboundLocalVariable
            changes = dict_diff(oldenv, environ)
            log.debug("Changes to WSGI environment: \n{}", pformat(changes))
        return self.app(environ, start_response)