Coverage for /Users/davegaeddert/Development/dropseed/plain/plain/plain/http/request.py: 59%
399 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-17 22:03 -0500
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-17 22:03 -0500
1import codecs
2import copy
3import uuid
4from io import BytesIO
5from itertools import chain
6from urllib.parse import parse_qsl, quote, urlencode, urljoin, urlsplit
8from plain import signing
9from plain.exceptions import (
10 DisallowedHost,
11 ImproperlyConfigured,
12 RequestDataTooBig,
13 TooManyFieldsSent,
14)
15from plain.http.multipartparser import (
16 MultiPartParser,
17 MultiPartParserError,
18 TooManyFilesSent,
19)
20from plain.internal.files import uploadhandler
21from plain.runtime import settings
22from plain.utils.datastructures import (
23 CaseInsensitiveMapping,
24 ImmutableList,
25 MultiValueDict,
26)
27from plain.utils.encoding import escape_uri_path, iri_to_uri
28from plain.utils.functional import cached_property
29from plain.utils.http import is_same_domain, parse_header_parameters
30from plain.utils.regex_helper import _lazy_re_compile
# Unique sentinel used as the default of HttpRequest.get_signed_cookie():
# when the caller leaves `default` at this value, errors are re-raised.
RAISE_ERROR = object()

# Matches either a run of [a-z0-9.-] characters or a bracketed group made of
# hex digits, dots and colons, optionally followed by ":" and a run of digits.
host_validation_re = _lazy_re_compile(
    r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:[0-9]+)?$"
)
class UnreadablePostError(OSError):
    """Raised when reading from the request's data stream fails with OSError."""
class RawPostDataException(Exception):
    """
    You cannot access raw_post_data from a request that has
    multipart/* POST data if it has been accessed via POST,
    FILES, etc.
    """
class HttpRequest:
    """A basic HTTP request."""

    # Encoding applied to the GET/POST dicts. None means use the default
    # setting.
    _encoding = None
    _upload_handlers = []

    # Attributes removed by __getstate__() and deep-copied one by one in
    # __deepcopy__().
    non_picklable_attrs = frozenset(["resolver_match", "_stream"])

    def __init__(self):
        # WARNING: The `WSGIRequest` subclass doesn't call `super`.
        # Any variable assignment made here should also happen in
        # `WSGIRequest.__init__()`.

        # Random identifier that can be used to trace this request.
        self.unique_id = str(uuid.uuid4())

        self.GET = QueryDict(mutable=True)
        self.POST = QueryDict(mutable=True)
        self.COOKIES = {}
        self.META = {}
        self.FILES = MultiValueDict()

        self.path = ""
        self.path_info = ""
        self.method = None
        self.resolver_match = None
        self.content_type = None
        self.content_params = None

    def __repr__(self):
        """Show the class name plus, when available, the method and full path."""
        has_details = self.method is not None and self.get_full_path()
        if not has_details:
            return "<%s>" % self.__class__.__name__
        return f"<{self.__class__.__name__}: {self.method} {self.get_full_path()!r}>"

    def __getstate__(self):
        """Return a copy of the instance dict without the non-picklable attrs."""
        state = self.__dict__.copy()
        for name in self.non_picklable_attrs:
            state.pop(name, None)
        return state

    def __deepcopy__(self, memo):
        """
        Shallow-copy the request, then deep-copy each non-picklable attribute
        that is present onto the copy.
        """
        duplicate = copy.copy(self)
        for name in self.non_picklable_attrs:
            if not hasattr(self, name):
                continue
            setattr(duplicate, name, copy.deepcopy(getattr(self, name), memo))
        memo[id(self)] = duplicate
        return duplicate

    @cached_property
    def headers(self):
        """Return an HttpHeaders mapping built from self.META."""
        return HttpHeaders(self.META)

    @cached_property
    def accepted_types(self):
        """Return a list of MediaType instances."""
        accept_header = self.headers.get("Accept", "*/*")
        return parse_accept_header(accept_header)

    def accepts(self, media_type):
        """Return True if any accepted type matches `media_type`."""
        return any(candidate.match(media_type) for candidate in self.accepted_types)

    def _set_content_type_params(self, meta):
        """Set content_type, content_params, and encoding."""
        raw_content_type = meta.get("CONTENT_TYPE", "")
        self.content_type, self.content_params = parse_header_parameters(
            raw_content_type
        )
        if "charset" not in self.content_params:
            return
        charset = self.content_params["charset"]
        try:
            codecs.lookup(charset)
        except LookupError:
            # Unknown codec: leave the encoding untouched.
            return
        self.encoding = charset

    def _get_raw_host(self):
        """
        Return the HTTP host using the environment or request headers. Skip
        allowed hosts protection, so may return an insecure host.
        """
        meta = self.META
        # Three options, in order of decreasing preference.
        if settings.USE_X_FORWARDED_HOST and ("HTTP_X_FORWARDED_HOST" in meta):
            return meta["HTTP_X_FORWARDED_HOST"]
        if "HTTP_HOST" in meta:
            return meta["HTTP_HOST"]

        # Reconstruct the host using the algorithm from PEP 333.
        host = meta["SERVER_NAME"]
        server_port = self.get_port()
        default_port = "443" if self.is_https() else "80"
        if server_port != default_port:
            host = f"{host}:{server_port}"
        return host

    def get_host(self):
        """Return the HTTP host using the environment or request headers."""
        host = self._get_raw_host()

        # Allow variants of localhost if ALLOWED_HOSTS is empty and DEBUG=True.
        allowed_hosts = settings.ALLOWED_HOSTS
        if settings.DEBUG and not allowed_hosts:
            allowed_hosts = [".localhost", "127.0.0.1", "[::1]"]

        domain, port = split_domain_port(host)
        if domain and validate_host(domain, allowed_hosts):
            return host

        msg = "Invalid HTTP_HOST header: %r." % host
        if domain:
            msg += " You may need to add %r to ALLOWED_HOSTS." % domain
        else:
            msg += (
                " The domain name provided is not valid according to RFC 1034/1035."
            )
        raise DisallowedHost(msg)

    def get_port(self):
        """Return the port number for the request as a string."""
        use_forwarded = (
            settings.USE_X_FORWARDED_PORT and "HTTP_X_FORWARDED_PORT" in self.META
        )
        meta_key = "HTTP_X_FORWARDED_PORT" if use_forwarded else "SERVER_PORT"
        return str(self.META[meta_key])

    def get_full_path(self, force_append_slash=False):
        """Return self.path followed by the query string, if there is one."""
        return self._get_full_path(self.path, force_append_slash)

    def get_full_path_info(self, force_append_slash=False):
        """Return self.path_info followed by the query string, if there is one."""
        return self._get_full_path(self.path_info, force_append_slash)

    def _get_full_path(self, path, force_append_slash):
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        escaped_path = escape_uri_path(path)
        slash = "/" if force_append_slash and not path.endswith("/") else ""
        query_string = self.META.get("QUERY_STRING", "")
        query = ("?" + iri_to_uri(query_string)) if query_string else ""
        return "{}{}{}".format(escaped_path, slash, query)

    def get_signed_cookie(self, key, default=RAISE_ERROR, salt="", max_age=None):
        """
        Attempt to return a signed cookie. If the signature fails or the
        cookie has expired, raise an exception, unless the `default` argument
        is provided, in which case return that value.
        """
        has_default = default is not RAISE_ERROR
        try:
            cookie_value = self.COOKIES[key]
        except KeyError:
            if has_default:
                return default
            raise
        try:
            signer = signing.get_cookie_signer(salt=key + salt)
            return signer.unsign(cookie_value, max_age=max_age)
        except signing.BadSignature:
            if has_default:
                return default
            raise

    def build_absolute_uri(self, location=None):
        """
        Build an absolute URI from the location and the variables available in
        this request. If no ``location`` is specified, build the absolute URI
        using request.get_full_path(). If the location is absolute, convert it
        to an RFC 3987 compliant URI and return it. If location is relative or
        is scheme-relative (i.e., ``//example.com/``), urljoin() it to a base
        URL constructed from the request variables.
        """
        if location is None:
            # Make it an absolute url (but schemeless and domainless) for the
            # edge case that the path starts with '//'.
            location = "//%s" % self.get_full_path()
        else:
            # Coerce lazy locations.
            location = str(location)

        bits = urlsplit(location)
        if not bits.scheme or not bits.netloc:
            # Handle the simple, most common case. If the location is absolute
            # and a scheme or host (netloc) isn't provided, skip an expensive
            # urljoin() as long as no path segments are '.' or '..'.
            is_plain_absolute_path = (
                bits.path.startswith("/")
                and not bits.scheme
                and not bits.netloc
                and "/./" not in bits.path
                and "/../" not in bits.path
            )
            if is_plain_absolute_path:
                # If location starts with '//' but has no netloc, reuse the
                # schema and netloc from the current request. Strip the double
                # slashes and continue as if it wasn't specified.
                location = self._current_scheme_host + location.removeprefix("//")
            else:
                # Join the constructed URL with the provided location, which
                # allows the provided location to apply query strings to the
                # base path.
                location = urljoin(self._current_scheme_host + self.path, location)
        return iri_to_uri(location)

    @cached_property
    def _current_scheme_host(self):
        """Return "<scheme>://<host>" for this request."""
        return f"{self.scheme}://{self.get_host()}"

    def _get_scheme(self):
        """
        Hook for subclasses like WSGIRequest to implement. Return 'http' by
        default.
        """
        return "http"

    @property
    def scheme(self):
        """
        Return "https" or "http" based on settings.HTTPS_PROXY_HEADER when that
        header is present in META; otherwise defer to _get_scheme().
        """
        if not settings.HTTPS_PROXY_HEADER:
            return self._get_scheme()
        try:
            header, secure_value = settings.HTTPS_PROXY_HEADER
        except ValueError:
            raise ImproperlyConfigured(
                "The HTTPS_PROXY_HEADER setting must be a tuple containing "
                "two values."
            )
        header_value = self.META.get(header)
        if header_value is None:
            return self._get_scheme()
        # Only the first comma-separated value of the header is considered.
        first_value = header_value.split(",", 1)[0]
        return "https" if first_value.strip() == secure_value else "http"

    def is_https(self):
        """Return True when the request scheme is "https"."""
        return self.scheme == "https"

    @property
    def encoding(self):
        return self._encoding

    @encoding.setter
    def encoding(self, val):
        """
        Set the encoding used for GET/POST accesses. If the GET or POST
        dictionary has already been created, remove and recreate it on the
        next access (so that it is decoded correctly).
        """
        self._encoding = val
        for stale_attr in ("GET", "_post"):
            if hasattr(self, stale_attr):
                delattr(self, stale_attr)

    def _initialize_handlers(self):
        """Load one upload handler per entry of settings.FILE_UPLOAD_HANDLERS."""
        self._upload_handlers = [
            uploadhandler.load_handler(handler_path, self)
            for handler_path in settings.FILE_UPLOAD_HANDLERS
        ]

    @property
    def upload_handlers(self):
        if not self._upload_handlers:
            # If there are no upload handlers defined, initialize them from settings.
            self._initialize_handlers()
        return self._upload_handlers

    @upload_handlers.setter
    def upload_handlers(self, upload_handlers):
        upload_already_processed = hasattr(self, "_files")
        if upload_already_processed:
            raise AttributeError(
                "You cannot set the upload handlers after the upload has been "
                "processed."
            )
        self._upload_handlers = upload_handlers

    def parse_file_upload(self, META, post_data):
        """Return a tuple of (POST QueryDict, FILES MultiValueDict)."""
        # Freeze the handler list before handing it to the parser.
        frozen_handlers = ImmutableList(
            self.upload_handlers,
            warning=(
                "You cannot alter upload handlers after the upload has been "
                "processed."
            ),
        )
        self.upload_handlers = frozen_handlers
        parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
        return parser.parse()

    @property
    def body(self):
        """
        Return the request body, reading it from the stream on first access
        and caching it in self._body.
        """
        if hasattr(self, "_body"):
            return self._body

        if self._read_started:
            raise RawPostDataException(
                "You cannot access body after reading from request's data stream"
            )

        # Limit the maximum request data size that will be handled in-memory.
        max_size = settings.DATA_UPLOAD_MAX_MEMORY_SIZE
        if max_size is not None:
            content_length = int(self.META.get("CONTENT_LENGTH") or 0)
            if content_length > settings.DATA_UPLOAD_MAX_MEMORY_SIZE:
                raise RequestDataTooBig(
                    "Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE."
                )

        try:
            self._body = self.read()
        except OSError as e:
            raise UnreadablePostError(*e.args) from e
        finally:
            self._stream.close()
        # Point the stream at the cached bytes so later reads still work.
        self._stream = BytesIO(self._body)
        return self._body

    def _mark_post_parse_error(self):
        """Set empty POST data and files."""
        self._post = QueryDict()
        self._files = MultiValueDict()

    def _load_post_and_files(self):
        """Populate self._post and self._files if the content-type is a form type"""
        if self.method != "POST":
            self._post, self._files = (
                QueryDict(encoding=self._encoding),
                MultiValueDict(),
            )
            return
        if self._read_started and not hasattr(self, "_body"):
            self._mark_post_parse_error()
            return

        if self.content_type == "multipart/form-data":
            # Use already read data when there is some; otherwise the parser
            # reads from the request itself.
            source = BytesIO(self._body) if hasattr(self, "_body") else self
            try:
                self._post, self._files = self.parse_file_upload(self.META, source)
            except (MultiPartParserError, TooManyFilesSent):
                # An error occurred while parsing POST data. Since when
                # formatting the error the request handler might access
                # self.POST, set self._post and self._file to prevent
                # attempts to parse POST data again.
                self._mark_post_parse_error()
                raise
            return

        if self.content_type == "application/x-www-form-urlencoded":
            post = QueryDict(self.body, encoding=self._encoding)
        else:
            post = QueryDict(encoding=self._encoding)
        self._post, self._files = post, MultiValueDict()

    def close(self):
        """Close every file held in self._files, if it has been populated."""
        if not hasattr(self, "_files"):
            return
        for _, uploaded_files in self._files.lists():
            for uploaded in uploaded_files:
                uploaded.close()

    # File-like and iterator interface.
    #
    # Expects self._stream to be set to an appropriate source of bytes by
    # a corresponding request subclass (e.g. WSGIRequest).
    # Also when request data has already been read by request.POST or
    # request.body, self._stream points to a BytesIO instance
    # containing that data.

    def read(self, *args, **kwargs):
        """Read from the stream; an OSError becomes UnreadablePostError."""
        self._read_started = True
        try:
            data = self._stream.read(*args, **kwargs)
        except OSError as e:
            raise UnreadablePostError(*e.args) from e
        return data

    def readline(self, *args, **kwargs):
        """Read one line from the stream; an OSError becomes UnreadablePostError."""
        self._read_started = True
        try:
            line = self._stream.readline(*args, **kwargs)
        except OSError as e:
            raise UnreadablePostError(*e.args) from e
        return line

    def __iter__(self):
        # Call readline() repeatedly until it returns b"".
        return iter(self.readline, b"")

    def readlines(self):
        """Return all remaining lines as a list."""
        return list(self)
class HttpHeaders(CaseInsensitiveMapping):
    """A CaseInsensitiveMapping of the HTTP headers found in an environ dict."""

    HTTP_PREFIX = "HTTP_"
    # PEP 333 gives two headers which aren't prepended with HTTP_.
    UNPREFIXED_HEADERS = {"CONTENT_TYPE", "CONTENT_LENGTH"}

    def __init__(self, environ):
        # Keep only the environ entries that parse_header_name() recognises.
        super().__init__(
            {
                name: value
                for environ_key, value in environ.items()
                if (name := self.parse_header_name(environ_key))
            }
        )

    def __getitem__(self, key):
        """Allow header lookup using underscores in place of hyphens."""
        hyphenated = key.replace("_", "-")
        return super().__getitem__(hyphenated)

    @classmethod
    def parse_header_name(cls, header):
        """
        Convert an environ key to a header name (e.g. "HTTP_X_FOO" becomes
        "X-Foo"). Return None for keys that are neither HTTP_-prefixed nor
        listed in UNPREFIXED_HEADERS.
        """
        if header.startswith(cls.HTTP_PREFIX):
            bare = header.removeprefix(cls.HTTP_PREFIX)
        elif header in cls.UNPREFIXED_HEADERS:
            bare = header
        else:
            return None
        return bare.replace("_", "-").title()

    @classmethod
    def to_wsgi_name(cls, header):
        """Upper-case `header`, swap "-" for "_", and add HTTP_ unless unprefixed."""
        environ_key = header.replace("-", "_").upper()
        is_unprefixed = environ_key in cls.UNPREFIXED_HEADERS
        return environ_key if is_unprefixed else f"{cls.HTTP_PREFIX}{environ_key}"

    @classmethod
    def to_asgi_name(cls, header):
        """Upper-case `header` and swap "-" for "_"."""
        return header.replace("-", "_").upper()

    @classmethod
    def to_wsgi_names(cls, headers):
        """Return a dict of `headers` with each key passed through to_wsgi_name()."""
        return {cls.to_wsgi_name(name): value for name, value in headers.items()}

    @classmethod
    def to_asgi_names(cls, headers):
        """Return a dict of `headers` with each key passed through to_asgi_name()."""
        return {cls.to_asgi_name(name): value for name, value in headers.items()}
class QueryDict(MultiValueDict):
    """
    A specialized MultiValueDict which represents a query string.

    A QueryDict can be used to represent GET or POST data. It subclasses
    MultiValueDict since keys in such data can be repeated, for instance
    in the data from a form with a <select multiple> field.

    By default QueryDicts are immutable, though the copy() method
    will always return a mutable copy.

    Both keys and values set on this class are converted from the given encoding
    (DEFAULT_CHARSET by default) to str.
    """

    # These are both reset in __init__, but are specified here at the class
    # level so that unpickling will have valid values
    _mutable = True
    _encoding = None

    def __init__(self, query_string=None, mutable=False, encoding=None):
        super().__init__()
        self.encoding = encoding or settings.DEFAULT_CHARSET
        raw_query = query_string or ""
        max_fields = settings.DATA_UPLOAD_MAX_NUMBER_FIELDS
        if isinstance(raw_query, bytes):
            # query_string normally contains URL-encoded data, a subset of ASCII.
            try:
                raw_query = raw_query.decode(self.encoding)
            except UnicodeDecodeError:
                # ... but some user agents are misbehaving :-(
                raw_query = raw_query.decode("iso-8859-1")
        try:
            pairs = parse_qsl(
                raw_query,
                keep_blank_values=True,
                encoding=self.encoding,
                max_num_fields=max_fields,
            )
            for name, item in pairs:
                self.appendlist(name, item)
        except ValueError as e:
            # ValueError can also be raised if the strict_parsing argument to
            # parse_qsl() is True. As that is not used by Plain, assume that
            # the exception was raised by exceeding the value of max_num_fields
            # instead of fragile checks of exception message strings.
            raise TooManyFieldsSent(
                "The number of GET/POST parameters exceeded "
                "settings.DATA_UPLOAD_MAX_NUMBER_FIELDS."
            ) from e
        # Only now apply the requested mutability: the class-level default
        # (True) is what allowed appendlist() above.
        self._mutable = mutable

    @classmethod
    def fromkeys(cls, iterable, value="", mutable=False, encoding=None):
        """
        Return a new QueryDict with keys (may be repeated) from an iterable and
        values from value.
        """
        result = cls("", mutable=True, encoding=encoding)
        for name in iterable:
            result.appendlist(name, value)
        if not mutable:
            result._mutable = False
        return result

    @property
    def encoding(self):
        # Fall back to (and remember) the default charset when unset.
        if self._encoding is None:
            self._encoding = settings.DEFAULT_CHARSET
        return self._encoding

    @encoding.setter
    def encoding(self, value):
        self._encoding = value

    def _assert_mutable(self):
        """Raise AttributeError unless this instance is mutable."""
        if self._mutable:
            return
        raise AttributeError("This QueryDict instance is immutable")

    def __setitem__(self, key, value):
        self._assert_mutable()
        text_key = bytes_to_text(key, self.encoding)
        text_value = bytes_to_text(value, self.encoding)
        super().__setitem__(text_key, text_value)

    def __delitem__(self, key):
        self._assert_mutable()
        super().__delitem__(key)

    def __copy__(self):
        """Return a mutable copy that reuses the same keys and values."""
        clone = self.__class__("", mutable=True, encoding=self.encoding)
        for name, items in self.lists():
            clone.setlist(name, items)
        return clone

    def __deepcopy__(self, memo):
        """Return a mutable copy whose keys and value lists are deep-copied."""
        clone = self.__class__("", mutable=True, encoding=self.encoding)
        memo[id(self)] = clone
        for name, items in self.lists():
            clone.setlist(copy.deepcopy(name, memo), copy.deepcopy(items, memo))
        return clone

    def setlist(self, key, list_):
        self._assert_mutable()
        text_key = bytes_to_text(key, self.encoding)
        text_items = [bytes_to_text(item, self.encoding) for item in list_]
        super().setlist(text_key, text_items)

    def setlistdefault(self, key, default_list=None):
        self._assert_mutable()
        return super().setlistdefault(key, default_list)

    def appendlist(self, key, value):
        self._assert_mutable()
        text_key = bytes_to_text(key, self.encoding)
        text_value = bytes_to_text(value, self.encoding)
        super().appendlist(text_key, text_value)

    def pop(self, key, *args):
        self._assert_mutable()
        return super().pop(key, *args)

    def popitem(self):
        self._assert_mutable()
        return super().popitem()

    def clear(self):
        self._assert_mutable()
        super().clear()

    def setdefault(self, key, default=None):
        self._assert_mutable()
        text_key = bytes_to_text(key, self.encoding)
        text_default = bytes_to_text(default, self.encoding)
        return super().setdefault(text_key, text_default)

    def copy(self):
        """Return a mutable copy of this object."""
        return self.__deepcopy__({})

    def urlencode(self, safe=None):
        """
        Return an encoded string of all query string arguments.

        `safe` specifies characters which don't require quoting, for example::

            >>> q = QueryDict(mutable=True)
            >>> q['next'] = '/a&b/'
            >>> q.urlencode()
            'next=%2Fa%26b%2F'
            >>> q.urlencode(safe='/')
            'next=/a%26b/'
        """
        if safe:
            safe = safe.encode(self.encoding)

            def encode_pair(name, item):
                return f"{quote(name, safe)}={quote(item, safe)}"

        else:

            def encode_pair(name, item):
                return urlencode({name: item})

        # Keys and (stringified) values are passed to the encoder as bytes.
        encoded_pairs = [
            encode_pair(name.encode(self.encoding), str(item).encode(self.encoding))
            for name, items in self.lists()
            for item in items
        ]
        return "&".join(encoded_pairs)
class MediaType:
    """A "main/sub" media type plus its parameters, parsed from a raw string."""

    def __init__(self, media_type_raw_line):
        # A falsy raw line is parsed as the empty string.
        full_type, self.params = parse_header_parameters(media_type_raw_line or "")
        self.main_type, _, self.sub_type = full_type.partition("/")

    def __str__(self):
        sub_type_part = ("/%s" % self.sub_type) if self.sub_type else ""
        params_part = "".join(
            f"; {name}={value}" for name, value in self.params.items()
        )
        return "{}{}{}".format(self.main_type, sub_type_part, params_part)

    def __repr__(self):
        return f"<{self.__class__.__qualname__}: {self}>"

    @property
    def is_all_types(self):
        """True when both the main type and the sub type are "*"."""
        return self.main_type == "*" and self.sub_type == "*"

    def match(self, other):
        """
        Return True if this media type covers `other` (a raw media type
        string): either this is "*/*", or the main types are equal and this
        sub type is "*" or equal to the other's sub type.
        """
        if self.is_all_types:
            return True
        candidate = MediaType(other)
        same_main_type = self.main_type == candidate.main_type
        return same_main_type and self.sub_type in {"*", candidate.sub_type}
# It's neither necessary nor appropriate to use
# plain.utils.encoding.force_str() for parsing URLs and form inputs. Thus,
# this slightly more restricted function, used by QueryDict.
def bytes_to_text(s, encoding):
    """
    Decode a bytes object to str with the given encoding; input bytes that
    cannot be decoded are replaced with the Unicode replacement character
    (U+FFFD).

    Anything that is not a bytes object is returned unchanged.
    """
    if not isinstance(s, bytes):
        return s
    return str(s, encoding, "replace")
def split_domain_port(host):
    """
    Split a host into a (domain, port) tuple.

    The host is lowercased first, so the returned domain is lowercase. A host
    that fails validation yields an empty domain (and an empty port).
    """
    lowered = host.lower()

    if not host_validation_re.match(lowered):
        return "", ""

    if lowered.endswith("]"):
        # It's an IPv6 address without a port.
        return lowered, ""

    # Split on the last colon; with no colon the whole host is the domain.
    domain, colon, port = lowered.rpartition(":")
    if not colon:
        domain, port = lowered, ""
    # Remove a trailing dot (if present) from the domain.
    return domain.removesuffix("."), port
def validate_host(host, allowed_hosts):
    """
    Validate the given host for this site.

    The host is accepted when it matches a host or host pattern in
    ``allowed_hosts``. A pattern beginning with a period matches a domain and
    all its subdomains (e.g. ``.example.com`` matches ``example.com`` and any
    subdomain), ``*`` matches anything, and anything else must match exactly.

    Note: This function assumes that the given host is lowercased and has
    already had the port, if any, stripped off.

    Return ``True`` for a valid host, ``False`` otherwise.
    """
    for pattern in allowed_hosts:
        # "*" is checked first so the wildcard never reaches is_same_domain().
        if pattern == "*" or is_same_domain(host, pattern):
            return True
    return False
def parse_accept_header(header):
    """
    Split `header` on commas and return one MediaType per token, skipping
    tokens that are empty or whitespace-only.
    """
    media_types = []
    for token in header.split(","):
        if not token.strip():
            continue
        media_types.append(MediaType(token))
    return media_types