# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
and are also useful for external consumption.
"""

import codecs
import contextlib
import io
import os
import re
import socket
import struct
import sys
import tempfile
import warnings
import zipfile
from collections import OrderedDict

from .__version__ import __version__
from . import certs
# to_native_string is unused here, but imported here for backwards compatibility
from ._internal_utils import to_native_string
from .compat import parse_http_list as _parse_list_header
from .compat import (
    quote, urlparse, bytes, str, unquote, getproxies,
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
    proxy_bypass_environment, getproxies_environment, Mapping)
from .cookies import cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import (
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()

DEFAULT_PORTS = {'http': 80, 'https': 443}


if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
            proxyEnable = int(winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")  # mask dots
            test = test.replace("*", r".*")  # change glob sequence
            test = test.replace("?", r".")   # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True, if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)


def dict_to_sequence(d):
    """Returns d.items() if d behaves like a dictionary, otherwise d unchanged."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no usable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)
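
# Illustrative example (not part of the upstream module): for in-memory
# streams like io.BytesIO, which have no usable fileno, super_len falls back
# to seek()/tell() and reports the bytes remaining after the current position.
#     >>> super_len(io.BytesIO(b'hello'))
#     5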


def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    netrc_file = os.environ.get('NETRC')
    if netrc_file is not None:
        netrc_locations = (netrc_file,)
    else:
        netrc_locations = ('~/{}'.format(f) for f in NETRC_FILES)

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in netrc_locations:
            try:
                loc = os.path.expanduser(f)
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # App Engine hackiness.
    except (ImportError, AttributeError):
        pass


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)
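
# Illustrative example (not part of the upstream module; the path is
# hypothetical): any object with a string 'name' attribute that isn't a
# pseudo-name like '<stdin>' qualifies.
#     >>> f = open('/tmp/example.txt', 'w')
#     >>> guess_filename(f)
#     'example.txt'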


def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    zip_file = zipfile.ZipFile(archive)
    if member not in zip_file.namelist():
        return path

    # we have a valid zip archive and a valid member of that archive
    tmp = tempfile.gettempdir()
    extracted_path = os.path.join(tmp, *member.split('/'))
    if not os.path.exists(extracted_path):
        extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path


def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value
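
# Illustrative example (not part of the upstream module): surrounding quotes
# are dropped and browser-style backslash escapes are undone.
#     >>> unquote_header_value('"attachment.txt"')
#     'attachment.txt'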


def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def _parse_content_type_header(header):
    """Returns content type and parameters from given header

    :param header: string
    :return: tuple containing content type and dictionary of
        parameters
    """

    tokens = header.split(';')
    content_type, params = tokens[0].strip(), tokens[1:]
    params_dict = {}
    items_to_strip = "\"' "

    for param in params:
        param = param.strip()
        if param:
            key, value = param, True
            index_of_equals = param.find("=")
            if index_of_equals != -1:
                key = param[:index_of_equals].strip(items_to_strip)
                value = param[index_of_equals + 1:].strip(items_to_strip)
            params_dict[key.lower()] = value
    return content_type, params_dict
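
# Illustrative example (not part of the upstream module): parameter keys are
# lowercased and surrounding quotes/whitespace are stripped.
#     >>> _parse_content_type_header('text/html; charset=UTF-8')
#     ('text/html', {'charset': 'UTF-8'})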


def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'

    if 'application/json' in content_type:
        # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset
        return 'utf-8'
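
# Illustrative example (not part of the upstream module): an explicit charset
# wins; a bare text type falls back to ISO-8859-1 as above.
#     >>> get_encoding_from_headers({'content-type': 'text/html; charset=utf-8'})
#     'utf-8'
#     >>> get_encoding_from_headers({'content-type': 'text/plain'})
#     'ISO-8859-1'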


def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length
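
# Illustrative example (not part of the upstream module): a None or
# non-positive slice_length yields the whole string in a single slice.
#     >>> list(iter_slices('abcdef', 2))
#     ['ab', 'cd', 'ef']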


def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tries:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)
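
# Illustrative example (not part of the upstream module): %41 ('A') is
# unreserved and gets decoded, while %2F ('/') is reserved and stays encoded.
#     >>> unquote_unreserved('http://example.com/%41%2Fb')
#     'http://example.com/A%2Fb'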


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)
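
# Illustrative example (not part of the upstream module): illegal characters
# such as spaces are quoted, while reserved characters are left intact.
#     >>> requote_uri('http://example.com/a b?q=1')
#     'http://example.com/a%20b?q=1'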


def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)
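
# Illustrative example (not part of the upstream module), matching the
# docstring above:
#     >>> address_in_network('192.168.1.1', '192.168.1.0/24')
#     True
#     >>> address_in_network('192.168.1.1', '192.168.100.0/24')
#     False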


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xffffffff ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack('>I', bits))
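
# Illustrative example (not part of the upstream module), matching the
# docstring above:
#     >>> dotted_netmask(24)
#     '255.255.255.0'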


def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the CIDR format in the no_proxy variable.

    :rtype: bool
    """
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True
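
# Illustrative example (not part of the upstream module): exactly one '/', a
# valid dotted address, and a mask in 1..32 are required.
#     >>> is_valid_cidr('192.168.1.0/24')
#     True
#     >>> is_valid_cidr('192.168.1.0')
#     False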


@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing.
    """
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value


def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    # Prioritize lowercase environment variables over uppercase
    # to keep a consistent behaviour with other http projects (curl, wget).
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        if is_ipv4_address(parsed.hostname):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # If the no_proxy entry was defined in plain IP notation
                    # instead of CIDR notation and matches the IP of the URL
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for host in no_proxy:
                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    with set_environ('no_proxy', no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

        if bypass:
            return True

    return False


def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The URL being requested
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy
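
# Illustrative example (not part of the upstream module): more specific
# 'scheme://host' keys win over bare scheme keys, then the 'all' variants.
#     >>> select_proxy('http://example.com/', {'http': 'http://proxy:3128'})
#     'http://proxy:3128'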


def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '%s/%s' % (name, __version__)


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers.

    i.e. Link: <http://.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    value = value.strip(replace_chars)
    if not value:
        return links

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links
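
# Illustrative example (not part of the upstream module), in the format the
# docstring above describes:
#     >>> parse_header_links('<http://example.com/page2>; rel="next"')
#     [{'url': 'http://example.com/page2', 'rel': 'next'}]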


# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
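
# Illustrative example (not part of the upstream module): the null-byte
# pattern of the first four bytes reveals the UTF flavour even without a BOM.
#     >>> guess_json_utf(b'{"a": 1}')
#     'utf-8'
#     >>> guess_json_utf('{"a": 1}'.encode('utf-16-le'))
#     'utf-16-le'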


def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))
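
# Illustrative example (not part of the upstream module): a bare host gains
# the default scheme, while an existing scheme is left alone.
#     >>> prepend_scheme_if_needed('example.com/path', 'http')
#     'http://example.com/path'
#     >>> prepend_scheme_if_needed('https://example.com/path', 'http')
#     'https://example.com/path'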


def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username, password.

    :rtype: (str, str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth
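
# Illustrative example (not part of the upstream module): credentials are
# percent-decoded; URLs without credentials yield empty strings.
#     >>> get_auth_from_url('http://user:p%40ss@example.com/')
#     ('user', 'p@ss')
#     >>> get_auth_from_url('http://example.com/')
#     ('', '')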


# Moved outside of function to avoid recompile every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))


def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))
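
# Illustrative example (not part of the upstream module): both the user:pass
# component and the fragment are dropped.
#     >>> urldefragauth('http://user:pass@example.com/path#section')
#     'http://example.com/path'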


def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
        try:
            body_seek(prepared_request._body_position)
        except (IOError, OSError):
            raise UnrewindableBodyError("An error occurred when rewinding request "
                                        "body for redirect.")
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")