Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/cardinal_pythonlib/network.py : 28%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# cardinal_pythonlib/network.py
4"""
5===============================================================================
7 Original code copyright (C) 2009-2021 Rudolf Cardinal (rudolf@pobox.com).
9 This file is part of cardinal_pythonlib.
11 Licensed under the Apache License, Version 2.0 (the "License");
12 you may not use this file except in compliance with the License.
13 You may obtain a copy of the License at
15 https://www.apache.org/licenses/LICENSE-2.0
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
23===============================================================================
25**Network support functions.**
27NOTES:
29- ``ping`` requires root authority to create ICMP sockets in Linux
30- the ``/bin/ping`` command doesn't need prior root authority (because it has
31 the setuid bit set)
32- For Linux, it's therefore best to use the system ``ping``.
34https://stackoverflow.com/questions/2953462/pinging-servers-in-python
35https://stackoverflow.com/questions/316866/ping-a-site-in-python
37- Note that if you want a sub-second timeout, things get trickier.
38 One option is ``fping``.
40"""
42import os
43import ssl
44import subprocess
45import sys
46import tempfile
47from typing import BinaryIO, Dict, Generator, Iterable
48import urllib.request
50from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler
52log = get_brace_style_log_with_null_handler(__name__)
55# =============================================================================
56# Ping
57# =============================================================================
def ping(hostname: str, timeout_s: int = 5) -> bool:
    """
    Sends a single ping to a host via the operating system's ``ping`` command.

    Args:
        hostname: host name or IP address
        timeout_s: timeout in seconds

    Returns:
        ``True`` if the ``ping`` command exited with status 0, otherwise
        ``False``

    Raises:
        AssertionError: if the platform is neither Windows nor Linux
    """
    platform = sys.platform
    if platform == "win32":
        count_flag = "-n"
        # Windows is given its timeout in milliseconds.
        timeout_arg = str(timeout_s * 1000)
    elif platform.startswith('linux'):
        count_flag = "-c"
        # Linux is given its timeout in seconds.
        timeout_arg = str(timeout_s)
    else:
        raise AssertionError("Don't know how to ping on this operating system")

    command = [
        "ping",
        hostname,
        count_flag, "1",  # ping count
        "-w", timeout_arg,  # timeout
    ]
    # Output is captured (and discarded) so that it does not reach the
    # caller's terminal; only the exit status matters.
    completed = subprocess.run(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    return completed.returncode == 0  # zero success, non-zero failure
95# =============================================================================
96# Download things
97# =============================================================================
def download(url: str,
             filename: str,
             skip_cert_verify: bool = True,
             headers: Dict[str, str] = None) -> None:
    """
    Downloads a URL to a file, streaming the response to disk in chunks.

    Args:
        url:
            URL to download from
        filename:
            file to save to (opened in binary write mode, so any existing
            file of that name is overwritten)
        skip_cert_verify:
            skip SSL certificate check? Note that this defaults to ``True``,
            i.e. hostname checking and certificate verification are DISABLED
            unless the caller passes ``False``.
        headers:
            request headers (if not specified, a default will be used that
            mimics Mozilla 5.0 to avoid certain HTTP 403 errors)
    """
    if headers is None:
        headers = {'User-Agent': 'Mozilla/5.0'}
    log.info("Downloading from {} to {}", url, filename)

    # urllib.request.urlretrieve(url, filename)
    # ... sometimes fails (e.g. downloading
    # https://www.openssl.org/source/openssl-1.1.0g.tar.gz under Windows) with:
    # ssl.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:777)  # noqa
    # ... due to this certificate root problem (probably because OpenSSL
    # [used by Python] doesn't play entirely by the same rules as others?):
    # https://stackoverflow.com/questions/27804710
    # So:

    # Patching this by faking a browser request by adding User-Agent to request
    # headers, using this as example:
    # https://stackoverflow.com/questions/42863240/how-to-get-round-the-http-error-403-forbidden-with-urllib-request-using-python  # noqa

    ctx = ssl.create_default_context()  # type: ssl.SSLContext
    if skip_cert_verify:
        # Pass the URL as a format argument, as the other log calls in this
        # module do, rather than splicing it into the format string (where
        # any braces in the URL could be taken for format fields).
        log.debug("Skipping SSL certificate check for {}", url)
        # check_hostname must be turned off before verify_mode can be set to
        # CERT_NONE.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
    page = urllib.request.Request(url, headers=headers)
    chunk_size = 64 * 1024  # bytes per read
    with urllib.request.urlopen(page, context=ctx) as u, \
            open(filename, 'wb') as f:
        # Copy in fixed-size chunks so that a large download is never held
        # in memory in its entirety; read() returns b"" at end of stream.
        for chunk in iter(lambda: u.read(chunk_size), b""):
            f.write(chunk)
146# =============================================================================
147# Generators
148# =============================================================================
def gen_binary_files_from_urls(
        urls: Iterable[str],
        on_disk: bool = False,
        show_info: bool = True) -> Generator[BinaryIO, None, None]:
    """
    Yield one open binary file-like object for each URL supplied.

    Each yielded object is only valid until the generator is advanced: the
    underlying resource (and, for on-disk files, the temporary directory
    holding it) is released when the caller asks for the next item.

    Args:
        urls: iterable of URLs
        on_disk: if ``True``, each URL is first downloaded to a temporary
            file and that file is yielded (permitting random access); if
            ``False``, the URL response object itself is yielded (which will
            not permit random access)
        show_info: log the start and end of each read? (Applies to the
            in-memory path only.)

    Yields:
        files, each of type :class:`BinaryIO`
    """
    for url in urls:
        if not on_disk:
            # In-memory path: hand back the response object directly.
            if show_info:
                log.info("Reading from URL: {}", url)
            with urllib.request.urlopen(url) as response:
                yield response
            if show_info:
                log.info("... finished reading from URL: {}", url)
            continue

        # On-disk path: necessary for e.g. zip processing (random access).
        with tempfile.TemporaryDirectory() as scratch_dir:
            local_path = os.path.join(scratch_dir, "tempfile")
            download(url=url, filename=local_path)
            with open(local_path, 'rb') as local_file:
                yield local_file