Coverage for /var/devmt/py/utils4_1.7.0/utils4/utils.py: 100%

126 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2024-12-21 20:09 +0000

1# -*- coding: utf-8 -*- 

2""" 

3:Purpose: Central library for general utility-based methods. 

4 

5 This ``utils`` module was the starting place of the original 

6 ``utils`` library. Therefore, it's historically been a 

7 'dumping-ground' for general S3DEV utilities and function 

8 wrappers specialised to the needs of S3DEV projects, which 

9 did not seem to fit in anywhere else. So we'll be honest, 

10 it's a bit of a melting pot of functions. 

11 

12 With the overhaul of the ``utils3`` library into ``utils4``, 

13 *many* of the original functions, which were no longer being 

14 used, have been removed in an effort to clean the module's 

15 code base. 

16 

17 If you are looking for a function which used to be here, 

18 please refer to the last ``utils3`` release, which is 

19 v0.15.1. 

20 

21:Platform: Linux/Windows | Python 3.7+ 

22:Developer: J Berendt 

23:Email: support@s3dev.uk 

24 

25Note: 

26 Any libraries which are not built-in are imported *only* 

27 if/when the function which uses them is called. 

28 

29 This helps to reduce the packages required by ``utils4``. 

30 

31:Example: 

32 

33 For usage examples, please refer to the docstring for each method. 

34 

35""" 

36# pylint: disable=import-error 

37# pylint: disable=import-outside-toplevel # Keep required dependencies to a minimum. 

38# pylint: disable=wrong-import-order 

39 

40from __future__ import annotations 

41 

42import importlib 

43import io 

44import os 

45import platform 

46import re 

47import site 

48import string 

49import subprocess 

50from datetime import datetime 

51from typing import Generator, Union 

52# locals 

53from utils4.reporterror import reporterror 

54from utils4.user_interface import ui 

55try: 

56 # The C library is only available if installed. 

57 from . import futils # pylint: disable=no-name-in-module 

58except ImportError: 

59 pass 

60 

61# OS-dependent imports 

62try: # pragma: nocover 

63 import win32api 

64 import win32file 

65except ImportError: 

66 pass 

67 

68 

def clean_dataframe(df: pd.DataFrame):  # noqa  # pylint: disable=undefined-variable
    """Tidy the column names and text values of a ``pandas.DataFrame``.

    Args:
        df (pd.DataFrame): DataFrame to be cleaned.

    :Design:
        The DataFrame is modified *in-place*; nothing is returned.

        The following cleaning tasks are performed:

        - Column names:

            - Leading and trailing whitespace is stripped.
            - The name is converted to lower case.
            - The ``-``, ``[space]`` and ``_`` characters are each
              replaced with an underscore.
            - All other punctuation characters are removed.

        - Data:

            - Every column whose dtype is ``object`` is cast to ``str``
              and stripped of leading and trailing whitespace. Note
              that the cast applies to *all* values in such a column,
              including any non-string values it holds.

    :Example:

        Example for cleaning a DataFrame::

            >>> import pandas as pd  # For demonstration only.
            >>> from utils4 import utils

            >>> # Define a dirty testing dataset.
            >>> df = pd.DataFrame({'Column #1': [' Text field 1.',
                                                 ' Text field 2.',
                                                 ' Text field 3. ',
                                                 ' Text field 4. ',
                                                 ' Text field 5. '],
                                   ' COLUmn (2)': [1.0,
                                                   2.0,
                                                   3.0,
                                                   '4',
                                                   '5.0'],
                                   'COLUMN 3 ': [1,
                                                 2,
                                                 3.0,
                                                 4,
                                                 5.0]})
            >>> utils.clean_dataframe(df)
            >>> df
                    column_1 column_2  column_3
            0  Text field 1.      1.0       1.0
            1  Text field 2.      2.0       2.0
            2  Text field 3.      3.0       3.0
            3  Text field 4.        4       4.0
            4  Text field 5.      5.0       5.0

    """
    # Translation table: drop all punctuation, then override the three
    # separator characters so they become an underscore instead.
    table = str.maketrans({**dict.fromkeys(string.punctuation, ''),
                           '-': '_',
                           '_': '_',
                           ' ': '_'})
    # Clean column names.
    df.columns = [name.strip().lower().translate(table) for name in df.columns]
    # Strip whitespace from text values.
    for name in df.columns:
        series = df[name]
        if series.dtype != object:
            continue
        df[name] = series.astype(str).str.strip()

137 

def direxists(path: str, create_path: bool=False) -> bool:
    """Test if a directory exists. If not, create it, if instructed.

    Args:
        path (str): The directory path to be tested.
        create_path (bool, optional): Create the path if it doesn't exist.
            Defaults to False.

    :Design:
        Function designed to test if a directory path exists. If the
        path does *not* exist, the path can be created; as determined by
        the ``create_path`` parameter.

        This function extends the built-in :func:`os.path.exists()` function
        in that the path can be created if it doesn't already exist, by
        passing the ``create_path`` parameter as ``True``.

        If the path is created by this function, its existence is tested
        again and the result of that test is returned.

        If a filename is passed with the path, the filename is automatically
        stripped from the path before the test begins. A filename is
        detected by the presence of a file extension on the last path
        component. If only a bare filename is passed (e.g. ``'file.csv'``),
        the directory tested is the current directory.

    :Example:

        Test if a directory exists, and create it if it does not exist::

            >>> from utils4 import utils

            >>> utils.direxists(path='/tmp/path/to_create/file.csv',
                                create_path=True)

    Returns:
        bool: True if the directory exists (or was created), otherwise False.

    """
    if os.path.splitext(path)[1]:
        # Remove file if passed with the path. A bare filename has no
        # directory part, which refers to the current directory; without
        # the fallback the empty string would be tested (and passed to
        # os.makedirs, which raises for an empty path).
        path = os.path.dirname(path) or os.curdir
    if os.path.exists(path):
        return True
    if not create_path:
        return False
    # exist_ok guards against the path being created by another process
    # between the existence test above and this call.
    os.makedirs(name=path, exist_ok=True)
    return os.path.exists(path)

184 

def excludedirs(source: list[str], exclude: list[str]) -> list[str]:
    """Exclude the listed directories from the source.

    Args:
        source (list[str]): List of source paths.
        exclude (list[str]): List of directories to be excluded from
            ``source``.

    :Design:
        The paths in ``exclude`` are expanded to their realpath, with
        a trailing path separator explicitly added to ensure only
        directory paths are matched.

        For example, if the trailing path separator was not added,
        ``.gitignore`` would be excluded if ``./.git`` was in
        ``exclude`` paths. Adding the trailing path separator
        prevents this.

        The platform's own separator (:data:`os.sep`) is appended, as
        this is the separator used in the value returned by
        :func:`os.path.realpath`.

        The ``source`` paths are *not* expanded; each is tested as-is
        for containing any of the expanded ``exclude`` paths.

    Returns:
        list[str]: A new list of paths where any ``source`` path
        containing any of the expanded ``exclude`` paths has been
        removed.

    """
    # Materialised as a tuple, as it's iterated once per source path.
    excluded = tuple(f'{os.path.realpath(e)}{os.sep}' for e in exclude)
    return [s for s in source if not any(e in s for e in excluded)]

212 

def fileexists(filepath: str, error: str='ignore') -> bool:
    """Test if a file exists. If not, notify the user or raise an error.

    Args:
        filepath (str): Full file path to test.
        error (str, optional): Action to be taken if the file does not exist.
            Defaults to 'ignore'. Options:

                - ``'ignore'``: Take no action.
                - ``'alert'``: Alert the user the filepath does not exist via
                  a simple message to the terminal.
                - ``'raise'``: Raise a ``FileNotFoundError``. This will abort
                  all subsequent processing.

            Any other value is treated the same as ``'ignore'``.

    :Design:
        Function designed to check if a file exists. A boolean value is
        returned to the calling program.

        This function extends the built-in :func:`os.path.isfile` function
        in that the user can be notified if the path does not exist, or an
        error can be raised.

    :Example:

        Test if a file exists, using ``'ignore'``, the default action::

            >>> from utils4 import utils

            >>> if utils.fileexists(filepath='/tmp/path/to/file.csv'):
            >>>     ...
            >>> else:
            >>>     ...


        Test if a file exists, using ``'alert'``::

            >>> from utils4 import utils

            >>> if utils.fileexists(filepath='/tmp/path/to/file.csv',
                                    error='alert'):
            >>>     ...
            >>> else:
            >>>     ...

            File not found: /tmp/path/to/file.csv


        Test if a file exists, using ``'raise'``::

            >>> from utils4 import utils

            >>> if utils.fileexists(filepath='/tmp/path/to/file.csv',
                                    error='raise'):
            >>>     ...
            >>> else:
            >>>     ...

            FileNotFoundError: File not found: /tmp/path/to/file.csv

    Raises:
        FileNotFoundError: If the filepath does not exist and the ``error``
            parameter is ``'raise'``.

    Returns:
        bool: True if the file exists, otherwise False.

    """
    if os.path.isfile(filepath):
        return True
    # The file was not found; act per the caller's instruction.
    if error == 'raise':
        raise FileNotFoundError(f'File not found: {filepath}')
    if error == 'alert':
        ui.print_warning(f'\nFile not found: {filepath}')
    return False

289 

290 

def format_exif_date(datestring: str,
                     input_format: str='%Y:%m:%d %H:%M:%S',
                     output_format: str='%Y%m%d%H%M%S',
                     return_datetime: bool=False) -> Union[datetime, str]:
    """Format an exif timestamp.

    This function is useful for storing an exif date as a datetime string.
    For example, extracting the exif data from an image to be stored into
    a database.

    Args:
        datestring (str): The datetime string to be formatted.
            A typical exif date format is: yyyy:mm:dd hh:mi:ss
        input_format (str, optional): Format mask for the input datetime value.
            Defaults to '%Y:%m:%d %H:%M:%S'.
        output_format (str, optional): Format mask for the output datetime,
            if returned as a string. Defaults to '%Y%m%d%H%M%S'.
        return_datetime (bool, optional): Return a ``datetime`` object, rather
            than a formatted string. Defaults to False.

    :Design:
        The ``datestring`` is parsed using ``input_format`` (the default
        being the standard exif capture format, e.g.
        '2010:01:31 12:31:18'), then either returned as the parsed
        ``datetime`` object, or re-formatted using ``output_format``.

    :Example:

        Convert the exif datetime to the default output string format::

            >>> from utils4 import utils

            >>> formatted = utils.format_exif_date('2010:01:31 12:31:18')
            >>> formatted
            '20100131123118'


        Convert the exif datetime to a datetime object::

            >>> from utils4 import utils

            >>> formatted = utils.format_exif_date('2010:01:31 12:31:18',
                                                   return_datetime=True)
            >>> formatted
            datetime.datetime(2010, 1, 31, 12, 31, 18)

    Raises:
        ValueError: Raised by :meth:`datetime.strptime` if ``datestring``
            does not match ``input_format``.

    Returns:
        Union[str, datetime.datetime]: A formatted datetime string, if the
        ``return_datetime`` parameter is ``False``, otherwise a
        ``datetime.datetime`` object.

    """
    parsed = datetime.strptime(datestring, input_format)
    return parsed if return_datetime else parsed.strftime(output_format)

351 

def get_os() -> str:
    """Get the platform's OS.

    This method is a very thin wrapper around the :func:`platform.system()`
    function, whose return value is converted to lower case.

    :Example:
        ::

            >>> from utils4 import utils

            >>> utils.get_os()
            'linux'

    Returns:
        str: A string of the platform's operating system, in lower case.

    """
    system_name = platform.system()
    return system_name.lower()

371 

def get_removable_drives() -> Generator[str, str, str]:
    """Return a generator of removable drives.

    .. important::

        This is a Windows-only function.

    Note:
        A removable drive is identified by the constant 2, which is the
        value of the enum ``win32con.DRIVE_REMOVABLE``.

        This code uses the integer 2 to:

            1) Save the extra import.
            2) Help keep the code compact, concise and clear.

    Note:
        As this is a generator function, the OS test (and therefore the
        ``NotImplementedError``) only occurs when the generator is first
        iterated, not when this function is called.

    :Example:

        To obtain a list of removable drives from a Windows system::

            >>> from utils4 import utils

            >>> list(utils.get_removable_drives())
            ['E:', 'H:']

    Raises:
        NotImplementedError: Raised if the OS is not Windows.

    Yields:
        Generator[str]: Each removable drive letter as a
        string. For example: ``'E:'``

    """
    if get_os() != 'windows':
        raise NotImplementedError('This function is Windows-only.')
    # The drive strings are split on the backslash + NUL pair which
    # follows each drive, leaving values in the form 'C:'.
    for drive in win32api.GetLogicalDriveStrings().split('\\\x00'):  # pragma: nocover
        if win32file.GetDriveType(drive) == 2:
            yield drive

410 

def getdrivername(driver: str, return_all: bool=False) -> list:  # pragma: nocover
    """Return the ODBC driver name(s) matching the regex pattern.

    Args:
        driver (str): A **regex pattern** for the ODBC driver you're searching.
        return_all (bool, optional): If True, *all* drivers matching the
            pattern are returned. Defaults to False, which returns only the
            first driver name.

    :Design:
        This is a helper function designed to get and return the names
        of ODBC drivers.

        The ``driver`` parameter should be formatted as a regex
        pattern, which is tested against each driver name using
        :func:`re.search`. If multiple drivers are found, by default,
        only the first driver in the list is returned. However, the
        ``return_all`` parameter adjusts this action to return all driver
        names.

        This function has a dependency on the ``pyodbc`` library. Therefore,
        the :func:`~utils.testimport()` function is called before ``pyodbc``
        is imported. If the ``pyodbc`` library is not installed, the user is
        notified and an empty list is returned.

    :Dependencies:
        - ``pyodbc`` library

    :Example:

        Get the driver name for the SQL Server ODBC driver::

            >>> from utils4 import utils
            >>> driver = utils.getdrivername(driver='SQL Server.*')

    :Troubleshooting:

        - On Unix-like systems, the following error message::

            ImportError: libodbc.so.2: cannot open shared object file: No such file or directory

          can be resolved by installing the ``unixodbc-dev`` package as::

            $ sudo apt install unixodbc-dev

    Returns:
        list: A list of the matching ODBC driver names if ``return_all``
        is True, or an empty list if no drivers were found (or ``pyodbc``
        is not installed).

        Note: If ``return_all`` is False and at least one driver matched,
        the first matching driver name is returned as a ``str``, *not*
        as a list.

    """
    if not testimport('pyodbc', verbose=True):
        return []
    import pyodbc
    matches = list(filter(lambda name: re.search(driver, name), pyodbc.drivers()))
    if matches and not return_all:
        # Single-driver mode: hand back the name itself.
        return matches[0]
    return matches

466 

def getsitepackages() -> str:
    """Return the Python installation's site packages directory.

    :Design:
        The function first uses the local :func:`~utils.get_os()`
        function to get the system's OS. The OS is then tested and the
        site-packages location is returned using the OS-appropriate element
        from the list returned by the built-in :func:`site.getsitepackages`
        function.

        If the OS is not accounted for, or fails the test, a value of
        'unknown' is returned.

        The OS name is tested by its *prefix*, rather than by a substring
        match, as the substring ``'win'`` is also found in ``'darwin'``
        (macOS), which must not be treated as Windows.

    :Rationale:
        The need for this function comes out of the observation there are many
        (many!) different ways on stackoverflow (and other sites) to get the
        location to which ``pip`` will install a package, and many of the
        answers contradict each other. Also, the :func:`site.getsitepackages`
        function returns a list of options (in all tested cases); and the
        Linux / Windows paths are in different locations in this list.

    :Example:

        Get the location of the ``site-packages`` directory::

            >>> from utils4 import utils

            >>> utils.getsitepackages()
            '/home/<username>/venvs/py38/lib/python3.8/site-packages'

    Returns:
        str: Full path to the ``site-packages`` directory, or the string
        ``'unknown'`` if the OS is neither Windows nor Linux.

    """
    _os = get_os()
    pkgs = 'unknown'
    if _os.startswith('win'):  # pragma: nocover  # utils4 will *rarely* ever be tested on Windows.
        pkgs = site.getsitepackages()[1]
    elif _os.startswith('lin'):
        pkgs = site.getsitepackages()[0]
    return pkgs

508 

def gzip_compress(in_path: str, out_path: str=None, size: int=None) -> str:
    """Compress a file using ``gzip``.

    Args:
        in_path (str): Full path to the file to be compressed. If the file
            does not exist, a ``FileNotFoundError`` is raised.
        out_path (str, optional): Full path to the compressed output file.
            Defaults to None. If this value is ``None`` a ``'.gz'`` file
            extension is appended to the path provided to the ``in_path``
            parameter.
        size (int, optional): Size of the chunk (in bytes) to be read from
            the input file and compressed in each pass. Defaults to 10MiB.

    :Design:
        The input file is read in chunks of ``size`` bytes. Each chunk is
        compressed on its own (at compression level 9) and appended to the
        output file.

    :Example:

        Compress a text file::

            >>> from utils4 import utils

            >>> utils.gzip_compress(in_path='/tmp/rand.txt')
            '/tmp/rand.txt.gz'


        Compress a text file, specifying the output path::

            >>> from utils4 import utils

            >>> utils.gzip_compress(in_path='/tmp/rand.txt', out_path='/tmp/rand2.txt.gz')
            '/tmp/rand2.txt.gz'

    Raises:
        FileNotFoundError: If ``in_path`` does not exist.

    Returns:
        str: Full path to the output file.

    """
    import gzip
    chunk_size = 1024*1024*10 if size is None else size  # Default to 10MiB.
    if fileexists(filepath=in_path, error='raise'):
        out_path = f'{in_path}.gz' if out_path is None else out_path
        with open(in_path, 'rb') as src, open(out_path, 'wb') as dst:
            # Read until an empty bytes object signals end-of-file.
            for block in iter(lambda: src.read(chunk_size), b''):
                dst.write(gzip.compress(data=block, compresslevel=9))
    return out_path

555 

def gzip_decompress(path: str, encoding: str='utf-8', size: int=None) -> bool:
    """Decompress a ``.gz`` file using ``gzip``.

    Args:
        path (str): Full path to the file to be decompressed. If the file
            does not exist, the error is reported and False is returned.
        encoding (str, optional): Encoding to be used to decode the
            decompressed binary data. Defaults to 'utf-8'.
        size (int, optional): Number of decompressed characters to be
            read / written in each pass. Defaults to 1MiB (1048576).

    Note:
        The output path is simply the ``path`` value with *last* file
        extension removed.

        In general cases, a file compressed using gzip will have a ``.gz``
        extension appended onto the existing filename and extension.
        For example: ``data.txt.gz``.

    Note:
        **Newline Characters:**

        Both the compressed stream and the decompressed file are opened
        with the ``newline`` character specified as ``''``, so no newline
        translation takes place; the *original* line endings - those used
        in the compressed file - are written back to the decompressed file.

        This method is used to ensure the checksum hash on the original
        (unzipped) and decompressed file can be compared.

    Note:
        **Encoding:**

        The decompressed data is decoded using ``encoding``, and the output
        file is always written as UTF-8.

    :Design:
        The archive is streamed through :func:`gzip.open`, rather than
        decompressing fixed-size slices of the *compressed* file. A slice
        of compressed data is not (generally) a complete gzip stream in
        its own right, so slicing fails for any archive larger than a
        single chunk. Streaming also ensures a multi-byte character is
        never split across two reads.

        Any exception raised (including the ``FileNotFoundError`` for a
        non-existent ``path``) is passed to ``reporterror`` and False is
        returned; exceptions are not propagated to the caller.

    :Example:

        Decompress a text file::

            >>> from utils4 import utils

            >>> utils.gzip_decompress(path='/tmp/rand.txt.gz')
            True

    Returns:
        bool: True if the decompression was successful, otherwise False.

    """
    # pylint: disable=line-too-long
    import gzip
    size = 1024*1024 if size is None else size  # Default to 1 MiB.
    success = False
    try:
        if fileexists(filepath=path, error='raise'):
            out_path = os.path.splitext(path)[0]
            with gzip.open(path, 'rt', encoding=encoding, newline='') as f_in, \
                    open(out_path, 'w', encoding='utf-8', newline='') as f_out:
                # Read until an empty string signals the end of the stream.
                for chunk in iter(lambda: f_in.read(size), ''):
                    f_out.write(chunk)
            success = True
    except Exception as err:
        # Deliberate best-effort: report the error and signal failure
        # to the caller via the return value.
        reporterror(err)
    return success

617 

618# Tested by the test_x_futils module. 

def isascii(path: str, size: int=2048) -> bool:  # pragma: nocover
    """Determine if a file is plain-text (ASCII only).

    A file is deemed non-binary if *all* of the characters in the file
    are within ASCII's printable range.

    Args:
        path (str): Full path to the file to be tested.
        size (int, optional): Number of bytes to read in a chunk.
            Defaults to 2048 (2 KiB).

    :Example:

        Test if a file is a plain-text (ASCII-only) file::

            >>> from utils4 import utils

            >>> utils.isascii('/usr/local/bin/python3.12-config')
            True

    :Design:
        This method calls the :func:`futils.isascii` function with the
        given arguments, and converts the result to a ``bool``. The test
        itself is carried out by that function; for design detail, refer
        to the :func:`isbinary` documentation.

        Note: ``futils`` is the optional C library. If it could not be
        imported when this module was loaded, calling this function
        raises a ``NameError``.

    Returns:
        bool: True if *all* characters in the file are plain-text
        (ASCII only), otherwise False.

    """
    result = futils.isascii(path, size)
    return bool(result)

653 

654# Tested by the test_x_futils module. 

def isbinary(path: str, size: int=1024) -> bool:  # pragma: nocover
    """Determine if a file is binary.

    A file is deemed non-binary if *all* of the characters in the file
    are within ASCII's printable range. Refer to the **References**
    section for further definition.

    Args:
        path (str): Full path to the file to be tested.
        size (int, optional): Number of bytes to read in a chunk.
            Defaults to 1024 (1 KiB).

    :Example:

        Test if a file is a binary file or executable::

            >>> from utils4 import utils

            >>> utils.isbinary('/usr/bin/python3')
            True

    :Design:
        This method calls the :func:`futils.isbinary` function with the
        given arguments, and converts the result to a ``bool``.

        The intended behaviour of that function is: for each chunk of
        size ``size``, read each character; if the character is outside
        the ASCII printable range, True is returned immediately as the
        file is not plain-text. Otherwise, if a file is read to the end,
        with all characters being within ASCII's printable range, False
        is returned as the file is plain-text (ASCII only).

        Note: ``futils`` is the optional C library. If it could not be
        imported when this module was loaded, calling this function
        raises a ``NameError``.

    :References:

        - `How to detect if a file is binary <so_ref1_>`_
        - `ASCII printable character reference <so_ref2_>`_

    .. _so_ref1: https://stackoverflow.com/a/7392391/6340496
    .. _so_ref2: https://stackoverflow.com/a/32184831/6340496

    Returns:
        bool: True if *any* of the characters in the file are outside
        ASCII's printable range. Otherwise, False.

    """
    result = futils.isbinary(path, size)
    return bool(result)

701 

702# Tested by the test_x_futils module. 

def iszip(path: str) -> bool:  # pragma: nocover
    r"""Determine if a file is a ``ZIP`` archive.

    Args:
        path (str): Full path to the file to be tested.

    Tip:
        As Python wheel files are `ZIP-format archives <zip-wheel_>`_
        (per PEP-491), this function can be used to test wheel files as
        well.

    :Example:

        Test if a file is a ZIP archive::

            >>> from utils4 import utils

            >>> utils.iszip('/path/to/file.zip')
            True

        Test if a file is a true Python wheel::

            >>> from utils4 import utils

            >>> utils.iszip('/path/to/sphinx-8.1.3-py3-none-any.whl')
            True

    Note:
        A file is tested to be a ``ZIP`` archive by checking the
        `first four bytes <zip-format_>`_ of the file itself, *not*
        using the file extension.

        It is up to the caller to handle empty or spanned ZIP
        archives appropriately.

    :Design:
        This method calls the :func:`futils.iszip` function with the
        given argument, and converts the result to a ``bool``.

        Note: ``futils`` is the optional C library. If it could not be
        imported when this module was loaded, calling this function
        raises a ``NameError``.

    Returns:
        bool: True if the first four bytes of the file match any of
        the below. Otherwise, False.

            - ``\x50\x4b\x03\x04``: 'Standard' archive
            - ``\x50\x4b\x05\x06``: Empty archive
            - ``\x50\x4b\x07\x08``: Spanned archive

    .. _zip-format: https://en.wikipedia.org/wiki/ZIP_(file_format)#Local_file_header
    .. _zip-wheel: https://peps.python.org/pep-0491/#abstract

    """
    result = futils.iszip(path)
    return bool(result)

755 

def ping(server: str, count: int=1, timeout: int=5, verbose: bool=False) -> bool:
    r"""Ping an IP address, server or web address.

    Args:
        server (str): IP address, server name or web address.
        count (int, optional): The number of ping attempts. Defaults to 1.
        timeout (int, optional): Number of seconds to wait for response.
            Defaults to 5.
        verbose (bool, optional): Display all stdout and/or stderr output, if
            the returned status code is non-zero. Defaults to False.

    :Design:
        Using the platform's native ``ping`` command (via a ``subprocess``
        call) the host is pinged, and a boolean value is returned to the
        caller to indicate if the ping was successful.

        A ping status:

            - 0 returns True
            - Non-zero returns False

        If the server name is preceded by ``\\`` or ``//``, these are
        stripped out using the built-in :func:`os.path.basename()` function.

        Only Windows and Linux are supported. The OS name is tested by its
        *prefix*, rather than by a substring match, as the substring
        ``'win'`` is also found in ``'darwin'`` (macOS), which must not be
        given the Windows ``ping`` arguments. For any other OS the user is
        alerted, no ping is attempted, and False is returned.

    :Example:

        Ping the local PC at 127.0.0.1::

            >>> from utils4 import utils

            >>> utils.ping(server='127.0.0.1')
            True


        Ping an unknown server::

            >>> from utils4 import utils

            >>> utils.ping(server='//S3DHOST01', verbose=True)

            [PingError]:
            ping: S3DHOST01: Temporary failure in name resolution
            False


        Ping an unreachable IP address::

            >>> from utils4 import utils

            >>> utils.ping(server='192.168.0.99', count=3, verbose=True)

            [PingError]:
            PING 192.168.0.99 (192.168.0.99) 56(84) bytes of data.
            From 192.168.0.XX icmp_seq=1 Destination Host Unreachable
            From 192.168.0.XX icmp_seq=2 Destination Host Unreachable
            From 192.168.0.XX icmp_seq=3 Destination Host Unreachable

            --- 192.168.0.99 ping statistics ---
            3 packets transmitted, 0 received, +3 errors, 100% packet loss, time 2037ms
            False

    Returns:
        bool: True if the ping was successful, otherwise False.

    """
    cmd = []
    server = os.path.basename(server)
    status = 1
    stdout = None
    stderr = None
    _os = get_os()
    is_windows = _os.startswith('win')
    if is_windows:  # pragma: nocover  # utils4 will *rarely* ever be tested on Windows.
        timeout *= 1000  # Windows timeout (-w) is in milliseconds.
        cmd = ['ping', '-n', str(count), '-w', str(timeout), server]
    elif _os.startswith('lin'):
        cmd = ['ping', f'-c{count}', f'-W{timeout}', server]
    else:  # pragma: nocover
        ui.print_alert('\nProcess aborted, unsupported OS.\n'
                       f'- OS identified as: {_os}\n')
    if cmd:
        with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
            stdout, stderr = proc.communicate()
            status = proc.returncode
        if is_windows and b'Destination host unreachable' in stdout:  # pragma: nocover
            # Hard code status if host is unreachable.
            # Generally, this will return 0, so it must be overridden.
            status = 1
    if verbose and cmd and status != 0:
        ui.print_alert('\n[PingError]:')
        # The console encoding is not guaranteed to be UTF-8; replace any
        # undecodable bytes rather than failing while reporting a failure.
        if stdout:
            ui.print_alert(text=stdout.decode(errors='replace').strip())
        if stderr:
            ui.print_alert(text=stderr.decode(errors='replace').strip())
    return status == 0

850 

def strip_ansi_colour(text: str) -> Generator[str, None, None]:
    r"""Strip ANSI colour sequences from a string.

    Args:
        text (str): Text string to be cleaned.

    Note:
        This method is *very* basic and only caters to colour sequences.

        It is designed to yield all characters that are not part of the
        ``\x1b`` sequence start, and the ``m`` sequence end. In other
        words, all text before and after each ``\x1b[M;Nm`` sequence.

        If a sequence is started by ``\x1b`` but never terminated by an
        ``m``, all remaining text is treated as part of the sequence and
        is discarded.

    :Example:

        Strip the colouring sequence from terminal text and return a
        single string::

            clean = ''.join(strip_ansi_colour(text))

        Strip the colouring sequence from terminal text and return a list
        of lines, with empty lines removed::

            lines = list(filter(None, ''.join(strip_ansi_colour(text)).split('\n')))

    Yields:
        str: Each character which is not part of the ANSI escape sequence
        is yielded to the caller. Essentially, this is a generator
        method.

    """
    chars = iter(text)
    for char in chars:
        if char != '\x1b':
            yield char
            continue
        # Fast-forward from \x1b to m. The shared iterator is consumed, so
        # the outer loop resumes after the sequence; if the text ends
        # before an 'm' is found, both loops simply finish.
        for char in chars:
            if char == 'm':
                break

889 

890def testimport(module_name: str, verbose: bool=True) -> bool: 

891 """Test if a Python library is installed. 

892 

893 Args: 

894 module_name (str): Exact name of the module to be found. 

895 verbose (bool, optional): Notify if the library is not installed. 

896 Defaults to True. 

897 

898 :Design: 

899 This is a small helper function designed to test if a library is 

900 installed before trying to import it. 

901 

902 If the library is not intalled the user is notified, if the ``verbose`` 

903 argument is True. 

904 

905 :Internal Use: 

906 For example, the :meth:`~utils.getdrivername` function uses this 

907 function before attempting to import the ``pyodbc`` library. 

908 

909 :Example: 

910 

911 Execute a path only if ``mymodule`` is installed:: 

912 

913 >>> from utils4 import utils 

914 

915 >>> if utils.testimport('mymodule', verbose=True): 

916 >>> import mymodule 

917 >>> ... 

918 >>> else: 

919 >>> ... 

920 

921 Returns: 

922 bool: True if the library is installed, otherwise False. 

923 

924 """ 

925 found = False 

926 if importlib.util.find_spec(module_name): 

927 found = True 

928 if (verbose) & (not found): 

929 ui.print_warning(f'\nLibrary/module not installed: {module_name}') 

930 return found 

931 

def unidecode(string: str, **kwargs) -> str:
    """Attempt to convert a Unicode string object into a 7-bit ASCII string.

    Args:
        string (str): The string to be decoded.
        **kwargs (dict): Keyword arguments passed directly into the underlying
            :func:`unidecode.unidecode` function.

    :Design:
        This function is a light wrapper around the :func:`unidecode.unidecode`
        function.

        **Per the** ``unidecode`` **docstring:**

            "Transliterate an Unicode object into an ASCII string."

            Example::

                >>> unidecode(u"北亰")
                "Bei Jing "

            "This function first tries to convert the string using ASCII codec.
            If it fails (because of non-ASCII characters), it falls back to
            transliteration using the character tables."

            "This is approx. five times faster if the string only contains ASCII
            characters, but slightly slower than
            :func:`unidecode.unidecode_expect_nonascii` if non-ASCII characters are
            present."

    :Dependencies:

        - ``unidecode`` library

    :Example:

        Convert a Polish address into pure ASCII::

            >>> from utils4 import utils

            >>> addr = 'ul. Bałtów 8a 27-423 Bałtów, woj. świętokrzyskie'
            >>> utils.unidecode(addr)
            'ul. Baltow 8a 27-423 Baltow, woj. swietokrzyskie'


        Convert the first line of 'The Seventh Letter', by Plato::

            >>> from utils4 import utils

            >>> text = 'Πλάτων τοῖς Δίωνος οἰκείοις τε καὶ ἑταίροις εὖ πράττειν.'
            >>> utils.unidecode(text)
            'Platon tois Dionos oikeiois te kai etairois eu prattein.'

    Returns:
        str: If the ``unidecode`` library is installed and the passed
        ``string`` value is a ``str`` data type, the decoded string is
        returned, otherwise the original value is returned.

    """
    # pylint: disable=redefined-outer-name  # No adverse effects and keeps clear variable name.
    if not testimport(module_name='unidecode', verbose=True):  # pragma: nocover
        return string
    import unidecode as unidecode_
    if isinstance(string, str):
        return unidecode_.unidecode(string, **kwargs)
    # Non-string values are handed back untouched.
    return string