Coverage for /var/devmt/py/utils4_1.7.0/utils4/srccheck.py: 100%
80 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-21 17:18 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-21 17:18 +0000
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3"""
4:Purpose: This module is used to perform checksum calculations on a
5 collection of files to verify if the checksum *calculated* on each
6 file matches the *expected* checksum value.
8 In practical terms, an application can call the
9 :meth:`~SourceCheck.check` method by passing a list of filepaths
10 to be checksummed, along with a reference file (containing the
11 expected checksums). If the checksum values match the reference
12 file, a value of ``True`` is returned to the caller application,
13 signaling the inspected source code files have *not* been modified
14 and are 'safe' for use. Otherwise, a value of ``False`` is
15 returned to the caller and the filenames of each failing file are
16 printed to the terminal.
18:Platform: Linux/Windows | Python 3.7+
19:Developer: J Berendt
20:Email: development@s3dev.uk
22:Comments: n/a
24:Example usage:
26 Generate an *un-encrypted* reference file::
28 >>> from utils4.srccheck import srccheck
30 >>> files = ['list.c', 'of.py', 'files.sql']
31 >>> srccheck.generate(filepaths=files, encrypt=False)
34 Verify checksums from within an application, with an *un-encrypted*
35 reference file::
37 >>> from utils4.srccheck import srccheck
39 >>> srccheck.check(ref_file='path/to/srccheck.ref')
40 True
42 Generate an **encrypted** reference file::
44 >>> from utils4.srccheck import srccheck
46 >>> files = ['list.c', 'of.py', 'files.sql']
47 >>> srccheck.generate(filepaths=files, encrypt=True)
50 Verify checksums from within an application, with an *encrypted* reference
51 file::
53 >>> from utils4.srccheck import srccheck
55 >>> srccheck.check(ref_file='path/to/srccheck.ref',
56 key_file='path/to/srccheck.key')
57 True
60 **Advanced usage:**
62 If you wish to *delay the output* of mismatched files (to give the caller
63 application display control), the caller can redirect the output from
64 the :meth:`~SourceCheck.check` method into a buffer and display at a more
65 appropriate time. For example::
67 >>> from contextlib import redirect_stdout
68 >>> from io import StringIO
69 >>> from utils4.srccheck import srccheck
71 >>> buff = StringIO()
72 >>> with redirect_stdout(buff):
73 >>> test = srccheck.check(ref_file='path/to/srccheck.ref')
75 >>> # ...
77 >>> if not test:
78 >>> print(buff.getvalue())
79 >>> buff.close()
81 Checksum verification has failed for the following:
82 - 02-01_first.c
83 - 10-09_ptr_exchange.c
84 - 06-ex07.c
85 - 15-ex05_col_output.c
86 - 02-03_multi_lines.c
88"""
89# pylint: disable=wrong-import-order
91import json
92import os
93import pickle
94import sys
95import uuid
96from cryptography import fernet
97from typing import List
98from utils4.crypto import crypto
class SourceCheck:
    """Verify source code checksum values are as expected."""

    def check(self, ref_file: str, key_file: str='') -> bool:
        """Verify the provided source code file checksums are as expected.

        If any checksums do not match, the names of those files are
        reported to the terminal.

        Args:
            ref_file (str): Full path to the reference file containing the
                full paths to the file(s) to be tested and the associated
                checksum value(s).
            key_file (str, optional): Full path to the key file. If a key
                file is not provided, the method assumes the reference file
                is in plaintext CSV and does not attempt to decrypt.
                Defaults to ''.

        Note:
            If the ``key_file`` argument is *not* provided, it is assumed
            the ``ref_file`` is a plaintext CSV file, and decryption is
            *not* attempted.

            If the ``key_file`` argument *is* provided, it is assumed the
            ``ref_file`` has been encrypted, and decryption is carried out.

            A reference file which lists no files compares equal to the
            (empty) set of calculated checksums, so ``True`` is returned.

        Raises:
            FileNotFoundError: If either the reference file, or key file do
                not exist.
            ValueError: If a non-blank line of a plaintext reference file
                does not contain a ``,`` separator.

        Returns:
            bool: True if all file's checksum values agree with the checksum
            listed in the reference file; otherwise False.

        """
        if not os.path.exists(ref_file):
            raise FileNotFoundError(f'Reference file not found: {ref_file}')
        if key_file and not os.path.exists(key_file):
            raise FileNotFoundError(f'Key file not found: {key_file}')
        if key_file:
            ref = self._read_encrypted_reference(ref_file=ref_file,
                                                 key_file=key_file)
        else:
            ref = self._read_plaintext_reference(ref_file=ref_file)
        chksums = self._checksum(files=list(ref))
        # Whole-object comparison first, for quick validation.
        if chksums == ref:
            return True
        self._report_mismatches(checksums=chksums, reference=ref)
        return False

    def generate(self, filepaths: List[str], encrypt: bool=False) -> None:
        """Generate the reference file containing the source file checksums,
        and (if encrypting) the associated key file.

        Both files are written to the ``Desktop`` directory returned by
        :meth:`_build_outpaths`.

        Args:
            filepaths (list[str]): A list of full paths which are to be
                included in the reference file.
            encrypt (bool, optional): Encrypt the reference file and generate
                a key file. Defaults to False.

        :Reference File:

            **If unencrypted:**

            The reference file is a flat, plaintext CSV file with the file
            path as the first field and the checksum value as the second
            field.

            For example::

                filepath_01,md5_hash_string_01
                filepath_02,md5_hash_string_02
                filepath_03,md5_hash_string_03
                ...
                filepath_NN,md5_hash_string_NN

            **If encrypted:**

            The reference file is a serialised, encrypted representation of
            the full path and associated checksum value for all provided
            files, in JSON format. This data is written to the
            ``srccheck.ref`` file.

            A unique encryption key is created with *each* call to this
            method, and stored to the ``srccheck.key`` file.

            To perform checks, both the reference file *and* the key file
            must be provided to the :meth:`~check` method.

            .. note:: These files are a **pair**. If one file is lost, the
                      other file is useless.

        :Layout:

            **If encrypted:**

            The layout of the *deserialised* and *decrypted* reference file
            is in basic JSON format, with the filename as the keys, and
            checksum values as the values.

            For example::

                {"filepath_01": "md5_hash_string_01",
                 "filepath_02": "md5_hash_string_02",
                 "filepath_03": "md5_hash_string_03",
                 ...,
                 "filepath_NN": "md5_hash_string_NN"}

        Raises:
            FileNotFoundError: If any of the files provided to the
                ``filepaths`` argument do not exist.

        """
        if not self._all_files_exist(files=filepaths):
            raise FileNotFoundError('The files listed above were not found.')
        op_ref, op_key = self._build_outpaths()
        chksums = self._checksum(files=filepaths)
        if encrypt:
            # NOTE(review): The key is built from a UUID4 hex string passed
            # through ``crypto.b64`` (helper not visible here).
            # ``fernet.Fernet.generate_key()`` is the library's documented
            # key generator; consider switching to it.
            key = crypto.b64(uuid.uuid4().hex, decode=False)
            with open(op_key, 'wb') as kfp:
                kfp.write(key)
            f = fernet.Fernet(key=key)
            with open(op_ref, 'wb') as rfp:
                pickle.dump(f.encrypt(json.dumps(chksums).encode()), rfp)
            print('\nComplete.\nThe reference and key files are available on your desktop.')
        else:
            with open(op_ref, 'w', encoding='utf-8') as rfp:
                for k, v in chksums.items():
                    rfp.write(f'{k},{v}\n')
            print('\nComplete.\nThe reference file is available on your desktop.')

    @staticmethod
    def _all_files_exist(files: list) -> bool:
        """Verify all provided files exist.

        If any file does not exist, the missing paths are printed to the
        terminal and ``False`` is returned, leaving it to the caller to
        raise an exception.

        Args:
            files (list): List of files to be tested.

        Returns:
            bool: True, if all files exist, otherwise False.

        """
        nexist = [f for f in files if not os.path.exists(f)]
        if nexist:
            print('\nThe following files do not exist:')
            print(*(f' - {f}' for f in nexist), sep='\n')
            print('')
        return not nexist

    @staticmethod
    def _build_outpaths() -> tuple:
        """Build the output path to the reference and key files.

        The files are placed in the ``Desktop`` directory beneath the user's
        home directory. The home directory is read from ``USERPROFILE``
        (Windows) or ``HOME`` (Linux), falling back to
        ``os.path.expanduser('~')`` if the variable is not set.

        Raises:
            NotImplementedError: If the platform is neither Windows nor
                Linux.

        Returns:
            tuple: Full path to the reference and key files as::

                ('fname.ref', 'fname.key')

        """
        _os = sys.platform.lower()
        fn_ref = 'srccheck.ref'
        fn_key = 'srccheck.key'
        # Match on the prefix, not a substring: 'win' is a substring of
        # 'darwin', which would send macOS down the Windows branch.
        # 'cygwin' is kept on the Windows branch, as it was previously.
        if _os.startswith(('win', 'cygwin')):  # pragma nocover
            home_var = 'USERPROFILE'
        elif _os.startswith('linux'):
            home_var = 'HOME'
        else:  # pragma nocover
            raise NotImplementedError(f'Not a currently supported OS: {_os}')
        # An unset variable would otherwise pass None to os.path.join.
        home = os.environ.get(home_var) or os.path.expanduser('~')
        desk = os.path.join(home, 'Desktop')
        return os.path.join(desk, fn_ref), os.path.join(desk, fn_key)

    @staticmethod
    def _checksum(files: list) -> dict:
        """Calculate checksum for all passed files.

        Args:
            files (list): List of full paths against which a checksum is to
                be calculated.

        Returns:
            dict: A dictionary containing the filename and checksum for all
            passed files, as::

                {'fname_01': 'checksum_hash_01',
                 'fname_02': 'checksum_hash_02',
                 'fname_03': 'checksum_hash_03',
                 ...,
                 'fname_NN': 'checksum_hash_NN'}

        """
        return {f: crypto.checksum_md5(path=f) for f in files}

    @staticmethod
    def _read_encrypted_reference(ref_file: str, key_file: str) -> dict:
        """Deserialise and decrypt an encrypted reference file.

        Args:
            ref_file (str): Full path to the encrypted reference file.
            key_file (str): Full path to the associated key file.

        Returns:
            dict: The decrypted reference data, with the file paths as the
            keys and the expected checksums as the values.

        """
        # SECURITY NOTE(review): ``pickle.load`` can execute arbitrary code
        # from a crafted file, and it runs *before* the Fernet token is
        # authenticated. Only load reference files from a trusted location.
        # The format is kept for compatibility with existing files.
        with open(ref_file, 'rb') as rfp:
            data = pickle.load(rfp)
        with open(key_file, 'rb') as kfp:
            f = fernet.Fernet(kfp.read())
        return json.loads(f.decrypt(data).decode())

    @staticmethod
    def _read_plaintext_reference(ref_file: str) -> dict:
        """Read a plaintext (CSV) reference file.

        Blank lines are skipped. Each remaining line is split on its *last*
        comma, so a file path which itself contains a comma (as written by
        :meth:`generate`) is read back intact.

        Args:
            ref_file (str): Full path to the plaintext reference file.

        Raises:
            ValueError: If a non-blank line does not contain a ``,``.

        Returns:
            dict: The reference data, with the file paths as the keys and
            the expected checksums as the values.

        """
        ref = {}
        with open(ref_file, 'r', encoding='utf-8') as rfp:
            for lineno, line in enumerate(rfp, 1):
                line = line.strip()
                if not line:
                    continue
                path, sep, chksum = line.rpartition(',')
                if not sep:
                    raise ValueError(f'Malformed reference entry on line {lineno} '
                                     f'of {ref_file}: {line!r}')
                ref[path] = chksum
        return ref

    @staticmethod
    def _report_mismatches(checksums: dict, reference: dict):
        """Report the files for which the checksums do not match.

        The base name of each file whose calculated checksum differs from
        (or is missing for) the expected value is printed to the terminal.

        Args:
            checksums (dict): A dictionary containing the recently
                calculated checksums.
            reference (dict): A dictionary containing the *expected*
                checksums.

        """
        m = [os.path.basename(k) for k, v in reference.items() if checksums.get(k) != v]
        print('\nChecksum verification has failed for the following:')
        print(*(f'- {name}' for name in m), sep='\n')
        print('')
# Module-level instance, imported by callers as
# ``from utils4.srccheck import srccheck``.
srccheck = SourceCheck()