caellion-python-commons
file_hashing.py
Go to the documentation of this file.
1 """!
2 This module provides utilities related to hashing files
3 """
4 
5 import hashlib
6 
7 
# NOTE(review): the class header was missing from this listing; the name is taken from the
# raise site in FileHasher.__init__, the Exception base is assumed from the sibling
# exception classes - confirm against the original module.
class UnsupportedAlgorithmException(Exception):
    """!
    This exception is raised whenever provided algorithm is not one of allowed algorithms.
    """
14 
15 
class AlreadyDoneException(Exception):
    """!
    This exception is raised when hashing was requested on a FileHasher that has already finished hashing.
    """
22 
23 
class NotDoneYetException(Exception):
    """!
    This exception is raised when a hash was requested from a FileHasher that has not hashed any file yet.
    """
30 
31 
class FileHasher:
    """!
    This class provides hashlib-based file hasher

    A FileHasher is single-use: once it has hashed a file it is locked, and any further
    hashing request raises AlreadyDoneException.
    """

    # hashlib constructors of the supported algorithms, keyed by lower-case algorithm name
    _SUPPORTED_ALGORITHMS = {
        "sha1": hashlib.sha1,
        "sha256": hashlib.sha256,
        "sha384": hashlib.sha384,
        "sha512": hashlib.sha512,
        "md5": hashlib.md5,
    }

    ## Instance of hashlib algorithm implementation
    hashlib_instance = None

    ## Is this FileHasher done hashing a file
    islocked = False

    ## Bytes of the computed hash, None until a file has been hashed
    digest = None

    ## Hexadecimal string of the computed hash, None until a file has been hashed
    hexdigest = None

    def __init__(self, algo=None):
        """!
        Initializes hasher

        Currently supported hashes: SHA1, SHA256, SHA384, SHA512, MD5 (not recommended)

        @param algo String with name of algorithm to use (case-insensitive); None or an empty string selects SHA1

        @throws UnsupportedAlgorithmException when attempting to use unsupported algorithm
        """
        # None and "" both fall back to the default algorithm
        name = "sha1" if algo is None or algo == "" else algo.lower()
        constructor = self._SUPPORTED_ALGORITHMS.get(name)
        if constructor is None:
            raise UnsupportedAlgorithmException("'%s' is not allowed as a hashing algorithm." % (algo,))
        self.hashlib_instance = constructor()

    def hash_file(self, file):
        """!
        Hashes file-object with currently chosen algorithm

        @param file file-object to be hashed; it must provide readinto(), i.e. be opened in binary mode

        @throws AlreadyDoneException if this FileHasher is already done hashing a different file
        """
        if self.islocked:
            raise AlreadyDoneException()
        # One reusable 128 KiB buffer; the memoryview lets us hash the filled part without copying
        buffer = bytearray(128 * 1024)
        view = memoryview(buffer)
        # readinto() returns the number of bytes read; 0 marks end of file
        for count in iter(lambda: file.readinto(view), 0):
            self.hashlib_instance.update(view[:count])
        self.digest = self.hashlib_instance.digest()
        self.hexdigest = self.hashlib_instance.hexdigest()
        self.islocked = True

    def hash_path(self, path):
        """!
        Hashes file at given path with currently chosen algorithm

        @param path path to file to be hashed

        @throws AlreadyDoneException if this FileHasher is already done hashing a different file
        """
        # Check the lock before touching the filesystem so a finished hasher never opens the file
        if self.islocked:
            raise AlreadyDoneException()
        with open(path, "rb", buffering=0) as f:
            self.hash_file(f)

    def get_hexdigest(self):
        """!
        Returns hexadecimal value of hash from this object.

        @returns hexadecimal string representation of hash

        @throws NotDoneYetException if no file was hashed with this FileHasher.
        """
        if not self.islocked:
            raise NotDoneYetException()
        return self.hexdigest

    def get_hashbytes(self):
        """!
        Returns bytes of hash from this object.

        @returns bytes of hash

        @throws NotDoneYetException if no file was hashed with this FileHasher.
        """
        if not self.islocked:
            raise NotDoneYetException()
        return self.digest

    def hash_file_and_get_hashbytes(self, file):
        """!
        Hashes a file-object and returns hash

        @param file file-object to be hashed

        @returns bytes of hash

        @throws AlreadyDoneException if this FileHasher is already done hashing a different file
        """
        self.hash_file(file)
        return self.get_hashbytes()

    def hash_path_and_get_hashbytes(self, path):
        """!
        Hashes a file at path and returns hash

        @param path path to the file to be hashed

        @returns bytes of hash

        @throws AlreadyDoneException if this FileHasher is already done hashing a different file
        """
        self.hash_path(path)
        return self.get_hashbytes()

    def hash_file_and_get_hexdigest(self, file):
        """!
        Hashes a file-object and returns hash

        @param file file-object to be hashed

        @returns hexadecimal string representation of hash

        @throws AlreadyDoneException if this FileHasher is already done hashing a different file
        """
        self.hash_file(file)
        return self.get_hexdigest()

    def hash_path_and_get_hexdigest(self, path):
        """!
        Hashes a file at path and returns hash

        @param path path to the file to be hashed

        @returns hexadecimal string representation of hash

        @throws AlreadyDoneException if this FileHasher is already done hashing a different file
        """
        self.hash_path(path)
        return self.get_hexdigest()
This exception is raised when hashing was requested on a FileHasher that has already finished hashing.
Definition: file_hashing.py:16
This class provides hashlib-based file hasher.
Definition: file_hashing.py:32
def hash_file_and_get_hashbytes(self, file)
Hashes a file-object and returns hash.
def hash_path_and_get_hashbytes(self, path)
Hashes a file at path and returns hash.
def hash_file_and_get_hexdigest(self, file)
Hashes a file-object and returns hash.
def hash_path_and_get_hexdigest(self, path)
Hashes a file at path and returns hash.
def get_hashbytes(self)
Returns the bytes of the hash from this object.
def hash_file(self, file)
Hashes file-object with currently chosen algorithm.
Definition: file_hashing.py:69
def get_hexdigest(self)
Returns hexadecimal value of hash from this object.
def __init__(self, algo=None)
Initializes hasher.
Definition: file_hashing.py:46
def hash_path(self, path)
Hashes the file at the given path with the currently chosen algorithm.
Definition: file_hashing.py:87
bool islocked
Is this FileHasher done hashing a file.
Definition: file_hashing.py:40
hashlib_instance
Instance of hashlib algorithm implementation.
Definition: file_hashing.py:38
This exception is raised when a hash was requested from a FileHasher that has not hashed any file yet.
Definition: file_hashing.py:24
This exception is raised whenever provided algorithm is not one of allowed algorithms.
Definition: file_hashing.py:8