Module multiformats.multihash.raw
Implementation of raw hash functions used by multihash multicodecs.
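Hash functions are implemented using the following libraries:
- hashlib (https://docs.python.org/3/library/hashlib.html)
- pyskein (https://pythonhosted.org/pyskein/)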
Core functionality is provided by the exists() and get() functions, which can be used to check
whether an implementation with a given name is known, and if so to get the corresponding pair
of hash function and max digest size:
>>> multihash.hashfun.exists("sha2-256")
True
>>> multihash.hashfun.get("sha2-256")
(<function _hashlib_sha.<locals>.hashfun at 0x0000013F4A3C6160>, 32)
The hash functions take a single bytes input (the data) and return a bytes output (the hash digest).
The max digest sizes (if not None) are used to sense-check hash digests passed to wrap()
and/or obtained from unwrap(): telling whether a digest has been generated by a hash function
is deemed to be computationally infeasible in general, but hash digests of length greater than the max digest size
can always be discounted as invalid.
Expand source code
"""
Implementation of raw hash functions used by multihash multicodecs.
Hash functions are implemented using the following libraries:
- [`hashlib`](https://docs.python.org/3/library/hashlib.html)
- [`pyskein`](https://pythonhosted.org/pyskein/)
Core functionality is provided by the `exists` and `get` functions, which can be used to check
whether an implementation with a given name is known, and if so to get the corresponding pair
of hash function and max digest size:
```py
>>> multihash.hashfun.exists("sha2-256")
True
>>> multihash.hashfun.get("sha2-256")
(<function _hashlib_sha.<locals>.hashfun at 0x0000013F4A3C6160>, 32)
```
The hash functions take a single `bytes` input (the data) and return a `bytes` output (the hash digest).
The max digest sizes (if not `None`) are used to sense-check hash digests passed to `multiformats.multihash.wrap`
and/or obtained from `multiformats.multihash.unwrap`: telling whether a digest has been generated by a hash function
is deemed to be computationally unfeasible in general, but hash digests of length greater than the max digest size
can always be discounted as invalid.
"""
import hashlib
from typing import Callable, Dict, Optional, Tuple
from typing_validation import validate
import skein # type: ignore
from multiformats import multicodec
from multiformats.varint import BytesLike
from . import err
# Signature shared by all raw hash functions: bytes-like data in, digest bytes out.
Hashfun = Callable[[BytesLike], bytes]

# An implementation pairs a hash function with its max digest size (or `None` when no maximum is given).
MultihashImpl = Tuple[Hashfun, Optional[int]]

# Registry of implementations, keyed by multihash multicodec name.
_hashfun: Dict[str, MultihashImpl] = {}
def get(name: str) -> MultihashImpl:
    """
        Looks up the implementation registered for the multihash multicodec with given name.
        The implementation is returned as a pair of a hash function and a max digest size (possibly `None`).

        Raises `err.KeyError` if no implementation is available for this name.

        ```py
        >>> multihash.hashfun.get("sha2-256")
        (<function _hashlib_sha.<locals>.hashfun at 0x0000013F4A3C6160>, 32)
        ```
    """
    validate(name, str)
    if name in _hashfun:
        return _hashfun[name]
    raise err.KeyError(f"No implementation for multihash multicodec {repr(name)}.")
def exists(name: str) -> bool:
    """
        Tells whether an implementation is registered for the multihash multicodec with given name.

        ```py
        >>> multihash.hashfun.exists("sha2-256")
        True
        ```
    """
    validate(name, str)
    is_registered = name in _hashfun
    return is_registered
def register(name: str, hashfun: Hashfun, digest_size: Optional[int], *, overwrite: bool = False) -> None:
    """
        Registers a hash function and max digest size implementing the multihash multicodec with given name.
        When the name has no implementation yet, the multicodec must already exist (it is looked up with
        `multicodec.get`) and must be tagged as a multihash multicodec.

        The optional keyword argument `overwrite` (default: `False`) can be used to overwrite an existing implementation.

        Raises `TypeError` if `hashfun` is not callable.
        Raises `err.ValueError` if:

        - `digest_size` is neither `None` nor a positive integer;
        - `overwrite` is `False` and an implementation with the same name already exists;
        - the multicodec with given name is not a multihash multicodec.

        Example usage (from the source code of this module):

        ```py
        register("sha1", _hashlib_sha(1), 20) # max digest size is 20 bytes, i.e. 160 bits
        register(f"sha2-256", _hashlib_sha(2, 256), 256//8)
        ```
    """
    validate(name, str)
    # TODO: replace with validate(hashfun, Hashfun) once supported by typing-validation.
    # Until then, reject non-callables here rather than letting them fail at hashing time.
    if not callable(hashfun):
        raise TypeError(f"Expected a callable hash function, found {repr(hashfun)}.")
    validate(digest_size, Optional[int])
    validate(overwrite, bool)
    if digest_size is not None and digest_size <= 0:
        raise err.ValueError("Digest size must be positive or None.")
    if not overwrite and name in _hashfun:
        raise err.ValueError(f"An implementation for the multihash multicodec named {repr(name)} already exists.")
    if name not in _hashfun:
        # Only names without an implementation are checked against the multicodec table:
        # a name already in the registry passed this check when it was first registered.
        multihash = multicodec.get(name)
        if multihash.tag != "multihash":
            raise err.ValueError(f"Multicodec '{multihash.name}' exists, but it is not a multihash multicodec.")
    _hashfun[name] = (hashfun, digest_size)
def unregister(name: str) -> None:
    """
        Removes the implementation registered for the multihash multicodec with given name.

        Raises `err.KeyError` if no implementation is registered under this name.
    """
    validate(name, str)
    if name in _hashfun:
        del _hashfun[name]
        return
    raise err.KeyError(f"There is no implementation for multihash multicodec with name {repr(name)}.")
def _identity(data: BytesLike) -> bytes:
    """ Identity 'hash' function: validates the input data and returns it converted to `bytes`. """
    validate(data, BytesLike)
    digest = bytes(data)
    return digest
# The identity implementation is registered without a max digest size.
register("identity", _identity, digest_size=None)
def _hashlib_sha(version: int, digest_bits: Optional[int] = None) -> Hashfun:
    """
        Builds a hash function around one of the SHA constructors from `hashlib`:

        - `version` 1 selects `hashlib.sha1` (`digest_bits` is not used);
        - `version` 2 selects `hashlib.sha{digest_bits}`, e.g. `hashlib.sha256`;
        - `version` 3 selects `hashlib.sha3_{digest_bits}`, e.g. `hashlib.sha3_256`.

        Raises `err.ValueError` if `version` is not 1, 2 or 3.
        An `AttributeError` from the `hashlib` lookup is raised if no such constructor exists.
    """
    # Explicit dispatch: indexing a tuple with `version-1` would let version 0 silently
    # wrap around to SHA-3 and fail with an opaque IndexError for other values.
    if version == 1:
        name = "sha1"
    elif version == 2:
        name = f"sha{digest_bits}"
    elif version == 3:
        name = f"sha3_{digest_bits}"
    else:
        raise err.ValueError(f"Unsupported SHA version {repr(version)}: expected 1, 2 or 3.")
    h = getattr(hashlib, name)
    def hashfun(data: BytesLike) -> bytes:
        validate(data, BytesLike)
        m: hashlib._Hash = h() # pylint: disable = no-member
        m.update(data)
        return m.digest()
    return hashfun
# SHA-1 digests are 160 bits long, i.e. 20 bytes.
register("sha1", _hashlib_sha(1), digest_size=20)
# SHA-2 and SHA-3 variants: the max digest size in bytes is the digest bit length divided by 8.
for digest_bits in (256, 512):
    register(f"sha2-{digest_bits}", _hashlib_sha(2, digest_bits=digest_bits), digest_size=digest_bits//8)
for digest_bits in (224, 256, 384, 512):
    register(f"sha3-{digest_bits}", _hashlib_sha(3, digest_bits=digest_bits), digest_size=digest_bits//8)
def _hashlib_shake(digest_bits: int) -> Hashfun:
    """
        Builds a hash function around `hashlib.shake_{digest_bits//2}` (e.g. `hashlib.shake_128`
        for `digest_bits=256`), producing digests of `digest_bits//8` bytes.
    """
    shake = getattr(hashlib, f"shake_{digest_bits//2}")
    digest_bytes = digest_bits//8
    def hashfun(data: BytesLike) -> bytes:
        validate(data, BytesLike)
        # hashlib constructors accept the initial data directly, equivalent to a single update() call
        return shake(data).digest(digest_bytes) # type: ignore
    return hashfun
# Registers "shake-128" with 256-bit digests and "shake-256" with 512-bit digests.
for digest_bits in (256, 512):
    register(f"shake-{digest_bits//2}", _hashlib_shake(digest_bits=digest_bits), digest_size=digest_bits//8)
def _hashlib_blake2(version: str, digest_bits: int) -> Hashfun:
    """
        Builds a hash function around `hashlib.blake2{version}` (e.g. `hashlib.blake2b` for `version="b"`),
        configured to produce digests of `digest_bits//8` bytes.
    """
    blake2 = getattr(hashlib, f"blake2{version}")
    def hashfun(data: BytesLike) -> bytes:
        validate(data, BytesLike)
        # hashlib constructors accept the initial data directly, equivalent to a single update() call
        hasher = blake2(data, digest_size=digest_bits//8) # pylint: disable = no-member
        return hasher.digest()
    return hashfun
# Registers every whole-byte digest length: up to 512 bits for blake2b, up to 256 bits for blake2s.
for blake2_version in ("b", "s"):
    for digest_bits in range(8, (512 if blake2_version == "b" else 256)+1, 8):
        register(
            f"blake2{blake2_version}-{digest_bits}",
            _hashlib_blake2(blake2_version, digest_bits),
            digest_size=digest_bits//8
        )
def _skein(version: int, digest_bits: int) -> Hashfun:
    """
        Builds a hash function around `skein.skein{version}` (e.g. `skein.skein256` for `version=256`),
        instantiated with the given `digest_bits`.
    """
    skein_hash = getattr(skein, f"skein{version}")
    def hashfun(data: BytesLike) -> bytes:
        validate(data, BytesLike)
        hasher = skein_hash(digest_bits=digest_bits) # pylint: disable = no-member
        hasher.update(data)
        return hasher.digest()
    return hashfun
# Registers every whole-byte digest length, from 8 bits up to the Skein version number itself.
for skein_version in (256, 512, 1024):
    for digest_bits in range(8, skein_version+1, 8):
        register(
            f"skein{skein_version}-{digest_bits}",
            _skein(skein_version, digest_bits),
            digest_size=digest_bits//8
        )
Functions
def exists(name: str) ‑> bool
-
Checks whether the multihash multicodec with given name has an implementation.
>>> multihash.hashfun.exists("sha2-256")
True
Expand source code
def exists(name: str) -> bool:
    """
        Tells whether an implementation is registered for the multihash multicodec with given name.

        ```py
        >>> multihash.hashfun.exists("sha2-256")
        True
        ```
    """
    validate(name, str)
    is_registered = name in _hashfun
    return is_registered
def get(name: str) ‑> Tuple[Callable[[Union[bytes, bytearray, memoryview]], bytes], Optional[int]]
-
Given a multihash multicodec name, returns its implementation as a pair of a hash function and a max digest size (possibly None). Raises err.KeyError if no implementation is available for this name.
>>> multihash.hashfun.get("sha2-256")
(<function _hashlib_sha.<locals>.hashfun at 0x0000013F4A3C6160>, 32)
Expand source code
def get(name: str) -> MultihashImpl:
    """
        Looks up the implementation registered for the multihash multicodec with given name.
        The implementation is returned as a pair of a hash function and a max digest size (possibly `None`).

        Raises `err.KeyError` if no implementation is available for this name.

        ```py
        >>> multihash.hashfun.get("sha2-256")
        (<function _hashlib_sha.<locals>.hashfun at 0x0000013F4A3C6160>, 32)
        ```
    """
    validate(name, str)
    if name in _hashfun:
        return _hashfun[name]
    raise err.KeyError(f"No implementation for multihash multicodec {repr(name)}.")
def register(name: str, hashfun: Callable[[Union[bytes, bytearray, memoryview]], bytes], digest_size: Optional[int], *, overwrite: bool = False) ‑> None
-
Registers a hash function and hash digest size implementing the multihash multicodec with given name, which must already exist.
The optional keyword argument overwrite (default: False) can be used to overwrite an existing implementation. If overwrite is False, raises err.ValueError if an implementation with the same name already exists.
Example usage (from the source code of this module):
register("sha1", _hashlib_sha(1), 20) # max digest size is 20 bytes, i.e. 160 bits
register(f"sha2-256", _hashlib_sha(2, 256), 256//8)
Expand source code
def register(name: str, hashfun: Hashfun, digest_size: Optional[int], *, overwrite: bool = False) -> None:
    """
        Registers a hash function and max digest size implementing the multihash multicodec with given name.
        When the name has no implementation yet, the multicodec must already exist (it is looked up with
        `multicodec.get`) and must be tagged as a multihash multicodec.

        The optional keyword argument `overwrite` (default: `False`) can be used to overwrite an existing implementation.

        Raises `TypeError` if `hashfun` is not callable.
        Raises `err.ValueError` if:

        - `digest_size` is neither `None` nor a positive integer;
        - `overwrite` is `False` and an implementation with the same name already exists;
        - the multicodec with given name is not a multihash multicodec.

        Example usage (from the source code of this module):

        ```py
        register("sha1", _hashlib_sha(1), 20) # max digest size is 20 bytes, i.e. 160 bits
        register(f"sha2-256", _hashlib_sha(2, 256), 256//8)
        ```
    """
    validate(name, str)
    # TODO: replace with validate(hashfun, Hashfun) once supported by typing-validation.
    # Until then, reject non-callables here rather than letting them fail at hashing time.
    if not callable(hashfun):
        raise TypeError(f"Expected a callable hash function, found {repr(hashfun)}.")
    validate(digest_size, Optional[int])
    validate(overwrite, bool)
    if digest_size is not None and digest_size <= 0:
        raise err.ValueError("Digest size must be positive or None.")
    if not overwrite and name in _hashfun:
        raise err.ValueError(f"An implementation for the multihash multicodec named {repr(name)} already exists.")
    if name not in _hashfun:
        # Only names without an implementation are checked against the multicodec table:
        # a name already in the registry passed this check when it was first registered.
        multihash = multicodec.get(name)
        if multihash.tag != "multihash":
            raise err.ValueError(f"Multicodec '{multihash.name}' exists, but it is not a multihash multicodec.")
    _hashfun[name] = (hashfun, digest_size)
def unregister(name: str) ‑> None
-
Unregisters the implementation of the multihash multicodec with the given name. Raises err.KeyError if no implementation is registered under that name.
Expand source code
def unregister(name: str) -> None:
    """
        Removes the implementation registered for the multihash multicodec with given name.

        Raises `err.KeyError` if no implementation is registered under this name.
    """
    validate(name, str)
    if name in _hashfun:
        del _hashfun[name]
        return
    raise err.KeyError(f"There is no implementation for multihash multicodec with name {repr(name)}.")