Module multiformats.multibase.raw
Implementation of raw data encodings used by multibase encodings.
The majority of the encodings are provided by the bases library,
as instances of the BaseEncoding class. The following custom encodings are also implemented:
- multibase identity
- multibase proquints
Core functionality is provided by the get() and exists() functions, which can be used to check
whether a raw encoding with given name is known, and if so to get the corresponding object:
>>> from multiformats.multibase import raw_encoding
>>> raw_encoding.exists("base10")
True
>>> raw_encoding.get("base10")
ZeropadBaseEncoding(StringAlphabet('0123456789'))
The raw encoding objects have encode and decode methods that can be used to
convert between bytestrings and strings (not including the multibase code):
>>> base16 = raw_encoding.get("base16")
>>> base16.encode(bytes([0xAB, 0xCD]))
'abcd'
>>> base16.decode('abcd')
b'\xab\xcd'
Expand source code
"""
Implementation of raw data encodings used by multibase encodings.
The majority of the encodings are provided by the [`bases`](https://github.com/hashberg-io/bases) library,
as instances of the `BaseEncoding` class. The following custom encodings are also implemented:
- multibase identity
- multibase proquints
Core functionality is provided by the `get` and `exists` functions, which can be used to check
whether a raw encoding with given name is known, and if so to get the corresponding object:
```py
>>> from multiformats.multibase import raw_encoding
>>> raw_encoding.exists("base10")
True
>>> raw_encoding.get("base10")
ZeropadBaseEncoding(StringAlphabet('0123456789'))
```
The raw encoding objects have `encode` and `decode` methods that can be used to
convert between bytestrings and strings (not including the multibase code):
```py
>>> base16 = raw_encoding.get("base16")
>>> base16.encode(bytes([0xAB, 0xCD]))
'abcd'
>>> base16.decode('abcd')
b'\\xab\\xcd'
```
"""
import binascii
from types import MappingProxyType
from typing import Callable, Dict, List, Union
from typing_validation import validate
from bases import (base2, base16, base8, base10, base36, base58btc, base58flickr,
base32, base32hex, base32z, base64, base64url,)
from bases.encoding import BaseEncoding
from multiformats.varint import BytesLike
from . import err
# Signature of a raw encoder: takes a bytes-like object and returns its string encoding.
RawEncoder = Callable[[BytesLike], str]
# Signature of a raw decoder: takes an encoded string and returns the decoded bytes.
RawDecoder = Callable[[str], bytes]
class CustomEncoding:
    """
        Raw encoding defined by an explicit pair of functions: one turning bytes into a string
        and one turning a string back into bytes.

        No argument validation is performed here: the supplied encoder and decoder
        are expected to validate their own arguments.
    """

    _raw_encoder: Callable[[bytes], str]
    _raw_decoder: Callable[[str], bytes]

    def __init__(self, raw_encoder: Callable[[bytes], str], raw_decoder: Callable[[str], bytes]):
        # TODO: validate both callables once typing-validation supports `Callable` types
        self._raw_encoder, self._raw_decoder = raw_encoder, raw_decoder # type: ignore

    def encode(self, b: BytesLike) -> str:
        """
            Encodes the given bytes by delegating to the custom raw encoder.
        """
        return self._raw_encoder(b) # type: ignore

    def decode(self, s: str) -> bytes:
        """
            Decodes the given string by delegating to the custom raw decoder.
        """
        return self._raw_decoder(s) # type: ignore

    def __repr__(self) -> str:
        # shows the representations of the two wrapped callables
        return f"CustomEncoding({self._raw_encoder!r}, {self._raw_decoder!r})" # type: ignore
# A raw encoding is either a custom encoder/decoder pair or a `BaseEncoding` from the `bases` library.
RawEncoding = Union[CustomEncoding, BaseEncoding]
# Module-level registry mapping raw encoding names to the registered encoding objects.
_raw_encodings: Dict[str, RawEncoding] = {}
def get(name: str) -> RawEncoding:
    """
        Looks up a registered raw encoding by name and returns it.
        Raises `err.KeyError` if the name is not registered.

        Example usage:

        ```py
        >>> raw_encoding.get("base16")
        ZeropadBaseEncoding(
            StringAlphabet('0123456789abcdef',
                           case_sensitive=False),
            block_nchars=2)
        ```
    """
    validate(name, str)
    if name in _raw_encodings:
        return _raw_encodings[name]
    raise err.KeyError(f"No raw encoding named {repr(name)}.")
def exists(name: str) -> bool:
    """
        Tells whether a raw encoding is registered under the given name.

        Example usage:

        ```py
        >>> raw_encoding.exists("base16")
        True
        ```
    """
    validate(name, str)
    registered_names = _raw_encodings.keys()
    return name in registered_names
def register(name: str, enc: RawEncoding, *, overwrite: bool = False) -> None:
    """
        Adds a raw encoding to the registry under the given name.

        By default, registering under a name that is already taken raises `err.ValueError`.
        Pass the keyword argument `overwrite=True` to replace the existing raw encoding instead.

        Example usage:

        ```py
        >>> from bases import base45
        >>> raw_encoding.register("base45upper", base45)
        >>> raw_encoding.get("base45upper")
        BlockBaseEncoding(
            StringAlphabet('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:',
                           case_sensitive=False),
            block_size={1: 2, 2: 3}, reverse_blocks=True)
        ```
    """
    validate(name, str)
    validate(enc, RawEncoding)
    validate(overwrite, bool)
    if name in _raw_encodings and not overwrite:
        existing = _raw_encodings[name]
        raise err.ValueError(f"Raw encoding with name {repr(name)} already exists: {existing}")
    _raw_encodings[name] = enc
def unregister(name: str) -> None:
    """
        Removes the raw encoding registered under the given name.
        Raises `err.KeyError` if the name is not registered.

        Example usage:

        ```py
        >>> raw_encoding.unregister("base45upper")
        >>> raw_encoding.exists("base45upper")
        False
        ```
    """
    validate(name, str)
    if name in _raw_encodings:
        del _raw_encodings[name]
        return
    raise err.KeyError(f"Raw encoding with name {repr(name)} does not exist.")
def identity_raw_encoder(b: BytesLike) -> str:
    """
        Raw identity encoder from the [multibase spec](https://github.com/multiformats/multibase/):
        the bytes are interpreted as UTF-8 text and returned as a string.
    """
    if not isinstance(b, (bytes, bytearray)):
        # the only other accepted bytes-like type is memoryview, which has no decode method
        validate(b, memoryview)
        b = bytes(b)
    return b.decode("utf-8") # type: ignore

# NOTE(review): `repr()` resolves `__repr__` on the type rather than on the instance, so this
# attribute is only seen by code calling `identity_raw_encoder.__repr__()` explicitly.
identity_raw_encoder.__repr__ = lambda: "identity_raw_encoder" # type: ignore
def identity_raw_decoder(s: str) -> bytes:
    """
        Raw identity decoder from the [multibase spec](https://github.com/multiformats/multibase/):
        the string is returned as its UTF-8 encoded bytes.
    """
    validate(s, str)
    utf8_bytes = s.encode(encoding="utf-8")
    return utf8_bytes

# NOTE(review): `repr()` resolves `__repr__` on the type rather than on the instance, so this
# attribute is only seen by code calling `identity_raw_decoder.__repr__()` explicitly.
identity_raw_decoder.__repr__ = lambda: "identity_raw_decoder" # type: ignore
# Proquint alphabets: the position of a character in its alphabet is the value it encodes
# (16 consonants carry 4 bits each, 4 vowels carry 2 bits each).
_proquint_consonants = "bdfghjklmnprstvz"
_proquint_consonants_set = frozenset(_proquint_consonants)
_proquint_vowels = "aiou"
_proquint_vowels_set = frozenset(_proquint_vowels)
# Read-only reverse lookups, from character to the value it encodes.
_proquint_consonants_revdir = MappingProxyType(
    dict(zip(_proquint_consonants, range(len(_proquint_consonants))))
)
_proquint_vowels_revdir = MappingProxyType(
    dict(zip(_proquint_vowels, range(len(_proquint_vowels))))
)
def proquint_raw_encoder(b: BytesLike) -> str:
    """
        Proquint encoder following the [proquint spec](https://arxiv.org/html/0901.4016).

        The output starts with the 'ro-' prefix prescribed by the
        [multibase spec](https://github.com/multiformats/multibase/). Bytestrings of odd length
        are supported as an extension: the last byte is encoded as a final 3-letter block,
        padded with two zero bits.
    """
    validate(b, BytesLike)
    view = memoryview(b) # slicing a memoryview does not copy
    cons, vows = _proquint_consonants, _proquint_vowels
    blocks: List[str] = []
    for start in range(0, len(view), 2):
        chunk = view[start:start+2]
        word = int.from_bytes(chunk, byteorder="big")
        if len(chunk) != 2:
            # final byte of an odd-length bytestring: 8 bits + 2 zero pad bits = 4 + 2 + 4 bits
            word <<= 2
            assert word >> 10 == 0
            blocks.append(cons[(word >> 6) & 0xF] + vows[(word >> 4) & 0x3] + cons[word & 0xF])
            continue
        # ordinary byte pair: 16 bits = 4 + 2 + 4 + 2 + 4 bits, most significant first
        assert word >> 16 == 0
        blocks.append(
            cons[(word >> 12) & 0xF]
            + vows[(word >> 10) & 0x3]
            + cons[(word >> 6) & 0xF]
            + vows[(word >> 4) & 0x3]
            + cons[word & 0xF]
        )
    # the prefix follows multibase code "p" to make "pro-", e.g. "pro-lusab-babad"
    return "ro-" + "-".join(blocks)

# NOTE(review): `repr()` resolves `__repr__` on the type rather than on the instance, so this
# attribute is only seen by code calling `proquint_raw_encoder.__repr__()` explicitly.
proquint_raw_encoder.__repr__ = lambda: "proquint_raw_encoder" # type: ignore
def proquint_raw_decoder(s: str) -> bytes:
    """
        Proquint decoder following the [proquint spec](https://arxiv.org/html/0901.4016).

        The input must start with the 'ro-' prefix prescribed by the
        [multibase spec](https://github.com/multiformats/multibase/). A final 3-letter block is
        accepted as an extension, encoding the last byte of an odd-length bytestring
        with two zero pad bits.

        Raises `binascii.Error` if the prefix is missing, the length is invalid, a character is
        not the one expected at its position, or the pad bits are not zero.
    """
    validate(s, str)
    if not s.startswith("ro-"):
        raise binascii.Error("Multibase proquint encoded strings must start with 'ro-'.")
    body = s[3:]
    if not body:
        # nothing after the prefix: empty bytestring
        return b""
    # accepted shapes: cvcvc (len 5), cvcvc-...-cvc (len 6k+3) or cvcvc-...-cvcvc (len 6k+5)
    remainder = len(body) % 6
    if remainder != 3 and remainder != 5:
        raise binascii.Error("Proquint encoded string length must give remainder of 3 or 5 when divided by 6.")
    # per-position rules: (kind, alphabet, reverse lookup, number of bits carried)
    consonant_rule = ("consonant", _proquint_consonants, _proquint_consonants_revdir, 4)
    vowel_rule = ("vowel", _proquint_vowels, _proquint_vowels_revdir, 2)
    # accumulate all decoded bits into a single unsigned integer, most significant first
    acc = 0
    for pos, char in enumerate(body):
        if pos % 6 == 5:
            # every sixth character separates two blocks
            if char != "-":
                raise binascii.Error(f"Incorrect char at position {pos}: expected '-', found {repr(char)}.")
            continue
        # blocks are 6 characters apart, so consonants always sit at even positions
        kind, alphabet, lookup, width = consonant_rule if pos % 2 == 0 else vowel_rule
        if char not in lookup:
            raise binascii.Error(f"Incorrect char at position {pos}: expected {kind} in {repr(alphabet)}, "
                                 f"found {repr(char)}.")
        acc = (acc << width) | lookup[char]
    # two bytes for every full 5-letter block
    nbytes = 2*((len(body)+1)//6)
    if remainder == 3:
        # terminating triplet (odd bytestring length): strip the two pad bits, which must be zero
        pad_bits = acc & 0x3
        acc >>= 2
        if pad_bits != 0:
            raise binascii.Error(f"Expected pad bits to be 00, found {bin(pad_bits)[2:]} instead.")
        nbytes += 1
    return acc.to_bytes(nbytes, byteorder="big")

# NOTE(review): `repr()` resolves `__repr__` on the type rather than on the instance, so this
# attribute is only seen by code calling `proquint_raw_decoder.__repr__()` explicitly.
proquint_raw_decoder.__repr__ = lambda: "proquint_raw_decoder" # type: ignore
# Populate the registry at import time: the two custom encodings first,
# then the encodings provided by the `bases` library (same order as listed).
for _name, _encoding in (
    # custom encodings
    ("identity", CustomEncoding(identity_raw_encoder, identity_raw_decoder)),
    ("proquint", CustomEncoding(proquint_raw_encoder, proquint_raw_decoder)),
    # base encodings
    ("base2", base2),
    ("base8", base8),
    ("base10", base10),
    ("base16", base16.lower()),
    ("base16upper", base16),
    ("base32hex", base32hex.nopad().lower()),
    ("base32hexupper", base32hex.nopad()),
    ("base32hexpad", base32hex.lower()),
    ("base32hexpadupper", base32hex),
    ("base32", base32.nopad().lower()),
    ("base32upper", base32.nopad()),
    ("base32pad", base32.lower()),
    ("base32padupper", base32),
    ("base32z", base32z),
    ("base36", base36.lower()),
    ("base36upper", base36),
    ("base58btc", base58btc),
    ("base58flickr", base58flickr),
    ("base64", base64.nopad()),
    ("base64pad", base64),
    ("base64url", base64url.nopad()),
    ("base64urlpad", base64url),
):
    register(_name, _encoding)
# do not leave the loop variables behind in the module namespace
del _name, _encoding
# additional docs info: keep the raw encoder/decoder helper functions out of the generated docs
__pdoc__ = dict.fromkeys(
    (
        "identity_raw_encoder",
        "identity_raw_decoder",
        "proquint_raw_encoder",
        "proquint_raw_decoder",
    ),
    False,
)
Functions
def exists(name: str) ‑> bool
-
Checks whether a raw encoding with given name exists.
Example usage:
>>> raw_encoding.exists("base16")
True
Expand source code
def exists(name: str) -> bool:
    """
        Tells whether a raw encoding is registered under the given name.

        Example usage:

        ```py
        >>> raw_encoding.exists("base16")
        True
        ```
    """
    validate(name, str)
    registered_names = _raw_encodings.keys()
    return name in registered_names
def get(name: str) ‑> Union[CustomEncoding, bases.encoding.base.BaseEncoding]
-
Gets the raw encoding with given name. Raises err.KeyError if no such encoding exists.
Example usage:
>>> raw_encoding.get("base16")
ZeropadBaseEncoding(
    StringAlphabet('0123456789abcdef',
                   case_sensitive=False),
    block_nchars=2)
Expand source code
def get(name: str) -> RawEncoding:
    """
        Looks up a registered raw encoding by name and returns it.
        Raises `err.KeyError` if the name is not registered.

        Example usage:

        ```py
        >>> raw_encoding.get("base16")
        ZeropadBaseEncoding(
            StringAlphabet('0123456789abcdef',
                           case_sensitive=False),
            block_nchars=2)
        ```
    """
    validate(name, str)
    if name in _raw_encodings:
        return _raw_encodings[name]
    raise err.KeyError(f"No raw encoding named {repr(name)}.")
def register(name: str, enc: Union[CustomEncoding, bases.encoding.base.BaseEncoding], *, overwrite: bool = False) ‑> None
-
Registers a raw encoding by name. The optional keyword argument overwrite (default: False)
can be used to overwrite a raw encoding with existing name.
If overwrite is False, raises err.ValueError if a raw encoding with the same name already exists.
Example usage:
>>> from bases import base45
>>> raw_encoding.register("base45upper", base45)
>>> raw_encoding.get("base45upper")
BlockBaseEncoding(
    StringAlphabet('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:',
                   case_sensitive=False),
    block_size={1: 2, 2: 3}, reverse_blocks=True)
Expand source code
def register(name: str, enc: RawEncoding, *, overwrite: bool = False) -> None:
    """
        Adds a raw encoding to the registry under the given name.

        By default, registering under a name that is already taken raises `err.ValueError`.
        Pass the keyword argument `overwrite=True` to replace the existing raw encoding instead.

        Example usage:

        ```py
        >>> from bases import base45
        >>> raw_encoding.register("base45upper", base45)
        >>> raw_encoding.get("base45upper")
        BlockBaseEncoding(
            StringAlphabet('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:',
                           case_sensitive=False),
            block_size={1: 2, 2: 3}, reverse_blocks=True)
        ```
    """
    validate(name, str)
    validate(enc, RawEncoding)
    validate(overwrite, bool)
    if name in _raw_encodings and not overwrite:
        existing = _raw_encodings[name]
        raise err.ValueError(f"Raw encoding with name {repr(name)} already exists: {existing}")
    _raw_encodings[name] = enc
def unregister(name: str) ‑> None
-
Unregisters a raw encoding by name. Raises err.KeyError if no such raw encoding exists.
Example usage:
>>> raw_encoding.unregister("base45upper")
>>> raw_encoding.exists("base45upper")
False
Expand source code
def unregister(name: str) -> None:
    """
        Removes the raw encoding registered under the given name.
        Raises `err.KeyError` if the name is not registered.

        Example usage:

        ```py
        >>> raw_encoding.unregister("base45upper")
        >>> raw_encoding.exists("base45upper")
        False
        ```
    """
    validate(name, str)
    if name in _raw_encodings:
        del _raw_encodings[name]
        return
    raise err.KeyError(f"Raw encoding with name {repr(name)} does not exist.")
Classes
class CustomEncoding (raw_encoder: Callable[[bytes], str], raw_decoder: Callable[[str], bytes])
-
Class for custom raw encodings, implemented by explicitly passing raw encoding and decoding functions. The raw encoder and decoder are expected to validate their own arguments.
Expand source code
class CustomEncoding:
    """
        Raw encoding defined by an explicit pair of functions: one turning bytes into a string
        and one turning a string back into bytes.

        No argument validation is performed here: the supplied encoder and decoder
        are expected to validate their own arguments.
    """

    _raw_encoder: Callable[[bytes], str]
    _raw_decoder: Callable[[str], bytes]

    def __init__(self, raw_encoder: Callable[[bytes], str], raw_decoder: Callable[[str], bytes]):
        # TODO: validate both callables once typing-validation supports `Callable` types
        self._raw_encoder, self._raw_decoder = raw_encoder, raw_decoder # type: ignore

    def encode(self, b: BytesLike) -> str:
        """
            Encodes the given bytes by delegating to the custom raw encoder.
        """
        return self._raw_encoder(b) # type: ignore

    def decode(self, s: str) -> bytes:
        """
            Decodes the given string by delegating to the custom raw decoder.
        """
        return self._raw_decoder(s) # type: ignore

    def __repr__(self) -> str:
        # shows the representations of the two wrapped callables
        return f"CustomEncoding({self._raw_encoder!r}, {self._raw_decoder!r})" # type: ignore
Methods
def decode(self, s: str) ‑> bytes
-
Calls the custom raw decoder.
Expand source code
def decode(self, s: str) -> bytes:
    """
        Decodes the given string by delegating to the custom raw decoder.
    """
    return self._raw_decoder(s) # type: ignore
def encode(self, b: Union[bytes, bytearray, memoryview]) ‑> str
-
Calls the custom raw encoder.
Expand source code
def encode(self, b: BytesLike) -> str:
    """
        Encodes the given bytes by delegating to the custom raw encoder.
    """
    return self._raw_encoder(b) # type: ignore