Module multiformats.multibase
Implementation of the multibase spec.
Expand source code
"""
Implementation of the [multibase spec](https://github.com/multiformats/multibase).
"""
from abc import ABC, abstractmethod
import base64
import binascii
import csv
from dataclasses import dataclass, field, InitVar
from importlib import resources
from itertools import product
import re
from typing import Callable, cast, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple, Union
import base58
# Type of a raw encoder: takes bytes and returns the encoded string, without multibase prefix.
RawEncoder = Callable[[bytes], str]
# Type of a raw decoder: takes a string without multibase prefix and returns the decoded bytes.
RawDecoder = Callable[[str], bytes]
@dataclass(frozen=True)
class Encoding:
    """
    Dataclass for a multibase encoding.

    Example usage:

    ```py
    >>> Encoding.from_json({
    ...     'encoding': 'base8', 'code': '7',
    ...     'status': 'draft', 'description': 'octal'})
    Encoding(encoding='base8', code='7',
             status='draft', description='octal')
    ```

    Direct instantiation should be avoided: it is field-order dependent
    and might change without warning in the future.
    """

    encoding: str
    """
    Encoding name. Must satisfy the following:

    ```py
    re.match(r"^[a-z][a-z0-9_-]+$", name)
    ```
    """

    code: str
    """ Encoding code. Must be a single ASCII character.

    More specifically, it must be a single unicode codepoint satisfying:

    ```py
    ord(code) in range(0x00, 0x80)
    ```
    """

    status: str
    """ Encoding status. Must be 'draft', 'candidate' or 'default'."""

    description: str
    """ Encoding description. """

    def __post_init__(self):
        """ Validates the fields, raising `ValueError` on the first invalid one. """
        if not re.match(r"^[a-z][a-z0-9_-]+$", self.name):  # ensure len(name) > 1
            raise ValueError(f"Invalid multibase encoding name {repr(self.name)}")
        if self.status not in ("draft", "candidate", "default"):
            raise ValueError(f"Invalid multibase encoding status {repr(self.status)}.")
        if len(self.code) != 1:
            raise ValueError(f"Invalid multibase encoding code {repr(self.code)} (length != 1).")
        if ord(self.code) not in range(0x00, 0x80):
            # render the offending codepoint as an even number of hex digits
            codepoint = hex(ord(self.code))[2:]
            if len(codepoint) % 2 != 0:
                codepoint = "0"+codepoint
            raise ValueError(f"Invalid multibase encoding code '\\x{codepoint}'")

    @property
    def name(self) -> str:
        """
        An alias for the `Encoding.encoding` attribute.

        Using `Encoding.name` over `Encoding.encoding` is preferred, both for uniformity
        with the [multicodec spec](https://github.com/multiformats/multicodec) and
        to avoid potential confusion between encoding names (strings) and encoding
        objects (instances of `Encoding`).
        """
        return self.encoding

    @property
    def raw_encoder(self) -> "RawEncoder":
        """
        Returns the raw encoder for this encoding:
        given bytes, it produces the encoded string without the multibase prefix.

        Raises `NotImplementedError` if no raw encoder is registered for this encoding.
        """
        if self.name not in _encoder_table:
            raise NotImplementedError(f"Encoding using {self.name} is not yet implemented.")
        return _encoder_table[self.name]

    @property
    def raw_decoder(self) -> "RawDecoder":
        """
        Returns the raw decoder for this encoding:
        given a string without the multibase prefix, it produces the decoded data.

        Raises `NotImplementedError` if no raw decoder is registered for this encoding.
        """
        if self.name not in _decoder_table:
            raise NotImplementedError(f"Decoding using {self.name} is not yet implemented.")
        return _decoder_table[self.name]

    def encode(self, data: bytes) -> str:
        """
        Encodes bytes into a multibase string: it first uses `Encoding.raw_encoder`,
        and then prepends the multibase prefix given by `Encoding.code` and returns
        the resulting multibase string.

        Example usage:

        ```py
        >>> multibase.get("base64").encode(b"Hello world!")
        'mSGVsbG8gd29ybGQh'
        ```
        """
        return self.code+self.raw_encoder(data)

    def decode(self, data: str) -> bytes:
        """
        Decodes a multibase string into bytes: it first checks that the multibase
        prefix matches the value specified by `Encoding.code`, then uses
        `Encoding.raw_decoder` on the string without prefix and returns the bytes.

        Raises `ValueError` if the string is empty or if its prefix is the code of
        a different encoding. A prefix that is not the code of any known encoding
        results in the `KeyError` raised by `get`.

        Example usage:

        ```py
        >>> multibase.get("base64").decode("mSGVsbG8gd29ybGQh")
        b'Hello world!'
        ```
        """
        if not data:
            # without this guard, data[0] below would raise a bare IndexError
            raise ValueError("Empty string is not valid for encoded data.")
        if data[0] != self.code:
            raise ValueError(f"Expected {repr(self.name)} encoding, "
                             f"found {repr(get(data[0]).name)} encoding instead.")
        return self.raw_decoder(data[1:])

    def to_json(self) -> Mapping[str, str]:
        """
        Returns a JSON dictionary representation of this `Encoding` object,
        compatible with the one from the multibase.csv table found in the
        [multibase spec](https://github.com/multiformats/multibase).

        Printable ASCII codes are emitted verbatim, all other codes are emitted
        as hexadecimal literals (which `Encoding.from_json` converts back).

        Example usage:

        ```py
        >>> multibase.get("base8").to_json()
        {'encoding': 'base8', 'code': '7',
         'status': 'draft', 'description': 'octal'}
        ```
        """
        code = self.code
        # compare the codepoint (not the string itself) against the printable range
        if ord(code) not in range(0x20, 0x7F):
            code = hex(ord(code))
        return {
            "encoding": self.encoding,
            "code": code,
            "status": self.status,
            "description": self.description
        }

    @staticmethod
    def from_json(multibase_encoding: Mapping[str, Union[str, int]]) -> "Encoding":
        """
        Creates an `Encoding` object from a JSON dictionary representation
        compatible with the one from the multibase.csv table found in the
        [multibase spec](https://github.com/multiformats/multibase).

        Raises `KeyError` if a field is missing and `TypeError` if a field is not a string.
        A code starting with `0x` is interpreted as the hexadecimal value of the codepoint.

        Example usage:

        ```py
        >>> Encoding.from_json({
        ...     'encoding': 'base8', 'code': '7',
        ...     'status': 'draft', 'description': 'octal'})
        Encoding(encoding='base8', code='7',
                 status='draft', description='octal')
        ```
        """
        encoding = multibase_encoding["encoding"]
        code = multibase_encoding["code"]
        status = multibase_encoding["status"]
        description = multibase_encoding["description"]
        if not isinstance(encoding, str):
            raise TypeError(f"Expected string, found {encoding = }.")
        if not isinstance(status, str):
            raise TypeError(f"Expected string, found {status = }.")
        if not isinstance(description, str):
            raise TypeError(f"Expected string, found {description = }.")
        if not isinstance(code, str):
            raise TypeError(f"Expected string, found {code = }")
        if code.startswith("0x"):
            code = chr(int(code, base=16))
        return Encoding(encoding, code, status, description)
def get(name_or_code: str) -> Encoding:
    """
    Looks up a multibase encoding, by code when a single character is passed
    and by name when a string of length >= 2 is passed.

    Raises `ValueError` on the empty string and `KeyError` if no such encoding exists.

    Example usage:

    ```py
    >>> multibase.get("base8")
    Encoding(encoding='base8', code='7',
             status='draft', description='octal')
    >>> multibase.get('t')
    Encoding(encoding='base32hexpad', code='t', status='candidate',
             description='rfc4648 case-insensitive - with padding')
    ```
    """
    if not name_or_code:
        raise ValueError("Empty string is neither a name nor a code.")
    if len(name_or_code) > 1:
        by_name = _name_table.get(name_or_code)
        if by_name is None:
            raise KeyError(f"No multibase encoding named {repr(name_or_code)}.")
        return by_name
    by_code = _code_table.get(name_or_code)
    if by_code is None:
        raise KeyError(f"No multibase encoding with code {repr(name_or_code)}.")
    return by_code
def exists(name_or_code: str) -> bool:
    """
    Tells whether a multibase encoding is registered under the given code
    (string of length 1) or the given name (string of length >= 2).

    Raises `ValueError` if the empty string is passed.

    Example usage:

    ```py
    >>> multibase.exists("base8")
    True
    >>> multibase.exists('t')
    True
    ```
    """
    length = len(name_or_code)
    if length == 0:
        raise ValueError("Empty string is neither a name nor a code.")
    # single characters are codes, anything longer is a name
    lookup = _code_table if length == 1 else _name_table
    return name_or_code in lookup
def table() -> Iterator[Encoding]:
    """
    Yields all registered encodings, sorted by ascending code.

    Example usage:

    ```py
    >>> [e.code for e in multibase.table()]
    ['\\x00', '0', '7', '9', 'B', 'C', 'F', 'K', 'M', 'T', 'U', 'V',
     'Z','b', 'c', 'f', 'h', 'k', 'm', 'p', 't', 'u', 'v', 'z']
    ```
    """
    yield from map(_code_table.__getitem__, sorted(_code_table))
def encoding_of(data: str) -> Encoding:
    """
    Infers the multibase encoding of the given string from its first character (the code).

    Raises `ValueError` if the empty string is passed.
    Raises `KeyError` if no encoding exists with that code.

    Example usage:

    ```py
    >>> multibase.encoding_of("mSGVsbG8gd29ybGQh")
    Encoding(encoding='base64', code='m', status='default',
             description='rfc4648 no padding')
    ```
    """
    if not data:
        raise ValueError("Empty string is not valid for encoded data.")
    return get(data[0])
def encode(data: bytes, encoding: Union[str, "Encoding"]) -> str:
    """
    Encodes bytes into a multibase string with the given encoding.

    The encoding can be given as an `Encoding` object, or as a name/code string,
    in which case it is resolved by `get`. The work is done by `Encoding.encode`.

    Example usage:

    ```py
    >>> multibase.encode(b"Hello world!", "base64")
    'mSGVsbG8gd29ybGQh'
    ```
    """
    resolved = get(encoding) if isinstance(encoding, str) else encoding
    return resolved.encode(data)
def decode(data: str) -> bytes:
    """
    Decodes a multibase string into bytes.

    The encoding is determined by `encoding_of` (i.e. from the first character),
    and the decoding itself is delegated to `Encoding.decode`.

    Example usage:

    ```py
    >>> multibase.decode("mSGVsbG8gd29ybGQh")
    b'Hello world!'
    ```
    """
    return encoding_of(data).decode(data)
def build_multibase_tables(encodings: Iterable["Encoding"]) -> Tuple[Dict[str, "Encoding"], Dict[str, "Encoding"]]:
    """
    Creates code->encoding and name->encoding mappings from a finite iterable of encodings, returning the mappings.

    Raises `ValueError` if the same encoding code or name is encountered multiple times.

    Example usage:

    ```py
    code_table, name_table = build_multibase_tables(encodings)
    ```
    """
    code_table: Dict[str, "Encoding"] = {}
    name_table: Dict[str, "Encoding"] = {}
    for e in encodings:
        if e.code in code_table:
            # report the duplicated code itself (repr, since codes may be non-printable)
            raise ValueError(f"Multibase code {repr(e.code)} appears multiple times in table.")
        code_table[e.code] = e
        if e.name in name_table:
            raise ValueError(f"Multibase name {e.name} appears multiple times in table.")
        name_table[e.name] = e
    return code_table, name_table
# Create the global code->encoding and name->encoding mappings
# from the "multibase-table.csv" resource of the "multiformats" package.
# Both tables have type Dict[str, Encoding].
with resources.open_text("multiformats", "multibase-table.csv") as csv_table:
    reader = csv.DictReader(csv_table)
    # Whitespace around column names and cell values is stripped before each row
    # is turned into an Encoding; the generator is lazy, one row at a time.
    multicodecs = (Encoding.from_json({k.strip(): v.strip() for k, v in _row.items()})
                   for _row in reader)
    # The generator is consumed here, while the CSV resource is still open.
    _code_table, _name_table = build_multibase_tables(multicodecs)
class RawEncoding(ABC):
    """
    Abstract interface for raw encodings: a pair of `encode`/`decode`
    methods converting between bytes and strings.
    """

    @abstractmethod
    def encode(self, b: bytes) -> str:
        """ Encodes the given bytes into a string. """

    @abstractmethod
    def decode(self, s: str) -> bytes:
        """ Decodes the given string into bytes. """
class CustomRawEncoding(RawEncoding):
    """
    Raw encoding defined by an arbitrary pair of encoder/decoder callables.

    The constructor takes two positional parameters:

    1. `raw_encoder` is the callable used by `CustomRawEncoding.encode` (bytes to string).
    2. `raw_decoder` is the callable used by `CustomRawEncoding.decode` (string to bytes).
    """

    _raw_encoder: Callable[[bytes], str]
    _raw_decoder: Callable[[str], bytes]

    def __init__(self, raw_encoder: Callable[[bytes], str], raw_decoder: Callable[[str], bytes]):
        self._raw_encoder = raw_encoder # type: ignore
        self._raw_decoder = raw_decoder # type: ignore

    def encode(self, b: bytes) -> str:
        """ Encodes the bytes using the encoder passed to the constructor. """
        # the callables are stored under the underscore-prefixed attribute names
        raw_encoder: Callable[[bytes], str] = self._raw_encoder # type: ignore
        return raw_encoder(b)

    def decode(self, s: str) -> bytes:
        """ Decodes the string using the decoder passed to the constructor. """
        raw_decoder: Callable[[str], bytes] = self._raw_decoder # type: ignore
        return raw_decoder(s)
class AlphabeticRawEncoding(RawEncoding):
    """
    Class for raw encodings analogous to base64, base32 and base16,
    described by [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html).

    The constructor takes three positional parameters:

    1. `alphabet: str` is the encoding alphabet. It must not be empty, nor contain repeated characters.
       Characters allowed are all printable ASCII characters, except for the delete character, i.e.
       ```all(ord(c) in range(0x20, 0x7F) for c in alphabet)```
    2. `group_nchars: int` is the number of characters in a group. Must be a positive integer.
       For example, the value in base64 is 4, while the value in base32 is 8.
    3. `group_nbytes: int` is the number of bytes in a group. Must be a positive integer.
       For example, the value in base64 is 3, while the value in base32 is 5.

    The constructor takes the following keyword-only arguments, all optional:

    - `pad_char: Optional[str] = None` is an optional character to be used as padding.
      For example, the value in both base64 and base32 is `"="`. It must not belong to the alphabet.
      If `None`, a `binascii.Error` will be raised upon encoding if the encoded string requires padding.
      If `include_padding` is `False`, this has no effect upon encoding; however, the specified padding character
      is still counted as a legal character at the end of a string being decoded.
    - `include_padding: bool = True` determines whether padding will be included in encoded strings.
      If `False`, the result of `AlphabeticRawEncoding.encode` might be a string of incorrect length according
      to [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html) (i.e. one with length which is not a multiple of `group_nchars`).
    - `require_padding: bool = False` determines whether strings with an incorrect length (i.e. one with length which is
      not a multiple of `group_nchars`) should be rejected on decoding (raising `binascii.Error`).
    - `require_exact_bytes: bool = False` determines whether bytestrings with an incorrect length (i.e. one with length which is
      not a multiple of `group_nbytes`) should be rejected on encoding (raising `binascii.Error`).
    - `lookup_table: bool = False` determines whether the encoding of every possible complete byte group
      is precomputed by the constructor. The table has up to `256**group_nbytes` entries,
      so this is only sensible for very small values of `group_nbytes`.
    """
    # pylint: disable = too-many-instance-attributes

    _alphabet: str
    _group_nchars: int
    _group_nbytes: int
    _pad_char: Optional[str]
    _include_padding: bool
    _require_padding: bool
    _require_exact_bytes: bool
    _lookup_table: Optional[Dict[bytes, str]]
    _char_index: Dict[str, int]

    def __init__(self, alphabet: str, group_nchars: int, group_nbytes: int, *,
                 pad_char: Optional[str] = None, include_padding: bool = True, require_padding: bool = False,
                 require_exact_bytes: bool = False, lookup_table: bool = False):
        if len(alphabet) == 0:
            raise ValueError("Empty alphabet not allowed.")
        if len(alphabet) != len(set(alphabet)):
            raise ValueError("Repeated letters in the alphabet.")
        for c in alphabet:
            if ord(c) not in range(0x20, 0x7F):
                codepoint = hex(ord(c))[2:]
                if len(codepoint) % 2 != 0:
                    codepoint = "0"+codepoint
                raise ValueError(f"Allowed characters are 0x20-0x7E (inclusive), found '\\x{codepoint}'")
        if group_nbytes <= 0:
            raise ValueError("Number of bytes in a group must be a positive integer.")
        if group_nchars <= 0:
            raise ValueError("Number of chars in a group must be a positive integer.")
        group_nbits = 8*group_nbytes
        if group_nbits%group_nchars != 0:
            raise ValueError(f"Number of bits in a group ({group_nbits}) is not divisible "
                             f"by number of characters in a group ({group_nchars})")
        if pad_char is not None and len(pad_char) != 1:
            raise ValueError("If specified, padding character must be a string object of length 1.")
        if pad_char is not None and pad_char in alphabet:
            # otherwise trailing padding could not be told apart from data when decoding
            raise ValueError("Padding character must not belong to the alphabet.")
        self._alphabet = alphabet
        self._group_nchars = group_nchars
        self._group_nbytes = group_nbytes
        self._pad_char = pad_char
        self._include_padding = include_padding
        self._require_padding = require_padding
        self._require_exact_bytes = require_exact_bytes
        # reverse mapping character -> value, used when decoding
        self._char_index = {c: idx for idx, c in enumerate(alphabet)}
        self._lookup_table = None
        if lookup_table:
            # one entry per possible complete byte group: 8 bits per byte, hence 256**group_nbytes
            precomputed: Dict[bytes, str] = {}
            for i in range(256**group_nbytes):
                byte_group = i.to_bytes(group_nbytes, byteorder="big")
                try:
                    precomputed[byte_group] = self._encode_group(byte_group, 0, 0)
                except binascii.Error:
                    # groups which cannot be written in a short alphabet are left out:
                    # `encode` reports them as invalid when it fails to find them
                    continue
            self._lookup_table = precomputed

    def _encode_group(self, byte_group: bytes, padding: int, b_idx: int) -> str:
        """
        Encodes one group of exactly `group_nbytes` bytes, where the last `padding`
        characters of the group are padding. The byte offset `b_idx` of the group
        is only used for error reporting.
        """
        group_nchars = self._group_nchars
        char_nbits = 8*self._group_nbytes//group_nchars
        char_modulus = 2**char_nbits
        alphabet = self._alphabet
        # transform bytes into integer for easier bitwise manipulation
        group_int = int.from_bytes(byte_group, byteorder="big")
        # compute group characters, in reverse order (least significant bits first)
        revchars: List[str] = []
        for j in range(group_nchars):
            if j >= padding:
                alphabet_idx = group_int%char_modulus
                if alphabet_idx >= len(alphabet):
                    raise binascii.Error(f"Invalid byte group #{b_idx}.")
                revchars.append(alphabet[alphabet_idx])
            elif self._include_padding:
                # callers ensure that pad_char is not None when padding is included
                revchars.append(cast(str, self._pad_char))
            # the bits are consumed whether they produced a character or padding
            group_int >>= char_nbits
        return "".join(reversed(revchars))

    def encode(self, b: bytes) -> str:
        """
        Encodes the given bytes into a string, one group of `group_nbytes` bytes at a time.
        A final incomplete group is filled with zero bits and, if `include_padding`
        is `True`, completed with padding characters.

        Raises `binascii.Error` if:

        - the number of bytes is not a multiple of `group_nbytes` and `require_exact_bytes` is `True`;
        - padding must be included but no padding character was specified;
        - some group of bytes cannot be written using the alphabet.
        """
        # extract encoding parameters
        group_nbytes = self._group_nbytes
        char_nbits = 8*group_nbytes//self._group_nchars
        lookup_table = self._lookup_table
        # split the bytes into complete groups and a (possibly empty) incomplete tail
        tail_nbytes = len(b)%group_nbytes
        complete_nbytes = len(b)-tail_nbytes
        zero_bytes = 0
        padding = 0
        if tail_nbytes != 0:
            if self._require_exact_bytes:
                raise binascii.Error(f"Number of bytes ({len(b)}) is not a multiple of {group_nbytes}.")
            zero_bytes = group_nbytes-tail_nbytes
            # characters made entirely of filler bits are replaced by padding
            padding = zero_bytes*8//char_nbits
            if padding > 0 and self._include_padding and self._pad_char is None:
                raise binascii.Error(f"Padding of length {padding} required, but no padding character.")
        # main loop: compute chars of the complete groups
        chars: List[str] = []
        for b_idx in range(0, complete_nbytes, group_nbytes):
            byte_group = b[b_idx:b_idx+group_nbytes]
            if lookup_table is not None:
                # get characters from precomputed lookup table
                if byte_group not in lookup_table:
                    raise binascii.Error(f"Invalid byte group #{b_idx}.")
                chars.append(lookup_table[byte_group])
            else:
                chars.append(self._encode_group(byte_group, 0, b_idx))
        # only the final, incomplete group is zero-filled and padded
        if tail_nbytes != 0:
            byte_group = b[complete_nbytes:]+b"\x00"*zero_bytes
            chars.append(self._encode_group(byte_group, padding, complete_nbytes))
        # create and return encoded string
        return "".join(chars)

    def decode(self, s: str) -> bytes:
        """
        Decodes the given string into bytes, one group of `group_nchars` characters at a time.
        Padding characters at the end of the string are accepted and ignored.
        The filler bits of a final incomplete group are discarded without being checked.

        Raises `binascii.Error` if:

        - the length of the string is not a multiple of `group_nchars` and `require_padding` is `True`;
        - the string ends with `group_nchars` or more padding characters;
        - the string contains a character not in the alphabet (including padding not at the end);
        - the final incomplete group has a number of characters which no number of bytes encodes to.
        """
        group_nbytes = self._group_nbytes
        group_nchars = self._group_nchars
        char_nbits = 8*group_nbytes//group_nchars
        char_index = self._char_index
        if self._require_padding and len(s)%group_nchars != 0:
            raise binascii.Error(f"Length of string ({len(s)}) is not a multiple of {group_nchars}.")
        # padding is only legal at the end of the string
        body = s if self._pad_char is None else s.rstrip(self._pad_char)
        if len(s)-len(body) >= group_nchars:
            raise binascii.Error(f"Excess padding of length {len(s)-len(body)}.")
        decoded = bytearray()
        for s_idx in range(0, len(body), group_nchars):
            char_group = body[s_idx:s_idx+group_nchars]
            # accumulate the characters of the group into an integer, most significant first
            group_int = 0
            for offset, c in enumerate(char_group):
                if c not in char_index:
                    raise binascii.Error(f"Invalid character {repr(c)} at position {s_idx+offset}.")
                group_int = (group_int << char_nbits) | char_index[c]
            if len(char_group) == group_nchars:
                decoded += group_int.to_bytes(group_nbytes, byteorder="big")
                continue
            # final incomplete group: n bytes are encoded by ceil(8*n/char_nbits) characters
            nbits = len(char_group)*char_nbits
            nbytes = nbits//8
            if nbytes == 0 or -(-8*nbytes//char_nbits) != len(char_group):
                raise binascii.Error(f"Invalid number of characters ({len(char_group)}) in final group.")
            # discard the filler bits which followed the last byte
            group_int >>= nbits-8*nbytes
            decoded += group_int.to_bytes(nbytes, byteorder="big")
        return bytes(decoded)
# Alphabets of the rfc4648 encodings, by encoding name.
_alphabets = {
    "base16": "0123456789ABCDEF",
    "base32": "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567",
    "base32hex": "0123456789ABCDEFGHIJKLMNOPQRSTUV",
    "base64": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
    "base64url": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
}

# Raw encodings, by encoding name. For the alphabetic ones, the positional arguments are
# the alphabet, the number of characters in a group and the number of bytes in a group.
_raw_encodings: Dict[str, RawEncoding] = {
    "identity": CustomRawEncoding(lambda b: b.decode("utf-8"), lambda s: s.encode("utf-8")),
    # base16 must use its own 16-character alphabet: 2 characters of 4 bits per byte
    "base16": AlphabeticRawEncoding(_alphabets["base16"], 2, 1, lookup_table=True),
    "base32": AlphabeticRawEncoding(_alphabets["base32"], 8, 5, pad_char="="),
    "base32hex": AlphabeticRawEncoding(_alphabets["base32hex"], 8, 5, pad_char="="),
    "base64": AlphabeticRawEncoding(_alphabets["base64"], 4, 3, pad_char="="),
    "base64url": AlphabeticRawEncoding(_alphabets["base64url"], 4, 3, pad_char="="),
}
# Utility functions


def _b32pad(s):
    """ Appends the '=' characters needed to make the length a multiple of 8. """
    return s + "=" * (-len(s) % 8)


def _b64pad(s):
    """ Appends the '=' characters needed to make the length a multiple of 4. """
    return s + "=" * (-len(s) % 4)


def _b32nopad(s):
    """ Removes all '=' characters from the end of the string. """
    return s.rstrip("=")


# removing padding works in the same way for base64 and base32
_b64nopad = _b32nopad
# Creates table of raw encoders
_encoder_table: Dict[str, "RawEncoder"] = {}

# Maps the characters of the base32 alphabet to the characters of the base32hex alphabet
# (base64.b32hexencode is only available starting from Python 3.10).
_B32_TO_B32HEX = str.maketrans("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567",
                               "0123456789ABCDEFGHIJKLMNOPQRSTUV")


def _make_raw_encoder(stdlib_encoder: Callable[[bytes], bytes], *,
                      translation: Optional[Dict[int, int]] = None,
                      keep_padding: bool = True,
                      lowercase: bool = False) -> Callable[[bytes], str]:
    """
    Returns a raw encoder which applies the given `base64` module encoder and then,
    in this order: translates the characters according to `translation` (if not `None`),
    removes the trailing '=' padding (unless `keep_padding`) and lowercases (if `lowercase`).

    All options are bound as arguments of this factory, so that each encoder in the table
    keeps its own settings, rather than reading variables which later loop iterations rebind.
    """
    def raw_encoder(b: bytes) -> str:
        s = stdlib_encoder(b).decode("utf-8")
        if translation is not None:
            s = s.translate(translation)
        if not keep_padding:
            s = s.rstrip("=")
        if lowercase:
            s = s.lower()
        return s
    return raw_encoder


# identity encoder
_encoder_table["identity"] = lambda b: b.decode("utf-8")

# base16 encoders
for _upper in ["", "upper"]:
    _encoder_table[f"base16{_upper}"] = _make_raw_encoder(base64.b16encode, lowercase=not _upper)

# base32 encoders
for _hex, _pad, _upper in product(["", "hex"], ["", "pad"], ["", "upper"]):
    _encoder_table[f"base32{_hex}{_pad}{_upper}"] = _make_raw_encoder(
        base64.b32encode,
        translation=_B32_TO_B32HEX if _hex else None,
        keep_padding=bool(_pad),
        lowercase=not _upper)

# base64 encoders
for _url, _pad in product(["", "url"], ["", "pad"]):
    _encoder_table[f"base64{_url}{_pad}"] = _make_raw_encoder(
        base64.urlsafe_b64encode if _url else base64.standard_b64encode,
        keep_padding=bool(_pad))
# Creates table of raw decoders
_decoder_table: Dict[str, "RawDecoder"] = {}

# Maps the characters of the base32hex alphabet to the characters of the base32 alphabet
# (base64.b32hexdecode is only available starting from Python 3.10).
_B32HEX_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
_B32HEX_TO_B32 = str.maketrans(_B32HEX_ALPHABET, "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567")
_B32HEX_LEGAL_CHARS = frozenset(_B32HEX_ALPHABET+_B32HEX_ALPHABET.lower()+"=")


def _b32hex_to_b32(s: str) -> str:
    """
    Converts a base32hex string (case-insensitive) into the base32 string for the same bytes.
    Raises `binascii.Error` if characters outside of the base32hex alphabet are found:
    the translation would leave them unchanged, and some of them are legal base32 characters.
    """
    if not _B32HEX_LEGAL_CHARS.issuperset(s):
        raise binascii.Error("Non-base32hex digit found")
    return s.upper().translate(_B32HEX_TO_B32)


def _b16decode(raw: bytes) -> bytes:
    """ Case-insensitive base16 decoder. """
    return base64.b16decode(raw, casefold=True)


def _b32decode(raw: bytes) -> bytes:
    """ Case-insensitive base32 decoder. """
    return base64.b32decode(raw, casefold=True)


def _make_raw_decoder(stdlib_decoder: Callable[[bytes], bytes], group_nchars: int, *,
                      prepare: Optional[Callable[[str], str]] = None) -> Callable[[str], bytes]:
    """
    Returns a raw decoder which, in this order: applies `prepare` to the string (if not `None`),
    appends the '=' padding needed to make the length a multiple of `group_nchars`
    and applies the given `base64` module decoder.
    """
    def raw_decoder(s: str) -> bytes:
        if prepare is not None:
            s = prepare(s)
        s += "="*(-len(s)%group_nchars)
        return stdlib_decoder(s.encode("utf-8"))
    return raw_decoder


# identity decoder
_decoder_table["identity"] = lambda s: s.encode("utf-8")

# base16 decoders (no padding in base16, hence groups of 1 character)
for _upper in ["", "upper"]:
    _decoder_table[f"base16{_upper}"] = _make_raw_decoder(_b16decode, 1)

# base32 decoders
for _hex, _pad, _upper in product(["", "hex"], ["", "pad"], ["", "upper"]):
    # the hex variants use a different alphabet, which is translated before decoding
    _decoder_table[f"base32{_hex}{_pad}{_upper}"] = _make_raw_decoder(
        _b32decode, 8, prepare=_b32hex_to_b32 if _hex else None)

# base64 decoders
for _url, _pad in product(["", "url"], ["", "pad"]):
    _decoder_table[f"base64{_url}{_pad}"] = _make_raw_decoder(
        base64.urlsafe_b64decode if _url else base64.standard_b64decode, 4)

# base58 decoders
Functions
def build_multibase_tables(encodings: Iterable[Encoding]) ‑> Tuple[Dict[str, Encoding], Dict[str, Encoding]]
-
Creates code->encoding and name->encoding mappings from a finite iterable of encodings, returning the mappings.
Raises ValueError if the same encoding code or name is encountered multiple times.
Example usage:
code_table, name_table = build_multibase_tables(encodings)
Expand source code
def build_multibase_tables(encodings: Iterable["Encoding"]) -> Tuple[Dict[str, "Encoding"], Dict[str, "Encoding"]]:
    """
    Creates code->encoding and name->encoding mappings from a finite iterable of encodings, returning the mappings.

    Raises `ValueError` if the same encoding code or name is encountered multiple times.

    Example usage:

    ```py
    code_table, name_table = build_multibase_tables(encodings)
    ```
    """
    code_table: Dict[str, "Encoding"] = {}
    name_table: Dict[str, "Encoding"] = {}
    for e in encodings:
        if e.code in code_table:
            # report the duplicated code itself (repr, since codes may be non-printable)
            raise ValueError(f"Multibase code {repr(e.code)} appears multiple times in table.")
        code_table[e.code] = e
        if e.name in name_table:
            raise ValueError(f"Multibase name {e.name} appears multiple times in table.")
        name_table[e.name] = e
    return code_table, name_table
def decode(data: str) ‑> bytes
-
Decodes the given multibase string into bytes. The encoding is inferred using the encoding_of() function. Decoding is then performed by Encoding.decode().
Example usage:
>>> multibase.decode("mSGVsbG8gd29ybGQh")
b'Hello world!'
Expand source code
def decode(data: str) -> bytes:
    """
    Decodes a multibase string into bytes.

    The encoding is determined by `encoding_of` (i.e. from the first character),
    and the decoding itself is delegated to `Encoding.decode`.

    Example usage:

    ```py
    >>> multibase.decode("mSGVsbG8gd29ybGQh")
    b'Hello world!'
    ```
    """
    return encoding_of(data).decode(data)
def decoder(s)
-
Expand source code
def decoder(s):
    """ Decodes a URL-safe base64 string, restoring its padding first. """
    padded = _b64pad(s)
    return base64.urlsafe_b64decode(padded.encode("utf-8"))
def encode(data: bytes, encoding: Union[str, ForwardRef('Encoding')]) ‑> str
-
Encodes the given bytes into a multibase string using the given encoding. If the encoding is passed by name or code (i.e. as a string), the get() function is used to retrieve it. Encoding is performed by Encoding.encode().
Example usage:
>>> multibase.encode(b"Hello world!", "base64")
'mSGVsbG8gd29ybGQh'
Expand source code
def encode(data: bytes, encoding: Union[str, "Encoding"]) -> str:
    """
    Encodes bytes into a multibase string with the given encoding.

    The encoding can be given as an `Encoding` object, or as a name/code string,
    in which case it is resolved by `get`. The work is done by `Encoding.encode`.

    Example usage:

    ```py
    >>> multibase.encode(b"Hello world!", "base64")
    'mSGVsbG8gd29ybGQh'
    ```
    """
    resolved = get(encoding) if isinstance(encoding, str) else encoding
    return resolved.encode(data)
def encoder(b)
-
Expand source code
def encoder(b):
    """ Encodes bytes into a URL-safe base64 string, with padding. """
    encoded = base64.urlsafe_b64encode(b)
    return encoded.decode("utf-8")
def encoding_of(data: str) ‑> Encoding
-
Returns the multibase encoding for the data, according to the code specified by its first character. Raises ValueError if the empty string is passed. Raises KeyError if no encoding exists with that code.
Example usage:
>>> multibase.encoding_of("mSGVsbG8gd29ybGQh")
Encoding(encoding='base64', code='m', status='default', description='rfc4648 no padding')
Expand source code
def encoding_of(data: str) -> Encoding:
    """
    Infers the multibase encoding of the given string from its first character (the code).

    Raises `ValueError` if the empty string is passed.
    Raises `KeyError` if no encoding exists with that code.

    Example usage:

    ```py
    >>> multibase.encoding_of("mSGVsbG8gd29ybGQh")
    Encoding(encoding='base64', code='m', status='default',
             description='rfc4648 no padding')
    ```
    """
    if not data:
        raise ValueError("Empty string is not valid for encoded data.")
    return get(data[0])
def exists(name_or_code: str) ‑> bool
-
Checks whether a multibase encoding with given name (if a string of length >= 2 is passed) or multibase code (if a string of length 1 is passed) exists. Raises ValueError if the empty string is passed.
Example usage:
>>> multibase.exists("base8")
True
>>> multibase.exists('t')
True
Expand source code
def exists(name_or_code: str) -> bool:
    """
    Tells whether a multibase encoding is registered under the given code
    (string of length 1) or the given name (string of length >= 2).

    Raises `ValueError` if the empty string is passed.

    Example usage:

    ```py
    >>> multibase.exists("base8")
    True
    >>> multibase.exists('t')
    True
    ```
    """
    length = len(name_or_code)
    if length == 0:
        raise ValueError("Empty string is neither a name nor a code.")
    # single characters are codes, anything longer is a name
    lookup = _code_table if length == 1 else _name_table
    return name_or_code in lookup
def get(name_or_code: str) ‑> Encoding
-
Gets the multibase encoding with given name (if a string of length >= 2 is passed) or multibase code (if a string of length 1 is passed). Raises ValueError if the empty string is passed. Raises KeyError if no such encoding exists.
Example usage:
>>> multibase.get("base8")
Encoding(encoding='base8', code='7', status='draft', description='octal')
>>> multibase.get('t')
Encoding(encoding='base32hexpad', code='t', status='candidate', description='rfc4648 case-insensitive - with padding')
Expand source code
def get(name_or_code: str) -> Encoding:
    """
    Looks up a multibase encoding, by code when a single character is passed
    and by name when a string of length >= 2 is passed.

    Raises `ValueError` on the empty string and `KeyError` if no such encoding exists.

    Example usage:

    ```py
    >>> multibase.get("base8")
    Encoding(encoding='base8', code='7',
             status='draft', description='octal')
    >>> multibase.get('t')
    Encoding(encoding='base32hexpad', code='t', status='candidate',
             description='rfc4648 case-insensitive - with padding')
    ```
    """
    if not name_or_code:
        raise ValueError("Empty string is neither a name nor a code.")
    if len(name_or_code) > 1:
        by_name = _name_table.get(name_or_code)
        if by_name is None:
            raise KeyError(f"No multibase encoding named {repr(name_or_code)}.")
        return by_name
    by_code = _code_table.get(name_or_code)
    if by_code is None:
        raise KeyError(f"No multibase encoding with code {repr(name_or_code)}.")
    return by_code
def table() ‑> Iterator[Encoding]
-
Iterates through the registered encodings, in order of ascending code.
Example usage:
>>> [e.code for e in multibase.table()] ['\x00', '0', '7', '9', 'B', 'C', 'F', 'K', 'M', 'T', 'U', 'V', 'Z','b', 'c', 'f', 'h', 'k', 'm', 'p', 't', 'u', 'v', 'z']
Expand source code
def table() -> Iterator[Encoding]:
    """
    Yields all registered encodings, sorted by ascending code.

    Example usage:

    ```py
    >>> [e.code for e in multibase.table()]
    ['\\x00', '0', '7', '9', 'B', 'C', 'F', 'K', 'M', 'T', 'U', 'V',
     'Z','b', 'c', 'f', 'h', 'k', 'm', 'p', 't', 'u', 'v', 'z']
    ```
    """
    yield from map(_code_table.__getitem__, sorted(_code_table))
Classes
class AlphabeticRawEncoding (alphabet: str, group_nchars: int, group_nbytes: int, *, pad_char: Optional[str] = None, include_padding: bool = True, require_padding: bool = False, require_exact_bytes: bool = False, lookup_table: bool = False)
-
Class for raw encodings analogous to base64, base32 and base16, described by rfc4648.
The constructor takes three positional parameters:
alphabet: str is the encoding alphabet. It must not be empty, nor contain repeated characters. Characters allowed are all printable ASCII characters, except for the delete character, i.e. all(ord(c) in range(0x20, 0x7F) for c in alphabet)
group_nchars: int is the number of characters in a group. Must be a positive integer. For example, the value in base64 is 4, while the value in base32 is 8.
group_nbytes: int is the number of bytes in a group. Must be a positive integer. For example, the value in base64 is 3, while the value in base32 is 5.
The constructor takes the following keyword-only arguments, all optional:
pad_char: Optional[str] = None is an optional character to be used as padding. For example, the value in both base64 and base32 is "=". If None, a binascii.Error will be raised upon encoding if the encoded string requires padding. If include_padding is False, this has no effect upon encoding; however, the specified padding character is still counted as a legal character at the end of a string being decoded.
include_padding: bool = True determines whether padding will be included in encoded strings. If False, the result of AlphabeticRawEncoding.encode() might be a string of incorrect length according to rfc4648 (i.e. one with length which is not a multiple of group_nchars).
require_padding: bool = False determines whether strings with an incorrect length (i.e. one with length which is not a multiple of group_nchars) should be rejected on decoding (raising binascii.Error).
require_exact_bytes: bool = False determines whether bytestrings with an incorrect length (i.e. one with length which is not a multiple of group_nbytes) should be rejected on encoding (raising binascii.Error).
Expand source code
class AlphabeticRawEncoding(RawEncoding):
    """
        Class for raw encodings analogous to base64, base32 and base16,
        described by [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html).

        The constructor takes three positional parameters:

        1. `alphabet: str` is the encoding alphabet. It must not be empty, nor contain
           repeated characters. Characters allowed are the printable ASCII characters
           (the delete character is excluded), i.e.
           ```all(ord(c) in range(0x20, 0x7F) for c in alphabet)```
        2. `group_nchars: int` is the number of characters in a group. Must be a positive
           integer. For example, the value in base64 is 4, while the value in base32 is 8.
        3. `group_nbytes: int` is the number of bytes in a group. Must be a positive
           integer. For example, the value in base64 is 3, while the value in base32 is 5.

        The number of bits in a group (`8*group_nbytes`) must be divisible by `group_nchars`.

        The constructor takes the following keyword-only arguments, all optional:

        - `pad_char: Optional[str] = None` is an optional character to be used as padding.
          For example, the value in both base64 and base32 is `"="`. If `None`,
          a `binascii.Error` will be raised upon encoding if the encoded string requires
          padding. If `include_padding` is `False`, this has no effect upon encoding;
          however, the specified padding character is still counted as a legal character
          at the end of a string being decoded.
        - `include_padding: bool = True` determines whether padding will be included in
          encoded strings. If `False`, the result of `AlphabeticRawEncoding.encode` might
          be a string of incorrect length according to
          [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html)
          (i.e. one with length which is not a multiple of `group_nchars`).
        - `require_padding: bool = False` determines whether strings with an incorrect
          length (i.e. one with length which is not a multiple of `group_nchars`) should
          be rejected on decoding (raising `binascii.Error`).
        - `require_exact_bytes: bool = False` determines whether bytestrings with an
          incorrect length (i.e. one with length which is not a multiple of `group_nbytes`)
          should be rejected on encoding (raising `binascii.Error`).
        - `lookup_table: bool = False` determines whether the encoding of every possible
          full byte group is precomputed at construction time. The table has
          `2**(8*group_nbytes)` candidate entries, so it is only practical for
          small values of `group_nbytes`.
    """
    # pylint: disable = too-many-instance-attributes

    _alphabet: str
    _group_nchars: int
    _group_nbytes: int
    _pad_char: Optional[str]
    _include_padding: bool
    _require_padding: bool
    _require_exact_bytes: bool
    _lookup_table: Optional[Dict[bytes, str]]

    def __init__(self, alphabet: str, group_nchars: int, group_nbytes: int, *,
                 pad_char: Optional[str] = None,
                 include_padding: bool = True,
                 require_padding: bool = False,
                 require_exact_bytes: bool = False,
                 lookup_table: bool = False):
        if not alphabet:
            raise ValueError("Empty alphabet not allowed.")
        if len(alphabet) != len(set(alphabet)):
            raise ValueError("Repeated letters in the alphabet.")
        for c in alphabet:
            if ord(c) not in range(0x20, 0x7F):
                # render the offending codepoint with an even number of hex digits
                codepoint = hex(ord(c))[2:]
                if len(codepoint) % 2 != 0:
                    codepoint = "0"+codepoint
                raise ValueError(f"Allowed characters are 0x20-0x7E (inclusive), found '\\x{codepoint}'")
        if group_nbytes <= 0:
            raise ValueError("Number of bytes in a group must be a positive integer.")
        if group_nchars <= 0:
            raise ValueError("Number of chars in a group must be a positive integer.")
        group_nbits = 8*group_nbytes
        if group_nbits%group_nchars != 0:
            raise ValueError(f"Number of bits in a group ({group_nbits}) is not divisible "
                             f"by number of characters in a group ({group_nchars})")
        if pad_char is not None and len(pad_char) != 1:
            raise ValueError("If specified, padding character must be a string object of length 1.")
        self._alphabet = alphabet
        self._group_nchars = group_nchars
        self._group_nbytes = group_nbytes
        self._pad_char = pad_char
        self._include_padding = include_padding
        self._require_padding = require_padding
        self._require_exact_bytes = require_exact_bytes
        # must be None while the table is being built, so that `encode` below
        # takes the bitwise path rather than consulting a partial table
        self._lookup_table = None
        if lookup_table:
            table: Dict[bytes, str] = {}
            # a group of `group_nbytes` bytes takes 2**(8*group_nbytes) distinct values
            for i in range(2**group_nbits):
                byte_group = i.to_bytes(group_nbytes, byteorder="big")
                try:
                    table[byte_group] = self.encode(byte_group)
                except binascii.Error:
                    # group cannot be expressed in this alphabet: leave it out,
                    # `encode` reports it as an invalid byte group on lookup
                    continue
            self._lookup_table = table

    def encode(self, b: bytes) -> str:
        """
            Encodes the bytestring `b` into a string, one group of `group_nbytes`
            bytes at a time. A final incomplete group is filled with zero bytes;
            the characters made entirely of filler bits are replaced by the padding
            character (or dropped, if `include_padding` is `False`).

            Raises `binascii.Error` if:

            - `require_exact_bytes` is set and `len(b)` is not a multiple of `group_nbytes`;
            - padding must be included but no padding character was specified;
            - a group requires a character index beyond the end of the alphabet.
        """
        # pylint: disable = too-many-locals
        # extract encoding parameters
        group_nbytes = self._group_nbytes
        group_nchars = self._group_nchars
        pad_char = self._pad_char
        alphabet = self._alphabet
        lookup_table = self._lookup_table
        include_padding = self._include_padding
        char_nbits = 8*group_nbytes//group_nchars
        char_base = 2**char_nbits
        # zero bytes and padding characters required by the final group
        nbytes = len(b)
        trailing_nbytes = nbytes%group_nbytes
        zero_bytes = group_nbytes-trailing_nbytes if trailing_nbytes else 0
        padding = zero_bytes*8//char_nbits
        if zero_bytes > 0 and self._require_exact_bytes:
            raise binascii.Error(f"Number of bytes ({nbytes}) is not a multiple "
                                 f"of the group size ({group_nbytes}).")
        if padding > 0 and include_padding and pad_char is None:
            raise binascii.Error(f"Padding of length {padding} required, but no padding character.")
        # main loop: compute chars of encoded string
        chars: List[str] = []
        for b_idx in range(0, nbytes, group_nbytes):
            byte_group = b[b_idx:b_idx+group_nbytes]
            # only the final group can be short, so only it is ever padded
            group_padding = 0
            if len(byte_group) < group_nbytes:
                byte_group = byte_group+b"\x00"*zero_bytes
                group_padding = padding
            if lookup_table is not None:
                # get characters from precomputed lookup table
                if byte_group not in lookup_table:
                    raise binascii.Error(f"Invalid byte group #{b_idx}.")
                group_chars = lookup_table[byte_group][:group_nchars-group_padding]
            else:
                # transform bytes into integer for easier bitwise manipulation
                group_int = int.from_bytes(byte_group, byteorder="big")
                # discard the filler bits covered by padding characters
                group_int >>= group_padding*char_nbits
                # compute group characters, in reverse order
                revchars: List[str] = []
                for _ in range(group_nchars-group_padding):
                    alphabet_idx = group_int%char_base
                    if alphabet_idx >= len(alphabet):
                        raise binascii.Error(f"Invalid byte group #{b_idx}.")
                    revchars.append(alphabet[alphabet_idx])
                    group_int >>= char_nbits
                group_chars = "".join(reversed(revchars))
            if group_padding > 0 and include_padding:
                # pad_char is not None here: checked before entering the loop
                group_chars += cast(str, pad_char)*group_padding
            chars.append(group_chars)
        # create and return encoded string
        return "".join(chars)

    def decode(self, s: str) -> bytes:
        # NOTE(review): only a `...` placeholder body is visible for this method;
        # confirm whether decoding is implemented before relying on it.
        ...
Ancestors
- RawEncoding
- abc.ABC
Methods
def decode(self, s: str) ‑> bytes
-
Expand source code
def decode(self, s: str) -> bytes:
    """
        Decodes the string `s` into bytes.

        NOTE(review): only a `...` placeholder body is visible for this method;
        confirm whether decoding is implemented before relying on it.
    """
    ...
def encode(self, b: bytes) ‑> str
-
Expand source code
def encode(self, b: bytes) -> str:
    """
        Encodes the bytestring `b` into a string, one group of `group_nbytes`
        bytes at a time. A final incomplete group is filled with zero bytes;
        the characters made entirely of filler bits are replaced by the padding
        character (or dropped, if `include_padding` is `False`).

        Raises `binascii.Error` if:

        - `require_exact_bytes` is set and `len(b)` is not a multiple of `group_nbytes`;
        - padding must be included but no padding character was specified;
        - a group requires a character index beyond the end of the alphabet.
    """
    # pylint: disable = too-many-locals
    # extract encoding parameters
    group_nbytes = self._group_nbytes
    group_nchars = self._group_nchars
    pad_char = self._pad_char
    alphabet = self._alphabet
    lookup_table = self._lookup_table
    include_padding = self._include_padding
    char_nbits = 8*group_nbytes//group_nchars
    char_base = 2**char_nbits
    # zero bytes and padding characters required by the final group
    nbytes = len(b)
    trailing_nbytes = nbytes%group_nbytes
    zero_bytes = group_nbytes-trailing_nbytes if trailing_nbytes else 0
    padding = zero_bytes*8//char_nbits
    if zero_bytes > 0 and self._require_exact_bytes:
        raise binascii.Error(f"Number of bytes ({nbytes}) is not a multiple "
                             f"of the group size ({group_nbytes}).")
    if padding > 0 and include_padding and pad_char is None:
        raise binascii.Error(f"Padding of length {padding} required, but no padding character.")
    # main loop: compute chars of encoded string
    chars: List[str] = []
    for b_idx in range(0, nbytes, group_nbytes):
        byte_group = b[b_idx:b_idx+group_nbytes]
        # only the final group can be short, so only it is ever padded
        group_padding = 0
        if len(byte_group) < group_nbytes:
            byte_group = byte_group+b"\x00"*zero_bytes
            group_padding = padding
        if lookup_table is not None:
            # get characters from precomputed lookup table
            if byte_group not in lookup_table:
                raise binascii.Error(f"Invalid byte group #{b_idx}.")
            group_chars = lookup_table[byte_group][:group_nchars-group_padding]
        else:
            # transform bytes into integer for easier bitwise manipulation
            group_int = int.from_bytes(byte_group, byteorder="big")
            # discard the filler bits covered by padding characters
            group_int >>= group_padding*char_nbits
            # compute group characters, in reverse order
            revchars: List[str] = []
            for _ in range(group_nchars-group_padding):
                alphabet_idx = group_int%char_base
                if alphabet_idx >= len(alphabet):
                    raise binascii.Error(f"Invalid byte group #{b_idx}.")
                revchars.append(alphabet[alphabet_idx])
                group_int >>= char_nbits
            group_chars = "".join(reversed(revchars))
        if group_padding > 0 and include_padding:
            # pad_char is not None here: checked before entering the loop
            group_chars += cast(str, pad_char)*group_padding
        chars.append(group_chars)
    # create and return encoded string
    return "".join(chars)
class CustomRawEncoding (raw_encoder: Callable[[bytes], str], raw_decoder: Callable[[str], bytes])
-
Raw encoding defined by a user-supplied pair of callables: a raw encoder (bytes to string) and a raw decoder (string to bytes).
Expand source code
class CustomRawEncoding(RawEncoding):
    """
        Raw encoding defined by a user-supplied pair of callables:

        - `raw_encoder: Callable[[bytes], str]` is called by `CustomRawEncoding.encode`;
        - `raw_decoder: Callable[[str], bytes]` is called by `CustomRawEncoding.decode`.

        Neither callable is validated at construction time.
    """

    _raw_encoder: Callable[[bytes], str]
    _raw_decoder: Callable[[str], bytes]

    def __init__(self, raw_encoder: Callable[[bytes], str], raw_decoder: Callable[[str], bytes]):
        self._raw_encoder = raw_encoder # type: ignore
        self._raw_decoder = raw_decoder # type: ignore

    def encode(self, b: bytes) -> str:
        """ Encodes `b` by delegating to the raw encoder passed to the constructor. """
        # the callable is stored under the private name `_raw_encoder`
        raw_encoder: Callable[[bytes], str] = self._raw_encoder # type: ignore
        return raw_encoder(b)

    def decode(self, s: str) -> bytes:
        """ Decodes `s` by delegating to the raw decoder passed to the constructor. """
        # the callable is stored under the private name `_raw_decoder`
        raw_decoder: Callable[[str], bytes] = self._raw_decoder # type: ignore
        return raw_decoder(s)
Ancestors
- RawEncoding
- abc.ABC
Methods
def decode(self, s: str) ‑> bytes
-
Expand source code
def decode(self, s: str) -> bytes:
    """ Decodes `s` by delegating to the raw decoder stored on the instance. """
    # the constructor stores the callable as `_raw_decoder`; reading the
    # un-prefixed `raw_decoder` attribute raised AttributeError
    raw_decoder: Callable[[str], bytes] = self._raw_decoder # type: ignore
    return raw_decoder(s)
def encode(self, b: bytes) ‑> str
-
Expand source code
def encode(self, b: bytes) -> str:
    """ Encodes `b` by delegating to the raw encoder stored on the instance. """
    # the constructor stores the callable as `_raw_encoder`; reading the
    # un-prefixed `raw_encoder` attribute raised AttributeError
    raw_encoder: Callable[[bytes], str] = self._raw_encoder # type: ignore
    return raw_encoder(b)
class Encoding (encoding: str, code: str, status: str, description: str)
-
Dataclass for a multibase encoding.
Example usage:
Encoding.from_json({'encoding': 'base8', 'code': '7', 'status': 'draft', 'description': 'octal'})
```py >>> Encoding.from_json({ ... 'encoding': 'base8', 'code': '7', ... 'status': 'draft', 'description': 'octal'}) Encoding(encoding='base8', code='7', status='draft', description='octal') ``` Direct instantiation should be avoided: it is field-order dependent and might change without warning in the future.
Expand source code
@dataclass(frozen=True)
class Encoding:
    """
        Dataclass for a multibase encoding.

        Example usage:

        ```py
        >>> Encoding.from_json({
        ...     'encoding': 'base8', 'code': '7',
        ...     'status': 'draft', 'description': 'octal'})
        Encoding(encoding='base8', code='7',
                 status='draft', description='octal')
        ```

        Direct instantiation should be avoided: it is field-order dependent
        and might change without warning in the future.
    """

    encoding: str
    """
        Encoding name. Must satisfy the following:

        ```py
        re.match(r"^[a-z][a-z0-9_-]+$", name)
        ```
    """

    code: str
    """
        Encoding code. Must be a single ASCII character.
        More specifically, it must be a single unicode codepoint satisfying:

        ```py
        ord(code) in range(0x00, 0x80)
        ```
    """

    status: str
    """ Encoding status. Must be 'draft', 'candidate' or 'default'."""

    description: str
    """ Encoding description. """

    def __post_init__(self):
        """ Validates name, status and code, raising `ValueError` on the first invalid field. """
        if not re.match(r"^[a-z][a-z0-9_-]+$", self.name): # ensure len(name) > 1
            raise ValueError(f"Invalid multibase encoding name {repr(self.name)}")
        if self.status not in ("draft", "candidate", "default"):
            raise ValueError(f"Invalid multibase encoding status {repr(self.status)}.")
        if len(self.code) != 1:
            raise ValueError(f"Invalid multibase encoding code {repr(self.code)} (length != 1).")
        if ord(self.code) not in range(0x00, 0x80):
            # render the offending codepoint with an even number of hex digits
            codepoint = hex(ord(self.code))[2:]
            if len(codepoint) % 2 != 0:
                codepoint = "0"+codepoint
            raise ValueError(f"Invalid multibase encoding code '\\x{codepoint}'")

    @property
    def name(self) -> str:
        """
            An alias for the `Encoding.encoding` attribute.

            Using `Encoding.name` over `Encoding.encoding` is preferred, both for
            uniformity with the [multicodec spec](https://github.com/multiformats/multicodec)
            and to avoid potential confusion between encoding names (strings) and
            encoding objects (instances of `Encoding`).
        """
        return self.encoding

    @property
    def raw_encoder(self) -> RawEncoder:
        """
            Returns the raw encoder for this encoding:
            given bytes, it produces the encoded string without the multibase prefix.

            Raises `NotImplementedError` if no encoder is registered for this encoding name.
        """
        if self.name not in _encoder_table:
            raise NotImplementedError(f"Encoding using {self.name} is not yet implemented.")
        return _encoder_table[self.name]

    @property
    def raw_decoder(self) -> RawDecoder:
        """
            Returns the raw decoder for this encoding:
            given a string without the multibase prefix, it produces the decoded data.

            Raises `NotImplementedError` if no decoder is registered for this encoding name.
        """
        if self.name not in _decoder_table:
            raise NotImplementedError(f"Decoding using {self.name} is not yet implemented.")
        return _decoder_table[self.name]

    def encode(self, data: bytes) -> str:
        """
            Encodes bytes into a multibase string: it first uses `Encoding.raw_encoder`,
            and then prepends the multibase prefix given by `Encoding.code` and returns
            the resulting multibase string.
        """
        return self.code+self.raw_encoder(data)

    def decode(self, data: str) -> bytes:
        """
            Decodes a multibase string into bytes: it first checks that the multibase
            prefix matches the value specified by `Encoding.code`, then uses
            `Encoding.raw_decoder` on the string without prefix and returns the bytes.

            Raises `ValueError` if the string is empty (no prefix to inspect)
            or if its prefix does not match `Encoding.code`.
        """
        if not data:
            # guard before indexing: data[0] would raise a bare IndexError
            raise ValueError(f"Expected {repr(self.name)} encoding, found empty string "
                             "(missing multibase prefix).")
        if data[0] != self.code:
            raise ValueError(f"Expected {repr(self.name)} encoding, "
                             f"found {repr(get(data[0]).name)} encoding instead.")
        return self.raw_decoder(data[1:])

    def to_json(self) -> Mapping[str, str]:
        """
            Returns a JSON dictionary representation of this `Encoding` object,
            compatible with the one from the multibase.csv table found in the
            [multibase spec](https://github.com/multiformats/multibase).

            Printable codes (0x20-0x7E) are emitted as they are, any other code
            is emitted as a `0x`-prefixed, two-digit hexadecimal string.

            Example usage:

            ```py
            >>> Encoding.from_json({
            ...     'encoding': 'base8', 'code': '7',
            ...     'status': 'draft', 'description': 'octal'}).to_json()
            {'encoding': 'base8', 'code': '7', 'status': 'draft', 'description': 'octal'}
            ```
        """
        code = self.code
        # compare the codepoint, not the string: a str is never `in` a range
        if ord(code) not in range(0x20, 0x7F):
            code = f"0x{ord(code):02x}"
        return {
            "encoding": self.encoding,
            "code": code,
            "status": self.status,
            "description": self.description
        }

    @staticmethod
    def from_json(multibase_encoding: Mapping[str, Union[str, int]]) -> "Encoding":
        """
            Creates an `Encoding` object from a JSON dictionary representation
            compatible with the one from the multibase.csv table found in the
            [multibase spec](https://github.com/multiformats/multibase).

            A `code` value starting with `0x` is read as the hexadecimal codepoint
            of the code character. Raises `KeyError` if a field is missing and
            `TypeError` if a field is not a string.

            Example usage:

            ```py
            >>> Encoding.from_json({
            ...     'encoding': 'base8', 'code': '7',
            ...     'status': 'draft', 'description': 'octal'})
            Encoding(encoding='base8', code='7',
                     status='draft', description='octal')
            ```
        """
        encoding = multibase_encoding["encoding"]
        code = multibase_encoding["code"]
        status = multibase_encoding["status"]
        description = multibase_encoding["description"]
        if not isinstance(encoding, str):
            raise TypeError(f"Expected string, found {encoding = }.")
        if not isinstance(status, str):
            raise TypeError(f"Expected string, found {status = }.")
        if not isinstance(description, str):
            raise TypeError(f"Expected string, found {description = }.")
        if not isinstance(code, str):
            raise TypeError(f"Expected string, found {code = }")
        if code.startswith("0x"):
            code = chr(int(code, base=16))
        return Encoding(encoding, code, status, description)
Class variables
var code : str
-
Encoding code. Must be a single ASCII character.
More specifically, it must be a single unicode codepoint satisfying:
ord(code) in range(0x00, 0x80)
var description : str
-
Encoding description.
var encoding : str
-
Encoding name. Must satisfy the following:
re.match(r"^[a-z][a-z0-9_-]+$", name)
var status : str
-
Encoding status. Must be 'draft', 'candidate' or 'default'.
Static methods
def from_json(multibase_encoding: Mapping[str, Union[str, int]]) ‑> Encoding
-
Creates an
Encoding
object from a JSON dictionary representation compatible with the one from the multibase.csv table found in the multibase spec.Example usage:
Expand source code
@staticmethod
def from_json(multibase_encoding: Mapping[str, Union[str, int]]) -> "Encoding":
    """
        Builds an `Encoding` from a JSON dictionary laid out like a row of the
        multibase.csv table found in the
        [multibase spec](https://github.com/multiformats/multibase).

        All four fields (`encoding`, `code`, `status`, `description`) are read
        first, then each is checked to be a string (`TypeError` otherwise).
        A `code` starting with `0x` is read as the hexadecimal codepoint of the
        code character.
    """
    # read every field up front, in the same order, so that a missing key
    # is reported before any type error
    encoding, code, status, description = (
        multibase_encoding[key]
        for key in ("encoding", "code", "status", "description")
    )
    if not isinstance(encoding, str):
        raise TypeError(f"Expected string, found {encoding = }.")
    if not isinstance(status, str):
        raise TypeError(f"Expected string, found {status = }.")
    if not isinstance(description, str):
        raise TypeError(f"Expected string, found {description = }.")
    if not isinstance(code, str):
        raise TypeError(f"Expected string, found {code = }")
    # hexadecimal notation stands for a single codepoint
    code_char = chr(int(code, base=16)) if code.startswith("0x") else code
    return Encoding(encoding, code_char, status, description)
Instance variables
var name : str
-
An alias for the
Encoding.encoding
attribute.Using
Encoding.name
overEncoding.encoding
is preferred, both for uniformity with the multicodec spec and to avoid potential confusion between encoding names (strings) and encoding objects (instances ofEncoding
).Expand source code
@property
def name(self) -> str:
    """
        An alias for the `Encoding.encoding` attribute: returns the same string.

        Using `Encoding.name` over `Encoding.encoding` is preferred, both for
        uniformity with the [multicodec spec](https://github.com/multiformats/multicodec)
        and to avoid potential confusion between encoding names (strings) and
        encoding objects (instances of `Encoding`).
    """
    return self.encoding
var raw_decoder : Callable[[str], bytes]
-
Returns the raw decoder for this encoding: given a string without the multibase prefix, it produces the decoded data.
Expand source code
@property
def raw_decoder(self) -> RawDecoder:
    """
        Returns the raw decoder for this encoding: a callable which takes a
        string without the multibase prefix and produces the decoded data.

        Raises `NotImplementedError` if the encoding name has no entry in the
        decoder table.
    """
    if self.name in _decoder_table:
        return _decoder_table[self.name]
    raise NotImplementedError(f"Decoding using {self.name} is not yet implemented.")
var raw_encoder : Callable[[bytes], str]
-
Returns the raw encoder for this encoding: given bytes, it produces the encoded string without the multibase prefix.
Expand source code
@property
def raw_encoder(self) -> RawEncoder:
    """
        Returns the raw encoder for this encoding: a callable which takes bytes
        and produces the encoded string without the multibase prefix.

        Raises `NotImplementedError` if the encoding name has no entry in the
        encoder table.
    """
    if self.name in _encoder_table:
        return _encoder_table[self.name]
    raise NotImplementedError(f"Encoding using {self.name} is not yet implemented.")
Methods
def decode(self, data: str) ‑> bytes
-
Decodes a multibase string into bytes: it first checks that the multibase prefix matches the value specified by Encoding.code, then uses Encoding.raw_decoder on the string without prefix and returns the bytes.
Example usage:
Expand source code
def decode(self, data: str) -> bytes:
    """
        Decodes a multibase string into bytes: it first checks that the multibase
        prefix matches the value specified by `Encoding.code`, then uses
        `Encoding.raw_decoder` on the string without prefix and returns the bytes.

        Raises `ValueError` if the string is empty (no prefix to inspect)
        or if its prefix does not match `Encoding.code`.
    """
    if not data:
        # guard before indexing: data[0] would raise a bare IndexError
        raise ValueError(f"Expected {repr(self.name)} encoding, found empty string "
                         "(missing multibase prefix).")
    if data[0] != self.code:
        raise ValueError(f"Expected {repr(self.name)} encoding, "
                         f"found {repr(get(data[0]).name)} encoding instead.")
    return self.raw_decoder(data[1:])
def encode(self, data: bytes) ‑> str
-
Encodes bytes into a multibase string: it first uses
Encoding.raw_encoder
, and then prepends the multibase prefix given byEncoding.code
and returns the resulting multibase string.Example usage:
Expand source code
def encode(self, data: bytes) -> str:
    """
        Encodes bytes into a multibase string: the payload is produced by
        `Encoding.raw_encoder` and the multibase prefix `Encoding.code` is
        prepended to it.
    """
    prefix = self.code
    payload = self.raw_encoder(data)
    return prefix+payload
def to_json(self) ‑> Mapping[str, str]
-
Returns a JSON dictionary representation of this
Encoding
object, compatible with the one from the multibase.csv table found in the multibase spec.Example usage:
Expand source code
def to_json(self) -> Mapping[str, str]:
    """
        Returns a JSON dictionary representation of this `Encoding` object,
        compatible with the one from the multibase.csv table found in the
        [multibase spec](https://github.com/multiformats/multibase).

        Printable codes (0x20-0x7E) are emitted as they are, any other code
        is emitted as a `0x`-prefixed, two-digit hexadecimal string.
    """
    code = self.code
    # compare the codepoint, not the string: a str is never `in` a range,
    # which made every code (printable or not) come out in hexadecimal
    if ord(code) not in range(0x20, 0x7F):
        code = f"0x{ord(code):02x}"
    return {
        "encoding": self.encoding,
        "code": code,
        "status": self.status,
        "description": self.description
    }
class RawEncoding
-
Abstract base class for raw encodings: subclasses must implement encode (bytes to string) and decode (string to bytes).
Expand source code
class RawEncoding(ABC):
    """
        Abstract base class for raw encodings: subclasses must implement
        both `encode` (bytes to string) and `decode` (string to bytes).
    """

    @abstractmethod
    def encode(self, b: bytes) -> str:
        """ Encodes the bytestring `b` into a string. Must be overridden. """
        ...

    @abstractmethod
    def decode(self, s: str) -> bytes:
        """ Decodes the string `s` into bytes. Must be overridden. """
        ...
Ancestors
- abc.ABC
Subclasses
Methods
def decode(self, s: str) ‑> bytes
-
Expand source code
@abstractmethod
def decode(self, s: str) -> bytes:
    """ Decodes the string `s` into bytes. Abstract: must be overridden by subclasses. """
    ...
def encode(self, b: bytes) ‑> str
-
Expand source code
@abstractmethod
def encode(self, b: bytes) -> str:
    """ Encodes the bytestring `b` into a string. Abstract: must be overridden by subclasses. """
    ...