Module multiformats.varint
Implementation of the unsigned-varint spec.
Functionality is provided by the encode()
and decode()
functions, converting between non-negative
int
values and the corresponding varint bytes
:
>>> from multiformats import varint
>>> varint.encode(128)
b'\x80\x01'
>>> varint.decode(b'\x80\x01')
128
Expand source code
"""
Implementation of the [unsigned-varint spec](https://github.com/multiformats/unsigned-varint).
Functionality is provided by the `encode` and `decode` functions, converting between non-negative
`int` values and the corresponding varint `bytes`:
```py
>>> from multiformats import varint
>>> varint.encode(128)
b'\\x80\\x01'
>>> varint.decode(b'\\x80\\x01')
128
```
"""
from io import BufferedIOBase
from typing import cast, List, overload, Tuple, Union, TypeVar
from typing_extensions import Final
from typing_validation import validate
_max_num_bytes: int = 9
BytesLike = Union[bytes, bytearray, memoryview]
byteslike: Final = (bytes, bytearray, memoryview)
def encode(x: int) -> bytes:
"""
Encodes a non-negative integer as an unsigned varint, returning the encoded bytes.
Raises `ValueError` if:
- `x < 0` (varints encode unsigned integers)
- `x >= 2**63` (from specs, varints are limited to 9 bytes)
Example usage:
```py
>>> from multiformats import varint
>>> varint.encode(128)
b'\\x80\\x01'
```
"""
validate(x, int)
if x < 0:
raise ValueError("Integer is negative.")
varint_bytelist: List[int] = []
while True:
next_byte = x & 0b0111_1111
x >>= 7
if x > 0:
varint_bytelist.append(next_byte | 0b1000_0000)
else:
varint_bytelist.append(next_byte)
break
if len(varint_bytelist) >= _max_num_bytes:
raise ValueError(f"Varints must be at most {_max_num_bytes} bytes long.")
return bytes(varint_bytelist)
def decode(varint: Union[BytesLike, BufferedIOBase]) -> int:
"""
Decodes an unsigned varint from a `bytes` object or a buffered binary stream.
If a stream is passed, only the bytes encoding the varint are read from it.
If a `bytes`-like object is passed, the varint encoding must use all bytes.
Raises `ValueError` if:
- `varint` contains no bytes (from specs, the number 0 is encoded as 0b0000_0000)
- the 9th byte of `varint` is a continuation byte (from specs, no number >= 2**63 is allowed)
- the last byte of `varint` is a continuation byte (invalid format)
- the decoded integer could be encoded in fewer bytes than were read (from specs, encoding must be minimal)
- `varint` is a `bytes`-like object and the number of bytes used by the encoding is fewer than its length
The last point is a designed choice aimed to reduce errors when decoding fixed-length bytestrings (rather than streams).
If this behaviour is undesirable, consider using `decode_head` instead.
Example usage with bytes:
```py
>>> from multiformats import varint
>>> varint.decode(b'\\x80\\x01')
128
```
Example usage with streams, for the (typical) situation where the varint is only part of the data:
```py
>>> from io import BytesIO
>>> stream = BytesIO(b"\\x80\\x01\\x12\\xff\\x01")
>>> varint.decode(stream)
128
>>> stream.read() # what's left in the stream
b'\\x12\\xff\\x01'
```
"""
x, num_bytes_read, _ = decode_raw(varint)
if isinstance(varint, byteslike) and len(varint) > num_bytes_read:
raise ValueError("A bytes-like object was passed, but not all bytes were used by the encoding.")
return x
def _no_next_byte_error(num_bytes_read: int) -> ValueError:
if num_bytes_read == 0:
return ValueError("Varints must be at least 1 byte long.")
return ValueError(f"Byte #{num_bytes_read-1} was a continuation byte, but byte #{num_bytes_read} not available.")
_BufferedIOT = TypeVar("_BufferedIOT", bound=BufferedIOBase)
@overload
def decode_raw(varint: BytesLike) -> Tuple[int, int, memoryview]:
...
@overload
def decode_raw(varint: _BufferedIOT) -> Tuple[int, int, _BufferedIOT]:
...
def decode_raw(varint: Union[BytesLike, BufferedIOBase]) -> Tuple[int, int, Union[memoryview, BufferedIOBase]]:
"""
Specialised version of `decode` for partial decoding, returning a pair `(x, n)` of
the decoded varint `x` and the number `n` of bytes read from the start and/or consumed from the stream.
Unlike `decode`, doesn't raise `ValueError` if not all bytes were read in the process.
Example usage with bytes:
```py
>>> bs = b"\\x80\\x01\\x12\\xff\\x01"
>>> x, n, m = varint.decode_raw(bs)
>>> x
128
>>> n
2
# read first 2 bytes: b"\\x80\\x01"
>>> m
<memory at 0x000001A6E55DDA00>
>>> bytes(m)
b'\\x12\\xff\\x01'
# memoryview on remaining bytes
# note: bytes(m) did not consume the bytes
```
Example usage with streams, for the (typical) situation where the varint is only part of the data:
```py
>>> from io import BytesIO
>>> stream = BytesIO(b"\\x80\\x01\\x12\\xff\\x01")
>>> x, n = varint.decode_head(stream) # same as decode, but additionally returns number of bytes read
>>> x
128
>>> n
2
# read first 2 bytes: b"\\x80\\x01"
>>> m
<_io.BytesIO object at 0x000001A6E554BBD0>
>>> m == stream
True
# original stream returned, containing remaining bytes
>>> stream.read()
b'\\x12\\xff\\x01'
# 2 bytes were consumed decoding the varint, so 3 bytes were left in the stream
# note: stream.read() consumed the bytes
```
"""
if isinstance(varint, BufferedIOBase):
stream_mode = True
validate(varint, BufferedIOBase)
else:
stream_mode = False
validate(varint, BytesLike)
expect_next = True
num_bytes_read = 0
x = 0
while expect_next:
if stream_mode:
_next_byte: bytes = cast(BufferedIOBase, varint).read(1)
if len(_next_byte) == 0:
raise _no_next_byte_error(num_bytes_read)
next_byte: int = _next_byte[0]
else:
if num_bytes_read >= len(cast(BytesLike, varint)):
raise _no_next_byte_error(num_bytes_read)
next_byte = cast(BytesLike, varint)[num_bytes_read]
x += (next_byte & 0b0111_1111) << (7 * num_bytes_read)
expect_next = (next_byte >> 7) == 0b1
num_bytes_read += 1
if expect_next and num_bytes_read >= _max_num_bytes:
raise ValueError(f"Varints must be at most {_max_num_bytes} bytes long.")
if num_bytes_read > 1 and x < 2**(7*(num_bytes_read-1)):
raise ValueError(f"Number {x} was not minimally encoded (as a {num_bytes_read} bytes varint).")
if stream_mode:
return x, num_bytes_read, cast(BufferedIOBase, varint)
return x, num_bytes_read, memoryview(cast(BytesLike, varint))[num_bytes_read:]
Functions
def decode(varint: Union[bytes, bytearray, memoryview, io.BufferedIOBase]) ‑> int
-
Decodes an unsigned varint from a
bytes
object or a buffered binary stream. If a stream is passed, only the bytes encoding the varint are read from it. If abytes
-like object is passed, the varint encoding must use all bytes.Raises
ValueError
if:varint
contains no bytes (from specs, the number 0 is encoded as 0b0000_0000)- the 9th byte of
varint
is a continuation byte (from specs, no number >= 2**63 is allowed) - the last byte of
varint
is a continuation byte (invalid format) - the decoded integer could be encoded in fewer bytes than were read (from specs, encoding must be minimal)
varint
is abytes
-like object and the number of bytes used by the encoding is fewer than its length
The last point is a designed choice aimed to reduce errors when decoding fixed-length bytestrings (rather than streams). If this behaviour is undesirable, consider using
decode_head
instead.Example usage with bytes:
>>> from multiformats import varint >>> varint.decode(b'\x80\x01') 128
Example usage with streams, for the (typical) situation where the varint is only part of the data:
>>> from io import BytesIO >>> stream = BytesIO(b"\x80\x01\x12\xff\x01") >>> varint.decode(stream) 128 >>> stream.read() # what's left in the stream b'\x12\xff\x01'
Expand source code
def decode(varint: Union[BytesLike, BufferedIOBase]) -> int: """ Decodes an unsigned varint from a `bytes` object or a buffered binary stream. If a stream is passed, only the bytes encoding the varint are read from it. If a `bytes`-like object is passed, the varint encoding must use all bytes. Raises `ValueError` if: - `varint` contains no bytes (from specs, the number 0 is encoded as 0b0000_0000) - the 9th byte of `varint` is a continuation byte (from specs, no number >= 2**63 is allowed) - the last byte of `varint` is a continuation byte (invalid format) - the decoded integer could be encoded in fewer bytes than were read (from specs, encoding must be minimal) - `varint` is a `bytes`-like object and the number of bytes used by the encoding is fewer than its length The last point is a designed choice aimed to reduce errors when decoding fixed-length bytestrings (rather than streams). If this behaviour is undesirable, consider using `decode_head` instead. Example usage with bytes: ```py >>> from multiformats import varint >>> varint.decode(b'\\x80\\x01') 128 ``` Example usage with streams, for the (typical) situation where the varint is only part of the data: ```py >>> from io import BytesIO >>> stream = BytesIO(b"\\x80\\x01\\x12\\xff\\x01") >>> varint.decode(stream) 128 >>> stream.read() # what's left in the stream b'\\x12\\xff\\x01' ``` """ x, num_bytes_read, _ = decode_raw(varint) if isinstance(varint, byteslike) and len(varint) > num_bytes_read: raise ValueError("A bytes-like object was passed, but not all bytes were used by the encoding.") return x
def decode_raw(varint: Union[bytes, bytearray, memoryview, io.BufferedIOBase]) ‑> Tuple[int, int, Union[memoryview, io.BufferedIOBase]]
-
Specialised version of
decode()
for partial decoding, returning a pair(x, n)
of the decoded varintx
and the numbern
of bytes read from the start and/or consumed from the stream. Unlikedecode()
, doesn't raiseValueError
if not all bytes were read in the process.Example usage with bytes:
>>> bs = b"\x80\x01\x12\xff\x01" >>> x, n, m = varint.decode_raw(bs) >>> x 128 >>> n 2 # read first 2 bytes: b"\x80\x01" >>> m <memory at 0x000001A6E55DDA00> >>> bytes(m) b'\x12\xff\x01' # memoryview on remaining bytes # note: bytes(m) did not consume the bytes
Example usage with streams, for the (typical) situation where the varint is only part of the data:
>>> from io import BytesIO >>> stream = BytesIO(b"\x80\x01\x12\xff\x01") >>> x, n = varint.decode_head(stream) # same as decode, but additionally returns number of bytes read >>> x 128 >>> n 2 # read first 2 bytes: b"\x80\x01" >>> m <_io.BytesIO object at 0x000001A6E554BBD0> >>> m == stream True # original stream returned, containing remaining bytes >>> stream.read() b'\x12\xff\x01' # 2 bytes were consumed decoding the varint, so 3 bytes were left in the stream # note: stream.read() consumed the bytes
Expand source code
def decode_raw(varint: Union[BytesLike, BufferedIOBase]) -> Tuple[int, int, Union[memoryview, BufferedIOBase]]: """ Specialised version of `decode` for partial decoding, returning a pair `(x, n)` of the decoded varint `x` and the number `n` of bytes read from the start and/or consumed from the stream. Unlike `decode`, doesn't raise `ValueError` if not all bytes were read in the process. Example usage with bytes: ```py >>> bs = b"\\x80\\x01\\x12\\xff\\x01" >>> x, n, m = varint.decode_raw(bs) >>> x 128 >>> n 2 # read first 2 bytes: b"\\x80\\x01" >>> m <memory at 0x000001A6E55DDA00> >>> bytes(m) b'\\x12\\xff\\x01' # memoryview on remaining bytes # note: bytes(m) did not consume the bytes ``` Example usage with streams, for the (typical) situation where the varint is only part of the data: ```py >>> from io import BytesIO >>> stream = BytesIO(b"\\x80\\x01\\x12\\xff\\x01") >>> x, n = varint.decode_head(stream) # same as decode, but additionally returns number of bytes read >>> x 128 >>> n 2 # read first 2 bytes: b"\\x80\\x01" >>> m <_io.BytesIO object at 0x000001A6E554BBD0> >>> m == stream True # original stream returned, containing remaining bytes >>> stream.read() b'\\x12\\xff\\x01' # 2 bytes were consumed decoding the varint, so 3 bytes were left in the stream # note: stream.read() consumed the bytes ``` """ if isinstance(varint, BufferedIOBase): stream_mode = True validate(varint, BufferedIOBase) else: stream_mode = False validate(varint, BytesLike) expect_next = True num_bytes_read = 0 x = 0 while expect_next: if stream_mode: _next_byte: bytes = cast(BufferedIOBase, varint).read(1) if len(_next_byte) == 0: raise _no_next_byte_error(num_bytes_read) next_byte: int = _next_byte[0] else: if num_bytes_read >= len(cast(BytesLike, varint)): raise _no_next_byte_error(num_bytes_read) next_byte = cast(BytesLike, varint)[num_bytes_read] x += (next_byte & 0b0111_1111) << (7 * num_bytes_read) expect_next = (next_byte >> 7) == 0b1 num_bytes_read += 1 if expect_next and num_bytes_read >= _max_num_bytes: raise ValueError(f"Varints must be at most {_max_num_bytes} bytes long.") if num_bytes_read > 1 and x < 2**(7*(num_bytes_read-1)): raise ValueError(f"Number {x} was not minimally encoded (as a {num_bytes_read} bytes varint).") if stream_mode: return x, num_bytes_read, cast(BufferedIOBase, varint) return x, num_bytes_read, memoryview(cast(BytesLike, varint))[num_bytes_read:]
def encode(x: int) ‑> bytes
-
Encodes a non-negative integer as an unsigned varint, returning the encoded bytes.
Raises
ValueError
if: -x < 0
(varints encode unsigned integers) -x >= 2**63
(from specs, varints are limited to 9 bytes)Example usage:
>>> from multiformats import varint >>> varint.encode(128) b'\x80\x01'
Expand source code
def encode(x: int) -> bytes: """ Encodes a non-negative integer as an unsigned varint, returning the encoded bytes. Raises `ValueError` if: - `x < 0` (varints encode unsigned integers) - `x >= 2**63` (from specs, varints are limited to 9 bytes) Example usage: ```py >>> from multiformats import varint >>> varint.encode(128) b'\\x80\\x01' ``` """ validate(x, int) if x < 0: raise ValueError("Integer is negative.") varint_bytelist: List[int] = [] while True: next_byte = x & 0b0111_1111 x >>= 7 if x > 0: varint_bytelist.append(next_byte | 0b1000_0000) else: varint_bytelist.append(next_byte) break if len(varint_bytelist) >= _max_num_bytes: raise ValueError(f"Varints must be at most {_max_num_bytes} bytes long.") return bytes(varint_bytelist)