Torrentfile API

Hasher Module


module
torrentfile.hasher

Piece/File Hashers for Bittorrent meta file contents.

Classes
  • Hasher Piece hasher for Bittorrent V1 files.
  • HasherV2 Calculate the root hash and piece layers for file contents.
  • HasherHybrid Calculate root and piece hashes for creating hybrid torrent file.
Functions
  • merkle_root(blocks) Calculate the merkle root for a seq of sha256 hash digests.

torrentfile.hasher.merkle_root(blocks)

Calculate the merkle root for a seq of sha256 hash digests.

Source code in torrentfile\hasher.py
def merkle_root(blocks):
    """Compute the merkle root of a sequence of sha256 hash digests.

    Adjacent digests are concatenated pairwise and re-hashed, layer by
    layer, until a single digest remains; that digest is the root.
    """
    nodes = list(blocks)
    while len(nodes) > 1:
        pairs = zip(*[iter(nodes)] * 2)
        nodes = [sha256(left + right).digest() for left, right in pairs]
    return nodes[0]

torrentfile.hasher.Hasher (_CbMixin)

Piece hasher for Bittorrent V1 files.

Takes a sorted list of all file paths, calculates sha1 hash for fixed size pieces of file data from each file seamlessly until the last piece which may be smaller than others.

Parameters:

Name Type Description Default
paths `list`

List of files.

required
piece_length `int`

Size of chunks to split the data into.

required
Source code in torrentfile\hasher.py
class Hasher(_CbMixin):
    """Piece hasher for Bittorrent V1 files.

    Takes a sorted list of all file paths, calculates sha1 hash
    for fixed size pieces of file data from each file
    seamlessly until the last piece which may be smaller than others.

    Parameters
    ----------
    paths : `list`
        List of files.
    piece_length : `int`
        Size of chunks to split the data into.
    """

    def __init__(self, paths, piece_length):
        """Generate hashes of piece length data from filelist contents."""
        self.piece_length = piece_length
        self.paths = paths
        self.total = sum(os.path.getsize(i) for i in self.paths)
        self.index = 0
        # handle for the file currently being read; advanced by next_file()
        self.current = open(self.paths[0], "rb")
        logger.debug(
            "Hashing v1 torrent file. Size: %s Piece Length: %s",
            humanize_bytes(self.total),
            humanize_bytes(self.piece_length),
        )

    def __iter__(self):
        """Iterate through feed pieces.

        Returns
        -------
        self : `iterator`
            Iterator for leaves/hash pieces.
        """
        return self

    def _handle_partial(self, arr):
        """Complete a piece whose data spans 2 or more files.

        Keeps reading from subsequent files until the piece reaches
        ``piece_length`` bytes or the file list is exhausted, then
        hashes whatever was collected.

        Parameters
        ----------
        arr : `bytearray`
            Incomplete piece containing partial data.

        Returns
        -------
        digest : `bytes`
            SHA1 digest of the complete (or final, shorter) piece.
        """
        while len(arr) < self.piece_length and self.next_file():
            target = self.piece_length - len(arr)
            temp = bytearray(target)
            size = self.current.readinto(temp)
            arr.extend(temp[:size])
            if size == target:
                break
        return sha1(arr).digest()  # nosec

    def next_file(self):
        """Seamlessly transition to next file in file list.

        Returns
        -------
        `bool`
            True when another file was opened, False when exhausted.
        """
        self.index += 1
        if self.index < len(self.paths):
            self.current.close()
            self.current = open(self.paths[self.index], "rb")
            return True
        return False

    def __next__(self):
        """Generate piece-length pieces of data from input file list."""
        while True:
            if self.current.closed:
                # iterator already exhausted; stay exhausted on re-entry
                # instead of raising ValueError from a closed handle
                raise StopIteration
            piece = bytearray(self.piece_length)
            size = self.current.readinto(piece)
            if size == 0:
                if not self.next_file():
                    # fix: close the final file handle so no descriptor
                    # leaks once the piece stream is fully consumed
                    self.current.close()
                    raise StopIteration
            elif size < self.piece_length:
                return self._handle_partial(piece[:size])
            else:
                return sha1(piece).digest()  # nosec

__init__(self, paths, piece_length) special

Generate hashes of piece length data from filelist contents.

Source code in torrentfile\hasher.py
def __init__(self, paths, piece_length):
    """Set up piece-hashing state for the given list of file paths."""
    self.paths = paths
    self.piece_length = piece_length
    self.index = 0
    self.total = sum(os.path.getsize(path) for path in self.paths)
    self.current = open(self.paths[0], "rb")
    logger.debug(
        "Hashing v1 torrent file. Size: %s Piece Length: %s",
        humanize_bytes(self.total),
        humanize_bytes(self.piece_length),
    )

__iter__(self) special

Iterate through feed pieces.

Returns:

Type Description
`iterator`

Iterator for leaves/hash pieces.

Source code in torrentfile\hasher.py
def __iter__(self):
    """Return the hasher itself as the piece iterator.

    Returns
    -------
    self : `iterator`
        Iterator for leaves/hash pieces.
    """
    return self

__next__(self) special

Generate piece-length pieces of data from input file list.

Source code in torrentfile\hasher.py
def __next__(self):
    """Produce the next piece hash from the file stream."""
    while True:
        buffer = bytearray(self.piece_length)
        count = self.current.readinto(buffer)
        if not count:
            # end of current file: advance or finish iteration
            if self.next_file():
                continue
            raise StopIteration
        if count == self.piece_length:
            return sha1(buffer).digest()  # nosec
        # short read: piece continues into the next file (or is the last)
        return self._handle_partial(buffer[:count])

next_file(self)

Seamlessly transition to next file in file list.

Source code in torrentfile\hasher.py
def next_file(self):
    """Advance seamlessly to the next file in the file list."""
    self.index += 1
    has_more = self.index < len(self.paths)
    if has_more:
        # release the finished handle before opening the next one
        self.current.close()
        self.current = open(self.paths[self.index], "rb")
    return has_more

torrentfile.hasher.HasherV2 (_CbMixin)

Calculate the root hash and piece layers for file contents.

Iterates over 16KiB blocks of data from given file, hashes the data, then creates a hash tree from the individual block hashes until size of hashed data equals the piece-length. Then continues the hash tree until root hash is calculated.

Parameters:

Name Type Description Default
path `str`

Path to file.

required
piece_length `int`

Size of layer hashes pieces.

required
Source code in torrentfile\hasher.py
class HasherV2(_CbMixin):
    """Calculate the root hash and piece layers for file contents.

    Iterates over 16KiB blocks of data from given file, hashes the data,
    then creates a hash tree from the individual block hashes until size of
    hashed data equals the piece-length.  Then continues the hash tree until
    root hash is calculated.

    Parameters
    ----------
    path : `str`
        Path to file.
    piece_length : `int`
        Size of layer hashes pieces.
    """

    def __init__(self, path, piece_length):
        """Calculate and store hash information for specific file."""
        self.path = path
        self.root = None
        self.piece_layer = None
        self.layer_hashes = []
        self.piece_length = piece_length
        # number of 16KiB blocks that make up one piece
        self.num_blocks = piece_length // BLOCK_SIZE
        logger.debug(
            "Hashing partial v2 torrent file. Piece Length: %s Path: %s",
            humanize_bytes(self.piece_length),
            str(self.path),
        )

        with open(self.path, "rb") as fd:
            self.process_file(fd)

    def process_file(self, fd):
        """Calculate hashes over 16KiB chunks of file content.

        Parameters
        ----------
        fd : `BinaryIO`
            File opened in binary read mode (not a path string).
        """
        while True:
            blocks = []
            leaf = bytearray(BLOCK_SIZE)
            # generate the sha256 leaves for one piece of the merkle tree
            for _ in range(self.num_blocks):
                size = fd.readinto(leaf)
                if not size:
                    break
                blocks.append(sha256(leaf[:size]).digest())

            # an empty block list means EOF
            if not blocks:
                break
            if len(blocks) != self.num_blocks:
                # the final piece is short; pad the leaf level with
                # zero-filled hashes so the subtree is balanced
                remaining = self.num_blocks - len(blocks)
                if not self.layer_hashes:
                    # whole file fits in one piece: pad only up to the
                    # next power of two instead of a full piece
                    power2 = next_power_2(len(blocks))
                    remaining = power2 - len(blocks)
                # fix: use HASH_SIZE instead of magic 32 for consistency
                # with _calculate_root
                padding = [bytes(HASH_SIZE) for _ in range(remaining)]
                blocks.extend(padding)

            # root of this piece's subtree becomes one layer hash
            layer_hash = merkle_root(blocks)
            if self._cb:
                self._cb(layer_hash)
            self.layer_hashes.append(layer_hash)
        self._calculate_root()

    def _calculate_root(self):
        """Calculate root hash for the target file."""
        self.piece_layer = b"".join(self.layer_hashes)
        hashes = len(self.layer_hashes)
        if hashes > 1:
            # pad layer hashes to a power of two with the merkle root of
            # an all-zero piece so the final tree is balanced
            pow2 = next_power_2(hashes)
            remainder = pow2 - hashes
            pad_piece = [bytes(HASH_SIZE) for _ in range(self.num_blocks)]
            for _ in range(remainder):
                self.layer_hashes.append(merkle_root(pad_piece))
        self.root = merkle_root(self.layer_hashes)

__init__(self, path, piece_length) special

Calculate and store hash information for specific file.

Source code in torrentfile\hasher.py
def __init__(self, path, piece_length):
    """Record hashing parameters and hash the target file's contents."""
    self.path = path
    self.piece_length = piece_length
    self.num_blocks = piece_length // BLOCK_SIZE
    self.root = None
    self.piece_layer = None
    self.layer_hashes = []
    logger.debug(
        "Hashing partial v2 torrent file. Piece Length: %s Path: %s",
        humanize_bytes(self.piece_length),
        str(self.path),
    )

    with open(self.path, "rb") as fd:
        self.process_file(fd)

process_file(self, fd)

Calculate hashes over 16KiB chunks of file content.

Parameters:

Name Type Description Default
fd `str`

Opened file in read mode.

required
Source code in torrentfile\hasher.py
def process_file(self, fd):
    """Calculate hashes over 16KiB chunks of file content.

    Parameters
    ----------
    fd : `BinaryIO`
        File opened in binary read mode (not a path string).
    """
    while True:
        blocks = []
        leaf = bytearray(BLOCK_SIZE)
        # generate the sha256 leaves for one piece of the merkle tree
        for _ in range(self.num_blocks):
            size = fd.readinto(leaf)
            if not size:
                break
            blocks.append(sha256(leaf[:size]).digest())

        # an empty block list means EOF
        if not blocks:
            break
        if len(blocks) != self.num_blocks:
            # the final piece is short; pad the leaf level with
            # zero-filled hashes so the subtree is balanced
            remaining = self.num_blocks - len(blocks)
            if not self.layer_hashes:
                # whole file fits in one piece: pad only up to the
                # next power of two instead of a full piece
                power2 = next_power_2(len(blocks))
                remaining = power2 - len(blocks)

            # pad the rest with zero-hashes to fill the remaining space
            padding = [bytes(32) for _ in range(remaining)]
            blocks.extend(padding)

        # root of this piece's subtree becomes one layer hash
        layer_hash = merkle_root(blocks)
        if self._cb:
            self._cb(layer_hash)
        self.layer_hashes.append(layer_hash)
    self._calculate_root()

torrentfile.hasher.HasherHybrid (_CbMixin)

Calculate root and piece hashes for creating hybrid torrent file.

Create merkle tree layers from sha256 hashed 16KiB blocks of contents. With a branching factor of 2, merge layer hashes until blocks equal piece_length bytes for the piece layer, and then the root hash.

Parameters:

Name Type Description Default
path `str`

path to target file.

required
piece_length `int`

piece length for data chunks.

required
Source code in torrentfile\hasher.py
class HasherHybrid(_CbMixin):
    """Calculate root and piece hashes for creating hybrid torrent file.

    Create merkle tree layers from sha256 hashed 16KiB blocks of contents.
    With a branching factor of 2, merge layer hashes until blocks equal
    piece_length bytes for the piece layer, and then the root hash.

    Parameters
    ----------
    path : `str`
        path to target file.
    piece_length : `int`
        piece length for data chunks.
    """

    def __init__(self, path, piece_length):
        """Construct Hasher class instances for each file in torrent."""
        self.path = path
        self.piece_length = piece_length
        self.pieces = []  # v1 sha1 piece digests
        self.layer_hashes = []  # v2 per-piece merkle roots
        self.piece_layer = None
        self.root = None
        self.padding_piece = None
        self.padding_file = None
        # number of 16KiB blocks that make up one piece
        self.amount = piece_length // BLOCK_SIZE
        logger.debug(
            "Hashing partial Hybrid torrent file. Piece Length: %s Path: %s",
            humanize_bytes(self.piece_length),
            str(self.path),
        )
        with open(path, "rb") as data:
            self.process_file(data)

    def _pad_remaining(self, block_count):
        """Generate Hash sized, 0 filled bytes for padding.

        Parameters
        ----------
        block_count : `int`
            current total number of blocks collected.

        Returns
        -------
        padding : `list` of `bytes`
            Padding to fill remaining portion of tree.
        """
        remaining = self.amount - block_count
        if not self.layer_hashes:
            # whole file fits in one piece: pad only up to the next
            # power of two instead of a full piece
            power2 = next_power_2(block_count)
            remaining = power2 - block_count
        return [bytes(HASH_SIZE) for _ in range(remaining)]

    def process_file(self, data):
        """Calculate layer hashes for contents of file.

        Parameters
        ----------
        data : `BinaryIO`
            File opened in binary read mode.
        """
        while True:
            plength = self.piece_length
            blocks = []
            piece = sha1()  # nosec
            block = bytearray(BLOCK_SIZE)
            for _ in range(self.amount):
                size = data.readinto(block)
                if not size:
                    break
                plength -= size
                blocks.append(sha256(block[:size]).digest())
                piece.update(block[:size])
            if not blocks:
                break
            if len(blocks) != self.amount:
                blocks.extend(self._pad_remaining(len(blocks)))
            layer_hash = merkle_root(blocks)
            if self._cb:
                self._cb(layer_hash)
            self.layer_hashes.append(layer_hash)
            if plength > 0:
                # final piece is short for v1 hashing: record the pad
                # file and hash zero bytes to fill the piece.
                # fix: the pad file's length is plength (the amount of
                # padding), not `size` (bytes read in the last block) --
                # the path component str(plength) already reflects this.
                self.padding_file = {
                    "attr": "p",
                    "length": plength,
                    "path": [".pad", str(plength)],
                }
                piece.update(bytes(plength))
            self.pieces.append(piece.digest())  # nosec
        self._calculate_root()

    def _calculate_root(self):
        """Calculate the root hash for opened file."""
        self.piece_layer = b"".join(self.layer_hashes)

        if len(self.layer_hashes) > 1:
            # pad layer hashes to a power of two with the merkle root of
            # an all-zero piece so the final tree is balanced
            pad_piece = merkle_root([bytes(HASH_SIZE) for _ in range(self.amount)])

            pow2 = next_power_2(len(self.layer_hashes))
            remainder = pow2 - len(self.layer_hashes)

            self.layer_hashes += [pad_piece for _ in range(remainder)]
        self.root = merkle_root(self.layer_hashes)

__init__(self, path, piece_length) special

Construct Hasher class instances for each file in torrent.

Source code in torrentfile\hasher.py
def __init__(self, path, piece_length):
    """Initialize hybrid hasher state and hash the target file."""
    self.path = path
    self.piece_length = piece_length
    self.amount = piece_length // BLOCK_SIZE
    self.pieces = []
    self.layer_hashes = []
    self.piece_layer = None
    self.root = None
    self.padding_piece = None
    self.padding_file = None
    logger.debug(
        "Hashing partial Hybrid torrent file. Piece Length: %s Path: %s",
        humanize_bytes(self.piece_length),
        str(self.path),
    )
    with open(path, "rb") as data:
        self.process_file(data)

process_file(self, data)

Calculate layer hashes for contents of file.

Parameters:

Name Type Description Default
data `BytesIO`

File opened in read mode.

required
Source code in torrentfile\hasher.py
def process_file(self, data):
    """Calculate layer hashes for contents of file.

    Parameters
    ----------
    data : `BinaryIO`
        File opened in binary read mode.
    """
    while True:
        plength = self.piece_length
        blocks = []
        piece = sha1()  # nosec
        block = bytearray(BLOCK_SIZE)
        for _ in range(self.amount):
            size = data.readinto(block)
            if not size:
                break
            plength -= size
            blocks.append(sha256(block[:size]).digest())
            piece.update(block[:size])
        if not blocks:
            break
        if len(blocks) != self.amount:
            padding = self._pad_remaining(len(blocks))
            blocks.extend(padding)
        layer_hash = merkle_root(blocks)
        if self._cb:
            self._cb(layer_hash)
        self.layer_hashes.append(layer_hash)
        if plength > 0:
            # final piece is short for v1 hashing: record the pad file
            # and hash zero bytes to fill the piece.
            # fix: the pad file's length is plength (the padding amount),
            # not `size` (bytes read in the last block) -- the path
            # component str(plength) already reflects this.
            self.padding_file = {
                "attr": "p",
                "length": plength,
                "path": [".pad", str(plength)],
            }
            piece.update(bytes(plength))
        self.pieces.append(piece.digest())  # nosec
    self._calculate_root()