Module dsa.huffman
Module to access functions for Huffman Compression.
Expand source code
""" Module to access functions for Huffman Compression. """
from collections import Counter

from dsa.heap import PriorityQueue
from dsa.tree import Tree, TreeNode
def character_frequency(s: str) -> dict:
    """
    Take a string and return a dictionary of character frequencies.

    Args:
        s (str): The string to analyze.

    Returns:
        A plain dict mapping each distinct character of ``s`` to the number
        of times it occurs, keyed in order of first appearance. An empty
        string yields an empty dict.
    """
    # Counter does the tallying; convert back so callers still get a plain dict.
    return dict(Counter(s))
def build_frequency_table(s: str) -> PriorityQueue:
    """
    Build the priority queue that seeds Huffman tree construction.

    Args:
        s (str): The string to encode.

    Returns:
        A PriorityQueue holding one TreeNode per distinct character of ``s``,
        each pushed with the character's occurrence count as its priority.
    """
    counts = character_frequency(s)
    queue = PriorityQueue()
    for symbol in counts:
        # The count is the priority; the node is built from the character alone.
        queue.push(counts[symbol], TreeNode(symbol))
    return queue
def build_huffman_tree(pq: PriorityQueue) -> Tree:
    """
    Collapse a priority queue of nodes into a single Huffman Tree.

    While more than one entry remains, two entries are taken off the queue
    with ``pop_pair`` and replaced by a parent node whose value is the
    concatenation of their values and whose priority is the sum of theirs.
    (Presumably ``pop_pair`` yields the lowest priority first -- confirm
    against dsa.heap.PriorityQueue.)

    Args:
        pq (PriorityQueue): A PriorityQueue containing TreeNodes of characters
            based on frequencies. It is consumed by this call.

    Returns:
        A Tree rooted at the last node left in the queue.
    """
    while len(pq) > 1:
        first_weight, first_node = pq.pop_pair()
        second_weight, second_node = pq.pop_pair()
        # First popped node becomes the left child, second the right child.
        parent = TreeNode(first_node.value + second_node.value, first_node, second_node)
        pq.push(first_weight + second_weight, parent)
    root = pq.pop()
    return Tree(root)
def build_huffman_dictionary(node: TreeNode, bit_string: str = "") -> dict:
    """
    Given a TreeNode, build a Huffman Dictionary.

    Walks the subtree rooted at ``node``, appending '0' for every step to a
    left child and '1' for every step to a right child.

    Args:
        node (TreeNode): The Huffman Node to start from.
        bit_string (str): The bit string accumulated on the way to ``node``.

    Returns:
        A dict mapping each leaf value to its bit string. A leaf reached with
        an empty ``bit_string`` (a tree consisting of one node) is given the
        code "0", so encoding a single-symbol text does not produce an empty
        bit string that loses the text's length.
    """
    if node.left is None and node.right is None:
        # An empty codeword would encode every occurrence as nothing at all.
        return {node.value: bit_string or "0"}
    codes = build_huffman_dictionary(node.left, bit_string + "0")
    codes.update(build_huffman_dictionary(node.right, bit_string + "1"))
    return codes
def huffman_encode(st: str, hd: dict) -> str:
    """
    Encode the string using the Huffman Dictionary.

    Args:
        st (str): The string to encode.
        hd (dict): The Huffman Dictionary mapping each character to its bit string.

    Returns:
        The encoded string: the codes of the characters of ``st`` concatenated in order.

    Raises:
        KeyError: If ``st`` contains a character that is not a key of ``hd``.
    """
    # join builds the result in one pass instead of repeated concatenation.
    return "".join(hd[c] for c in st)
def huffman_decode(encoded_data: str, tree: Tree) -> str:
    """
    Decode the encoded data using the Huffman Tree.

    Each bit moves one step down from the root: a character for which
    ``int(bit) == 0`` goes left, anything else goes right. Reaching a leaf
    emits its value and restarts at the root. Trailing bits that do not
    reach a leaf are dropped.

    Args:
        encoded_data (str): The encoded data, a string of '0'/'1' characters.
        tree (Tree): The Huffman Tree.

    Returns:
        The decoded data.

    Raises:
        ValueError: If a character of ``encoded_data`` cannot be parsed by ``int``.
    """
    root = tree.root
    if root is not None and root.left is None and root.right is None:
        # One-symbol tree: there is no branch to follow, so every bit stands
        # for one occurrence of the root's value.
        return root.value * len(encoded_data)
    decoded = []
    node = root
    for bit in encoded_data:
        node = node.left if int(bit) == 0 else node.right
        if node.left is None and node.right is None:
            decoded.append(node.value)
            node = root
    return "".join(decoded)
def bitstring_to_bytes(s: str) -> bytes:
    """
    Pack a bitstring into bytes, eight bits per byte.

    Args:
        s (str): The bitstring.

    Returns:
        One byte per 8-character slice of ``s``, each slice parsed as a
        base-2 integer. A final slice shorter than eight characters is
        parsed as-is, so its bits end up in the low end of the last byte.
    """
    packed = bytearray()
    for start in range(0, len(s), 8):
        chunk = s[start:start + 8]
        packed.append(int(chunk, 2))
    return bytes(packed)
def bytes_to_bitstring(ba: bytes, bitlength: int = 8) -> str:
    """
    Convert bytes to bitstring.

    Args:
        ba (bytes): The bytes to convert.
        bitlength (int): Minimum number of bits to render for the final byte;
            it is zero-padded on the left up to this width. All earlier
            bytes are always rendered as eight bits.

    Returns:
        The bytes converted to bitstring, or "" when ``ba`` is empty.
    """
    if not ba:
        return ""
    # Every byte but the last is a full 8-bit group.
    parts = [f"{b:08b}" for b in ba[:-1]]
    # The last byte may carry fewer meaningful bits, so pad only to bitlength.
    parts.append(f"{ba[-1]:b}".zfill(bitlength))
    return "".join(parts)
Functions
def bitstring_to_bytes(s: str) ‑> bytes
-
Convert a bitstring to bytes.
Args
s
:str
- The bitstring.
Returns
Bitstring converted to bytes.
Expand source code
def bitstring_to_bytes(s: str) -> bytes:
    """
    Pack a bitstring into bytes, eight bits per byte.

    Args:
        s (str): The bitstring.

    Returns:
        One byte per 8-character slice of ``s``, each slice parsed as a
        base-2 integer. A final slice shorter than eight characters is
        parsed as-is, so its bits end up in the low end of the last byte.
    """
    packed = bytearray()
    for start in range(0, len(s), 8):
        chunk = s[start:start + 8]
        packed.append(int(chunk, 2))
    return bytes(packed)
def build_frequency_table(s: str) ‑> PriorityQueue
-
Accepts a string to encode and returns a heap of the characters.
Args
s
:str
- The string to encode.
Returns
A priority queue of the characters based on frequencies.
Expand source code
def build_frequency_table(s: str) -> PriorityQueue:
    """
    Build the priority queue that seeds Huffman tree construction.

    Args:
        s (str): The string to encode.

    Returns:
        A PriorityQueue holding one TreeNode per distinct character of ``s``,
        each pushed with the character's occurrence count as its priority.
    """
    counts = character_frequency(s)
    queue = PriorityQueue()
    for symbol in counts:
        # The count is the priority; the node is built from the character alone.
        queue.push(counts[symbol], TreeNode(symbol))
    return queue
def build_huffman_dictionary(node: TreeNode, bit_string: str = '') ‑> dict
-
Given a TreeNode, build a Huffman Dictionary.
Args
node
:TreeNode
- The Huffman Node.
bit_string
:str
- The bit string.
Returns
A Huffman Dictionary.
Expand source code
def build_huffman_dictionary(node: TreeNode, bit_string: str = "") -> dict:
    """
    Given a TreeNode, build a Huffman Dictionary.

    Walks the subtree rooted at ``node``, appending '0' for every step to a
    left child and '1' for every step to a right child.

    Args:
        node (TreeNode): The Huffman Node to start from.
        bit_string (str): The bit string accumulated on the way to ``node``.

    Returns:
        A dict mapping each leaf value to its bit string. A leaf reached with
        an empty ``bit_string`` (a tree consisting of one node) is given the
        code "0", so encoding a single-symbol text does not produce an empty
        bit string that loses the text's length.
    """
    if node.left is None and node.right is None:
        # An empty codeword would encode every occurrence as nothing at all.
        return {node.value: bit_string or "0"}
    codes = build_huffman_dictionary(node.left, bit_string + "0")
    codes.update(build_huffman_dictionary(node.right, bit_string + "1"))
    return codes
def build_huffman_tree(pq: PriorityQueue) ‑> Tree
-
Accepts a priority queue and returns a Huffman Tree.
Args
pq
:PriorityQueue
- A PriorityQueue containing TreeNodes of characters based on frequencies.
Returns
A Huffman Tree.
Expand source code
def build_huffman_tree(pq: PriorityQueue) -> Tree:
    """
    Collapse a priority queue of nodes into a single Huffman Tree.

    While more than one entry remains, two entries are taken off the queue
    with ``pop_pair`` and replaced by a parent node whose value is the
    concatenation of their values and whose priority is the sum of theirs.
    (Presumably ``pop_pair`` yields the lowest priority first -- confirm
    against dsa.heap.PriorityQueue.)

    Args:
        pq (PriorityQueue): A PriorityQueue containing TreeNodes of characters
            based on frequencies. It is consumed by this call.

    Returns:
        A Tree rooted at the last node left in the queue.
    """
    while len(pq) > 1:
        first_weight, first_node = pq.pop_pair()
        second_weight, second_node = pq.pop_pair()
        # First popped node becomes the left child, second the right child.
        parent = TreeNode(first_node.value + second_node.value, first_node, second_node)
        pq.push(first_weight + second_weight, parent)
    root = pq.pop()
    return Tree(root)
def bytes_to_bitstring(ba: bytes, bitlength: int = 8) ‑> str
-
Convert bytes to bitstring.
Args
ba
:bytes
- The bytes to convert.
bitlength
:int
- The bit length.
Returns
The bytes converted to bitstring.
Expand source code
def bytes_to_bitstring(ba: bytes, bitlength: int = 8) -> str:
    """
    Convert bytes to bitstring.

    Args:
        ba (bytes): The bytes to convert.
        bitlength (int): Minimum number of bits to render for the final byte;
            it is zero-padded on the left up to this width. All earlier
            bytes are always rendered as eight bits.

    Returns:
        The bytes converted to bitstring, or "" when ``ba`` is empty.
    """
    if not ba:
        return ""
    # Every byte but the last is a full 8-bit group.
    parts = [f"{b:08b}" for b in ba[:-1]]
    # The last byte may carry fewer meaningful bits, so pad only to bitlength.
    parts.append(f"{ba[-1]:b}".zfill(bitlength))
    return "".join(parts)
def character_frequency(s: str) ‑> dict
-
Takes a string and returns a dictionary of character frequencies.
Args
s
:str
- The string to analyze.
Returns
Dictionary containing character frequency.
Expand source code
def character_frequency(s: str) -> dict:
    """
    Take a string and return a dictionary of character frequencies.

    Args:
        s (str): The string to analyze.

    Returns:
        A plain dict mapping each distinct character of ``s`` to the number
        of times it occurs, keyed in order of first appearance. An empty
        string yields an empty dict.
    """
    # Counter does the tallying; convert back so callers still get a plain dict.
    return dict(Counter(s))
def huffman_decode(encoded_data: str, tree: Tree) ‑> str
-
Decode the encoded data using the Huffman Tree.
Args
encoded_data
:str
- The encoded data.
tree
:Tree
- The Huffman Tree.
Returns
The decoded data.
Expand source code
def huffman_decode(encoded_data: str, tree: Tree) -> str:
    """
    Decode the encoded data using the Huffman Tree.

    Each bit moves one step down from the root: a character for which
    ``int(bit) == 0`` goes left, anything else goes right. Reaching a leaf
    emits its value and restarts at the root. Trailing bits that do not
    reach a leaf are dropped.

    Args:
        encoded_data (str): The encoded data, a string of '0'/'1' characters.
        tree (Tree): The Huffman Tree.

    Returns:
        The decoded data.

    Raises:
        ValueError: If a character of ``encoded_data`` cannot be parsed by ``int``.
    """
    root = tree.root
    if root is not None and root.left is None and root.right is None:
        # One-symbol tree: there is no branch to follow, so every bit stands
        # for one occurrence of the root's value.
        return root.value * len(encoded_data)
    decoded = []
    node = root
    for bit in encoded_data:
        node = node.left if int(bit) == 0 else node.right
        if node.left is None and node.right is None:
            decoded.append(node.value)
            node = root
    return "".join(decoded)
def huffman_encode(st: str, hd: dict) ‑> str
-
Encode the string using the Huffman Dictionary.
Args
st
:str
- The string to encode.
hd
:dict
- The Huffman Dictionary.
Returns
The encoded string.
Expand source code
def huffman_encode(st: str, hd: dict) -> str:
    """
    Encode the string using the Huffman Dictionary.

    Args:
        st (str): The string to encode.
        hd (dict): The Huffman Dictionary mapping each character to its bit string.

    Returns:
        The encoded string: the codes of the characters of ``st`` concatenated in order.

    Raises:
        KeyError: If ``st`` contains a character that is not a key of ``hd``.
    """
    # join builds the result in one pass instead of repeated concatenation.
    return "".join(hd[c] for c in st)