Module dsa.huffman

Expand source code
import heapq

class Node:
    ''' a node of a Huffman tree: holds two child slots and an optional value '''
    def __init__(self, left, right, value=None):
        # child[0] is the left branch and child[1] the right branch;
        # both are None when the node is a leaf
        self.child = [left, right]
        self.value = value

    def __lt__(self, other):
        # Nodes are deliberately unordered. They are stored on the heap inside
        # (frequency, Node) tuples; when two frequencies tie, tuple comparison
        # falls through to the nodes, and defining __lt__ keeps that comparison
        # from raising TypeError.
        return False

    def __repr__(self):
        ''' return "none" for a node without a value, otherwise the value as text '''
        if self.value is None:
            return "none"
        # __repr__ must return a str; convert so non-string values
        # (for example integers) do not make repr() raise TypeError
        return str(self.value)

def character_frequency(s):
    ''' takes a string and returns a dictionary mapping each character to its number of occurrences '''
    counts = {}
    for ch in s:
        # a character seen for the first time starts from zero
        counts[ch] = counts.get(ch, 0) + 1
    return counts

def build_frequency_table(s):
    ''' accepts a string to encode and returns a heap of (frequency, leaf Node) tuples, one per distinct character '''
    heap = []
    for symbol, count in character_frequency(s).items():
        # the frequency comes first so the heap is ordered by it
        entry = (count, Node(None, None, symbol))
        heapq.heappush(heap, entry)
    return heap

def build_huffman_tree(heap):
    ''' accepts a heap of (frequency, Node) tuples and returns the root Node of the Huffman Tree

    The heap is consumed in place: the two lowest-frequency entries are
    repeatedly popped and replaced by a parent entry carrying their combined
    frequency, until a single entry is left.

    A heap holding one leaf is returned wrapped in a parent node whose two
    branches both lead to that leaf, so the only symbol still receives a
    one-bit code.

    Raises IndexError if the heap is empty.
    '''
    if len(heap) == 0:
        raise IndexError("cannot build a Huffman tree from an empty heap")

    if len(heap) == 1:
        only = heap[0][1]
        if only.child[0] is None and only.child[1] is None:
            # a lone leaf used as the root would get the empty bit string as
            # its code, so every encoded text would be empty and undecodable
            return Node(only, only)

    while len(heap) > 1:
        lowest = heapq.heappop(heap)
        second = heapq.heappop(heap)
        parent = Node(lowest[1], second[1])
        heapq.heappush(heap, (lowest[0] + second[0], parent))
    return heap[0][1]

def build_huffman_dictionary(node, bit_string=""):
    ''' given a Huffman Node, return a dictionary mapping each leaf value below it to its bit string

    bit_string is the path taken so far; '0' is appended for the left branch
    and '1' for the right branch.
    '''
    left, right = node.child
    if left is None and right is None:
        # leaf: the accumulated path is this value's code
        return {node.value: bit_string}

    left_codes = build_huffman_dictionary(left, bit_string + '0')
    right_codes = build_huffman_dictionary(right, bit_string + '1')
    return {**left_codes, **right_codes}

def huffman_encode(string, hd):
    ''' replace every character of string by its bit string from the dictionary hd and return the concatenation '''
    return "".join(hd[symbol] for symbol in string)

def huffman_decode(encoded_data, tree):
    ''' decode a string of '0'/'1' characters by walking the Huffman tree and return the decoded string

    Each bit selects a child of the current node. Reaching a leaf emits its
    value and restarts the walk at the root. Trailing bits that stop on an
    internal node produce no output.
    '''
    decoded = []
    current = tree
    for bit in encoded_data:
        current = current.child[int(bit)]
        left, right = current.child
        if left is not None or right is not None:
            # still on an internal node: keep descending
            continue
        decoded.append(current.value)
        current = tree
    return "".join(decoded)

def bitstring_to_bytes(s):
    ''' pack a string of '0'/'1' characters into bytes, eight characters per byte

    A final group shorter than eight characters is converted on its own, so
    the result does not record how many bits that last group had.
    '''
    packed = bytearray()
    for start in range(0, len(s), 8):
        group = s[start:start + 8]
        packed.append(int(group, 2))
    return bytes(packed)

def bytes_to_bitstring(ba, bitlength=8):
    ''' expand a bytes-like sequence into a string of '0'/'1' characters

    Every byte except the last is written as exactly eight bits. The last
    byte is zero-padded on the left to bitlength characters, which lets the
    caller restore a final group that was shorter than eight bits.

    Returns "" for empty input.
    '''
    if len(ba) == 0:
        # nothing to expand; ba[-1] below would raise IndexError
        return ""

    bits = [f"{b:08b}" for b in ba[:-1]]
    bits.append(f"{ba[-1]:b}".zfill(bitlength))
    return "".join(bits)

Functions

def bitstring_to_bytes(s)
Expand source code
def bitstring_to_bytes(s):
    ''' pack a string of '0'/'1' characters into bytes, eight characters per byte

    A final group shorter than eight characters is converted on its own, so
    the result does not record how many bits that last group had.
    '''
    packed = bytearray()
    for start in range(0, len(s), 8):
        group = s[start:start + 8]
        packed.append(int(group, 2))
    return bytes(packed)
def build_frequency_table(s)

accepts a string to encode and returns a heap of the characters

Expand source code
def build_frequency_table(s):
    ''' accepts a string to encode and returns a heap of (frequency, leaf Node) tuples, one per distinct character '''
    heap = []
    for symbol, count in character_frequency(s).items():
        # the frequency comes first so the heap is ordered by it
        entry = (count, Node(None, None, symbol))
        heapq.heappush(heap, entry)
    return heap
def build_huffman_dictionary(node, bit_string='')

given a Huffman Node, build a Huffman Dictionary

Expand source code
def build_huffman_dictionary(node, bit_string=""):
    ''' given a Huffman Node, return a dictionary mapping each leaf value below it to its bit string

    bit_string is the path taken so far; '0' is appended for the left branch
    and '1' for the right branch.
    '''
    left, right = node.child
    if left is None and right is None:
        # leaf: the accumulated path is this value's code
        return {node.value: bit_string}

    left_codes = build_huffman_dictionary(left, bit_string + '0')
    right_codes = build_huffman_dictionary(right, bit_string + '1')
    return {**left_codes, **right_codes}
def build_huffman_tree(heap)

accepts a heap and returns a Huffman Tree

Expand source code
def build_huffman_tree(heap):
    ''' accepts a heap of (frequency, Node) tuples and returns the root Node of the Huffman Tree

    The heap is consumed in place: the two lowest-frequency entries are
    repeatedly popped and replaced by a parent entry carrying their combined
    frequency, until a single entry is left.

    A heap holding one leaf is returned wrapped in a parent node whose two
    branches both lead to that leaf, so the only symbol still receives a
    one-bit code.

    Raises IndexError if the heap is empty.
    '''
    if len(heap) == 0:
        raise IndexError("cannot build a Huffman tree from an empty heap")

    if len(heap) == 1:
        only = heap[0][1]
        if only.child[0] is None and only.child[1] is None:
            # a lone leaf used as the root would get the empty bit string as
            # its code, so every encoded text would be empty and undecodable
            return Node(only, only)

    while len(heap) > 1:
        lowest = heapq.heappop(heap)
        second = heapq.heappop(heap)
        parent = Node(lowest[1], second[1])
        heapq.heappush(heap, (lowest[0] + second[0], parent))
    return heap[0][1]
def bytes_to_bitstring(ba, bitlength=8)
Expand source code
def bytes_to_bitstring(ba, bitlength=8):
    ''' expand a bytes-like sequence into a string of '0'/'1' characters

    Every byte except the last is written as exactly eight bits. The last
    byte is zero-padded on the left to bitlength characters, which lets the
    caller restore a final group that was shorter than eight bits.

    Returns "" for empty input.
    '''
    if len(ba) == 0:
        # nothing to expand; ba[-1] below would raise IndexError
        return ""

    bits = [f"{b:08b}" for b in ba[:-1]]
    bits.append(f"{ba[-1]:b}".zfill(bitlength))
    return "".join(bits)
def character_frequency(s)

takes a string and returns a dictionary of character frequencies

Expand source code
def character_frequency(s):
    ''' takes a string and returns a dictionary mapping each character to its number of occurrences '''
    counts = {}
    for ch in s:
        # a character seen for the first time starts from zero
        counts[ch] = counts.get(ch, 0) + 1
    return counts
def huffman_decode(encoded_data, tree)
Expand source code
def huffman_decode(encoded_data, tree):
    ''' decode a string of '0'/'1' characters by walking the Huffman tree and return the decoded string

    Each bit selects a child of the current node. Reaching a leaf emits its
    value and restarts the walk at the root. Trailing bits that stop on an
    internal node produce no output.
    '''
    decoded = []
    current = tree
    for bit in encoded_data:
        current = current.child[int(bit)]
        left, right = current.child
        if left is not None or right is not None:
            # still on an internal node: keep descending
            continue
        decoded.append(current.value)
        current = tree
    return "".join(decoded)
def huffman_encode(string, hd)
Expand source code
def huffman_encode(string, hd):
    ''' replace every character of string by its bit string from the dictionary hd and return the concatenation '''
    return "".join(hd[symbol] for symbol in string)

Classes

class Node (left, right, value=None)
Expand source code
class Node:
    ''' a node of a Huffman tree: holds two child slots and an optional value '''
    def __init__(self, left, right, value=None):
        # child[0] is the left branch and child[1] the right branch;
        # both are None when the node is a leaf
        self.child = [left, right]
        self.value = value

    def __lt__(self, other):
        # Nodes are deliberately unordered. They are stored on the heap inside
        # (frequency, Node) tuples; when two frequencies tie, tuple comparison
        # falls through to the nodes, and defining __lt__ keeps that comparison
        # from raising TypeError.
        return False

    def __repr__(self):
        ''' return "none" for a node without a value, otherwise the value as text '''
        if self.value is None:
            return "none"
        # __repr__ must return a str; convert so non-string values
        # (for example integers) do not make repr() raise TypeError
        return str(self.value)