Huffman Coding

Description

Huffman coding is a lossless data compression algorithm. It assigns variable-length codes to input characters, with shorter codes assigned to more frequent characters and longer codes assigned to less frequent characters. This results in a prefix-free code, meaning no code is a prefix of any other, making decoding unambiguous.

C++ Code

#include <iostream>
#include <queue>
#include <unordered_map>
using namespace std;

struct HuffmanNode {
    char data;
    int freq;
    HuffmanNode *left, *right;

    HuffmanNode(char data, int freq) {
        this->data = data;
        this->freq = freq;
        left = right = nullptr;
    }
};

struct Compare {
    bool operator()(HuffmanNode* l, HuffmanNode* r) {
        return l->freq > r->freq;
    }
};

HuffmanNode* buildHuffmanTree(unordered_map<char, int>& freqMap) {
    priority_queue<HuffmanNode*, vector<HuffmanNode*>, Compare> pq;

    for (auto& pair : freqMap) {
        pq.push(new HuffmanNode(pair.first, pair.second));
    }

    while (pq.size() != 1) {
        HuffmanNode *left = pq.top(); pq.pop();
        HuffmanNode *right = pq.top(); pq.pop();

        int sum = left->freq + right->freq;
        HuffmanNode *internalNode = new HuffmanNode('$', sum);
        internalNode->left = left;
        internalNode->right = right;

        pq.push(internalNode);
    }

    return pq.top();
}

void storeHuffmanCodes(HuffmanNode* root, string str, unordered_map<char, string>& huffmanCode) {
    if (root == nullptr) {
        return;
    }

    if (!root->left && !root->right) {
        huffmanCode[root->data] = str;
    }

    storeHuffmanCodes(root->left, str + "0", huffmanCode);
    storeHuffmanCodes(root->right, str + "1", huffmanCode);
}

string encode(string input) {
    unordered_map<char, int> freqMap;
    for (char c : input) {
        freqMap[c]++;
    }

    HuffmanNode* root = buildHuffmanTree(freqMap);

    unordered_map<char, string> huffmanCode;
    storeHuffmanCodes(root, "", huffmanCode);

    string encodedString = "";
    for (char c : input) {
        encodedString += huffmanCode[c];
    }

    return encodedString;
}

string decode(HuffmanNode* root, string encodedString) {
    string decodedString = "";
    HuffmanNode* curr = root;
    for (char bit : encodedString) {
        if (bit == '0') {
            curr = curr->left;
        } else {
            curr = curr->right;
        }
        if (!curr->left && !curr->right) {
            decodedString += curr->data;
            curr = root;
        }
    }
    return decodedString;
}

int main() {
    string input = "Huffman coding is a data compression algorithm.";

    string encodedString = encode(input);
    cout << "Encoded string: " << encodedString << endl;

    HuffmanNode* root = buildHuffmanTree(freqMap);
    string decodedString = decode(root, encodedString);
    cout << "Decoded string: " << decodedString << endl;

    return 0;
}

Time and Space Complexity

Operation Time Complexity Space Complexity
Huffman Tree Construction O(n log n) O(n)
Encoding O(n) O(n)
Decoding O(n) O(1)

Where:

  • n: Number of characters in the input string.

← Back to Homepage