Huffman Coding
Description
Huffman coding is a lossless data compression algorithm. It assigns variable-length codes to input characters, with shorter codes assigned to more frequent characters and longer codes assigned to less frequent characters. This results in a prefix-free code, meaning no code is a prefix of any other, making decoding unambiguous.
C++ Code
#include <iostream>
#include <queue>
#include <unordered_map>
using namespace std;
struct HuffmanNode {
char data;
int freq;
HuffmanNode *left, *right;
HuffmanNode(char data, int freq) {
this->data = data;
this->freq = freq;
left = right = nullptr;
}
};
struct Compare {
bool operator()(HuffmanNode* l, HuffmanNode* r) {
return l->freq > r->freq;
}
};
HuffmanNode* buildHuffmanTree(unordered_map<char, int>& freqMap) {
priority_queue<HuffmanNode*, vector<HuffmanNode*>, Compare> pq;
for (auto& pair : freqMap) {
pq.push(new HuffmanNode(pair.first, pair.second));
}
while (pq.size() != 1) {
HuffmanNode *left = pq.top(); pq.pop();
HuffmanNode *right = pq.top(); pq.pop();
int sum = left->freq + right->freq;
HuffmanNode *internalNode = new HuffmanNode('$', sum);
internalNode->left = left;
internalNode->right = right;
pq.push(internalNode);
}
return pq.top();
}
void storeHuffmanCodes(HuffmanNode* root, string str, unordered_map<char, string>& huffmanCode) {
if (root == nullptr) {
return;
}
if (!root->left && !root->right) {
huffmanCode[root->data] = str;
}
storeHuffmanCodes(root->left, str + "0", huffmanCode);
storeHuffmanCodes(root->right, str + "1", huffmanCode);
}
string encode(string input) {
unordered_map<char, int> freqMap;
for (char c : input) {
freqMap[c]++;
}
HuffmanNode* root = buildHuffmanTree(freqMap);
unordered_map<char, string> huffmanCode;
storeHuffmanCodes(root, "", huffmanCode);
string encodedString = "";
for (char c : input) {
encodedString += huffmanCode[c];
}
return encodedString;
}
string decode(HuffmanNode* root, string encodedString) {
string decodedString = "";
HuffmanNode* curr = root;
for (char bit : encodedString) {
if (bit == '0') {
curr = curr->left;
} else {
curr = curr->right;
}
if (!curr->left && !curr->right) {
decodedString += curr->data;
curr = root;
}
}
return decodedString;
}
int main() {
string input = "Huffman coding is a data compression algorithm.";
string encodedString = encode(input);
cout << "Encoded string: " << encodedString << endl;
HuffmanNode* root = buildHuffmanTree(freqMap);
string decodedString = decode(root, encodedString);
cout << "Decoded string: " << decodedString << endl;
return 0;
}
Time and Space Complexity
Operation | Time Complexity | Space Complexity |
---|---|---|
Huffman Tree Construction | O(n log n) | O(n) |
Encoding | O(n) | O(n) |
Decoding | O(n) | O(1) |
Where:
- n: Number of characters in the input string.