Lempel-Ziv-Welch (LZW)

Description

Lempel-Ziv-Welch (LZW) is a lossless data compression algorithm that builds a dictionary of strings seen in the input data and replaces them with codes. It dynamically adds new strings to the dictionary as it processes the input.

C++ Code

#include <iostream>
#include <unordered_map>
#include <vector>
using namespace std;

vector<int> compress(string input) {
    unordered_map<string, int> dictionary;
    int dictSize = 256;
    
    for (int i = 0; i < 256; i++) {
        dictionary[string(1, i)] = i;
    }
    
    string current;
    vector<int> result;
    
    for (char c : input) {
        string currentPlusC = current + c;
        
        if (dictionary.find(currentPlusC) != dictionary.end()) {
            current = currentPlusC;
        } else {
            result.push_back(dictionary[current]);
            
            dictionary[currentPlusC] = dictSize++;
            
            current = string(1, c);
        }
    }
    
    if (!current.empty()) {
        result.push_back(dictionary[current]);
    }
    
    return result;
}

string decompress(vector<int>& compressed) {
    unordered_map<int, string> dictionary;
    int dictSize = 256;
    
    for (int i = 0; i < 256; i++) {
        dictionary[i] = string(1, i);
    }
    
    string previous = string(1, compressed[0]);
    string result = previous;
    
    for (int i = 1; i < compressed.size(); i++) {
        int currentCode = compressed[i];
        
        string current;
        if (dictionary.find(currentCode) != dictionary.end()) {
            current = dictionary[currentCode];
        } else if (currentCode == dictSize) {
            current = previous + previous[0];
        } else {
            throw runtime_error("Invalid compressed data.");
        }
        
        result += current;
        
        dictionary[dictSize++] = previous + current[0];
        
        previous = current;
    }
    
    return result;
}

int main() {
    string input = "LZW compression is a data compression algorithm.";

    vector<int> compressed = compress(input);
    cout << "Compressed codes:";
    for (int code : compressed) {
        cout << " " << code;
    }
    cout << endl;

    string decompressed = decompress(compressed);
    cout << "Decompressed string: " << decompressed << endl;

    return 0;
}

Time and Space Complexity

Operation Time Complexity Space Complexity
Compression (average) O(n) O(1)
Decompression O(n) O(n)

Where:

  • n: Length of the input data.

← Back to Homepage