Burrows-Wheeler Transform (BWT)

Description

The Burrows-Wheeler Transform (BWT) is a data compression algorithm that rearranges characters in a string to improve compressibility. It permutes the input string into a form where similar characters are grouped together, making it easier for subsequent compression algorithms (like Move-to-Front or Run-Length Encoding) to achieve better compression ratios.

C++ Code

#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;

string burrowsWheelerTransform(string input) {
    int n = input.size();
    vector<string> rotations;

    for (int i = 0; i < n; i++) {
        rotations.push_back(input.substr(i) + input.substr(0, i));
    }

    sort(rotations.begin(), rotations.end());

    string bwt = "";
    for (int i = 0; i < n; i++) {
        bwt += rotations[i][n - 1];
    }

    return bwt;
}

string inverseBurrowsWheelerTransform(string input) {
    int n = input.size();
    vector<pair<char, int>> indexedChars(n);

    for (int i = 0; i < n; i++) {
        indexedChars[i] = { input[i], i };
    }

    sort(indexedChars.begin(), indexedChars.end());

    vector<int> next(n);
    for (int i = 0; i < n; i++) {
        next[i] = indexedChars[i].second;
    }

    string original = "";
    int current = 0;
    for (int i = 0; i < n; i++) {
        original += indexedChars[current].first;
        current = next[current];
    }

    return original;
}

int main() {
    string input = "burrows-wheeler-transform";

    string bwt = burrowsWheelerTransform(input);
    cout << "Burrows-Wheeler Transform (BWT): " << bwt << endl;

    string original = inverseBurrowsWheelerTransform(bwt);
    cout << "Original string after inverse BWT: " << original << endl;

    return 0;
}

Time and Space Complexity

Operation Time Complexity Space Complexity
Burrows-Wheeler Transform (BWT) O(n log n) O(n)
Inverse Burrows-Wheeler Transform O(n log n) O(n)

Where:

  • n: Length of the input string.

← Back to Homepage