Burrows-Wheeler Transform (BWT)
Description
The Burrows-Wheeler Transform (BWT) is a data compression algorithm that rearranges characters in a string to improve compressibility. It permutes the input string into a form where similar characters are grouped together, making it easier for subsequent compression algorithms (like Move-to-Front or Run-Length Encoding) to achieve better compression ratios.
C++ Code
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;
string burrowsWheelerTransform(string input) {
int n = input.size();
vector<string> rotations;
for (int i = 0; i < n; i++) {
rotations.push_back(input.substr(i) + input.substr(0, i));
}
sort(rotations.begin(), rotations.end());
string bwt = "";
for (int i = 0; i < n; i++) {
bwt += rotations[i][n - 1];
}
return bwt;
}
string inverseBurrowsWheelerTransform(string input) {
int n = input.size();
vector<pair<char, int>> indexedChars(n);
for (int i = 0; i < n; i++) {
indexedChars[i] = { input[i], i };
}
sort(indexedChars.begin(), indexedChars.end());
vector<int> next(n);
for (int i = 0; i < n; i++) {
next[i] = indexedChars[i].second;
}
string original = "";
int current = 0;
for (int i = 0; i < n; i++) {
original += indexedChars[current].first;
current = next[current];
}
return original;
}
int main() {
string input = "burrows-wheeler-transform";
string bwt = burrowsWheelerTransform(input);
cout << "Burrows-Wheeler Transform (BWT): " << bwt << endl;
string original = inverseBurrowsWheelerTransform(bwt);
cout << "Original string after inverse BWT: " << original << endl;
return 0;
}
Time and Space Complexity
Operation | Time Complexity | Space Complexity |
---|---|---|
Burrows-Wheeler Transform (BWT) | O(n log n) | O(n) |
Inverse Burrows-Wheeler Transform | O(n log n) | O(n) |
Where:
- n: Length of the input string.