Aho-Corasick Algorithm
Description
The Aho-Corasick algorithm is used for efficiently searching for multiple patterns in a given text. It constructs a finite state machine (trie) that allows simultaneous search for all patterns in linear time relative to the size of the patterns and the text.
C++ Code
#include <iostream>
#include <queue>
#include <unordered_map>
#include <vector>
using namespace std;
struct TrieNode {
unordered_map<char, TrieNode*> children;
TrieNode* fail;
vector<int> output;
TrieNode() : fail(nullptr) {}
~TrieNode() {
for (auto& kv : children) {
delete kv.second;
}
}
};
class AhoCorasick {
private:
TrieNode* root;
public:
AhoCorasick() {
root = new TrieNode();
}
~AhoCorasick() {
delete root;
}
void insert(string pattern, int index) {
TrieNode* current = root;
for (char c : pattern) {
if (!current->children[c]) {
current->children[c] = new TrieNode();
}
current = current->children[c];
}
current->output.push_back(index);
}
void buildAutomaton() {
queue<TrieNode*> q;
for (auto& kv : root->children) {
kv.second->fail = root;
q.push(kv.second);
}
while (!q.empty()) {
TrieNode* current = q.front();
q.pop();
for (auto& kv : current->children) {
char c = kv.first;
TrieNode* child = kv.second;
q.push(child);
TrieNode* fail = current->fail;
while (fail != root && !fail->children.count(c)) {
fail = fail->fail;
}
child->fail = fail->children[c] ? fail->children[c] : root;
child->output.insert(child->output.end(), child->fail->output.begin(), child->fail->output.end());
}
}
}
void searchPatterns(string text) {
TrieNode* current = root;
for (int i = 0; i < text.length(); ++i) {
while (current != root && !current->children.count(text[i])) {
current = current->fail;
}
if (current->children.count(text[i])) {
current = current->children[text[i]];
} else {
current = root;
}
if (!current->output.empty()) {
for (int patternIndex : current->output) {
cout << "Pattern found at index " << i - patternIndex << ": " << patternIndex << endl;
}
}
}
}
};
int main() {
AhoCorasick ac;
vector<string> patterns = {"he", "she", "his", "hers"};
for (int i = 0; i < patterns.size(); ++i) {
ac.insert(patterns[i], i);
}
ac.buildAutomaton();
string text = "ahishers";
ac.searchPatterns(text);
return 0;
}
Time and Space Complexity
Operation | Time Complexity | Space Complexity |
---|---|---|
Aho-Corasick Algorithm | O(n + m + z) | O(m) |
Where:
- n: Length of the text to be searched.
- m: Total length of all patterns.
- z: Number of occurrences of patterns in the text.