ICPC Competitive Programming

Suffix Automaton

Suffix Automaton (SAM) data structure for Advanced String Problems

Suffix Automaton (SAM)

Suffix Automaton is the smallest finite automaton that accepts all suffixes of string ss.

Properties

For string of length nn:

  • Has at most 2n12n - 1 states
  • Has at most 3n43n - 4 transitions
  • Can be built in O(n)O(n) time and space

Structure

Each state stores:

  • link (suffix link): points to state representing longest proper suffix
  • len: length of longest string this state represents
  • next[c]: transition to next state when reading character c

Implementation

struct SuffixAutomaton {
    struct State {
        int len, link;
        map<char, int> next;
        // or use int next[26] for lowercase letters
    };
    
    vector<State> st;
    int last;
    
    SuffixAutomaton() {
        st.push_back({0, -1, {}});  // Initial state
        last = 0;
    }
    
    void extend(char c) {
        int cur = st.size();
        st.push_back({st[last].len + 1, -1, {}});
        
        int p = last;
        while (p != -1 && !st[p].next.count(c)) {
            st[p].next[c] = cur;
            p = st[p].link;
        }
        
        if (p == -1) {
            st[cur].link = 0;
        } else {
            int q = st[p].next[c];
            if (st[p].len + 1 == st[q].len) {
                st[cur].link = q;
            } else {
                int clone = st.size();
                st.push_back({st[p].len + 1, st[q].link, st[q].next});
                
                while (p != -1 && st[p].next[c] == q) {
                    st[p].next[c] = clone;
                    p = st[p].link;
                }
                
                st[q].link = st[cur].link = clone;
            }
        }
        
        last = cur;
    }
    
    void build(const string& s) {
        for (char c : s) {
            extend(c);
        }
    }
};

Concept

Endpos Equivalence Class

  • endpos(t) = set of positions where substring tt ends
  • States in SAM represent equivalence classes of strings with the same endpos

Suffix link of state vv points to state representing longest proper suffix of strings in vv that belongs to another equivalence class.

Example: s = "abab"

State 0 (initial): ""
State 1: "a" (endpos = {0, 2})
State 2: "ab" (endpos = {1, 3})
State 3: "aba" → "abab" (endpos = {3})
State 4: "b" (endpos = {1, 3})
State 5: "bab" (endpos = {3})

Suffix links:
3 → 2 → 1 → 0
5 → 4 → 0

Applications

1. Check if Pattern is a Substring

bool contains(const SuffixAutomaton& sam, const string& pattern) {
    int cur = 0;
    for (char c : pattern) {
        if (!sam.st[cur].next.count(c)) {
            return false;
        }
        cur = sam.st[cur].next.at(c);
    }
    return true;
}

2. Count Distinct Substrings

long long countDistinct(const SuffixAutomaton& sam) {
    long long count = 0;
    for (int i = 1; i < sam.st.size(); i++) {
        count += sam.st[i].len - sam.st[sam.st[i].link].len;
    }
    return count;
}

3. Count Number of Occurrences

struct SAMWithCount : SuffixAutomaton {
    vector<int> cnt;  // number of occurrences
    
    void computeCounts() {
        int n = st.size();
        cnt.assign(n, 0);
        
        // Mark states that correspond to prefixes
        int cur = 0;
        // ... (mark terminal states)
        
        // Topological sort by len
        vector<int> order(n);
        iota(order.begin(), order.end(), 0);
        sort(order.begin(), order.end(), [&](int a, int b) {
            return st[a].len > st[b].len;
        });
        
        // Propagate counts via suffix links
        for (int v : order) {
            if (st[v].link >= 0) {
                cnt[st[v].link] += cnt[v];
            }
        }
    }
    
    int countOccurrences(const string& pattern) {
        int cur = 0;
        for (char c : pattern) {
            if (!st[cur].next.count(c)) return 0;
            cur = st[cur].next.at(c);
        }
        return cnt[cur];
    }
};

4. Find Longest Common Substring

string longestCommonSubstring(const string& a, const string& b) {
    SuffixAutomaton sam;
    sam.build(a);
    
    int cur = 0, len = 0;
    int bestLen = 0, bestPos = 0;
    
    for (int i = 0; i < b.size(); i++) {
        char c = b[i];
        
        while (cur != 0 && !sam.st[cur].next.count(c)) {
            cur = sam.st[cur].link;
            len = sam.st[cur].len;
        }
        
        if (sam.st[cur].next.count(c)) {
            cur = sam.st[cur].next[c];
            len++;
        }
        
        if (len > bestLen) {
            bestLen = len;
            bestPos = i - len + 1;
        }
    }
    
    return b.substr(bestPos, bestLen);
}

5. Lexicographically K-th Substring

string kthSubstring(SuffixAutomaton& sam, long long k) {
    int n = sam.st.size();
    vector<long long> paths(n);
    
    // Count paths (substrings) from each state
    // Topological order by len (decreasing)
    vector<int> order(n);
    iota(order.begin(), order.end(), 0);
    sort(order.begin(), order.end(), [&](int a, int b) {
        return sam.st[a].len > sam.st[b].len;
    });
    
    for (int v : order) {
        paths[v] = 1;  // empty string from this state
        for (auto& [c, u] : sam.st[v].next) {
            paths[v] += paths[u];
        }
    }
    
    // Find k-th substring
    string result;
    int cur = 0;
    
    while (k > 0) {
        for (auto& [c, u] : sam.st[cur].next) {
            if (paths[u] >= k) {
                result += c;
                k--;  // count this transition
                cur = u;
                break;
            }
            k -= paths[u];
        }
    }
    
    return result;
}

Complexity

OperationTimeSpace
BuildO(n)O(n)O(n)O(n) or O(nΣ)O(n \cdot \Sigma)
Check substringO(m)O(m)O(1)O(1)
Count distinctO(n)O(n)O(1)O(1)

Tips

  1. SAM vs Suffix Array: SAM is better for online queries, SA is better for LCP queries
  2. Memory: use map for large alphabet, array for small alphabet
  3. Clone states: understand clone operation to debug effectively
  4. Suffix links form a tree: can use DFS/BFS on suffix link tree