Skip to content

Commit 6f74802

Browse files
committed
Add Longest Repeated Substring algorithm
1 parent ae68a78 commit 6f74802

File tree

1 file changed

+73
-0
lines changed

1 file changed

+73
-0
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
def longest_repeated_substring(s: str) -> str:
    """Return the longest substring of *s* that occurs at least twice.

    Occurrences are allowed to overlap, so ``"aaaa"`` yields ``"aaa"``.
    If several substrings share the maximum length, one of them is
    returned.  An empty string is returned when nothing repeats (which
    includes every input of length 0 or 1).

    The implementation builds a suffix automaton of *s*, counts how many
    end positions each state has, and picks the longest state seen at
    least twice.  States are ordered by length with a counting sort, so
    the whole routine is linear in ``len(s)`` (taking dict operations as
    O(1)).

    >>> longest_repeated_substring("banana")
    'ana'
    >>> longest_repeated_substring("abcd")
    ''
    """
    n = len(s)
    if n <= 1:
        return ""

    class State:
        """One suffix-automaton state."""

        __slots__ = ("next", "link", "length", "first_pos", "occ")

        def __init__(self):
            self.next = {}       # transitions: character -> state index
            self.link = -1       # suffix link (-1 only for the root)
            self.length = 0      # length of the longest string in the state
            self.first_pos = -1  # end index in s of the first occurrence
            self.occ = 0         # number of end positions (filled in later)

    st = [State()]
    last = 0

    def sa_extend(c, pos):
        """Append character *c* (located at index *pos* of s) to the automaton."""
        nonlocal last
        cur = len(st)
        st.append(State())
        st[cur].length = st[last].length + 1
        st[cur].first_pos = pos
        # Only states created for a real prefix start with one occurrence;
        # clones start at zero and receive their counts via suffix links.
        st[cur].occ = 1

        p = last
        while p >= 0 and c not in st[p].next:
            st[p].next[c] = cur
            p = st[p].link

        if p == -1:
            st[cur].link = 0
        else:
            q = st[p].next[c]
            if st[p].length + 1 == st[q].length:
                st[cur].link = q
            else:
                # q also represents strings longer than len(p) + 1, so split
                # off a clone that holds exactly the shorter ones.
                clone = len(st)
                st.append(State())
                st[clone].next = st[q].next.copy()
                st[clone].length = st[p].length + 1
                st[clone].link = st[q].link
                st[clone].first_pos = st[q].first_pos
                while p >= 0 and st[p].next.get(c) == q:
                    st[p].next[c] = clone
                    p = st[p].link
                st[q].link = clone
                st[cur].link = clone
        last = cur

    for i, ch in enumerate(s):
        sa_extend(ch, i)

    # Counting sort by length: every state length lies in 0..n, so bucketing
    # keeps this step linear instead of the O(n log n) of a comparison sort.
    by_length = [[] for _ in range(n + 1)]
    for v, state in enumerate(st):
        by_length[state.length].append(v)

    # Push occurrence counts along suffix links from the longest states to
    # the shortest; a link always targets a strictly shorter state, so each
    # state is complete before it is added to its link target.
    for bucket in reversed(by_length):
        for v in bucket:
            link = st[v].link
            if link != -1:
                st[link].occ += st[v].occ

    best_len = 0
    best_end = -1
    for state in st:
        if state.occ >= 2 and state.length > best_len:
            best_len = state.length
            best_end = state.first_pos

    if best_len == 0:
        return ""
    return s[best_end - best_len + 1: best_end + 1]
70+
71+
72+
if __name__ == "__main__":
    # Manual smoke check: show the longest repeated substring of "banana".
    demo_result = longest_repeated_substring("banana")
    print(demo_result)

0 commit comments

Comments
 (0)