/**
 * Implementation of the Knuth-Morris-Pratt string matching algorithm.
 * Usage: see {@link #main(String[])} for an example.
 */
public final class KMP {
    private KMP() {
    }

    /**
     * A working example: reports every occurrence of "AAAA" in "AAAAABAAABA".
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String haystack = "AAAAABAAABA"; // This is the full string
        final String needle = "AAAA"; // This is the substring that we want to find
        kmpMatcher(haystack, needle);
    }

    /**
     * Prints the starting index of every (possibly overlapping) occurrence of
     * {@code needle} in {@code haystack} to standard output, one line per match
     * in the form {@code "Pattern starts: <index>"}.
     *
     * <p>A {@code null} haystack, a {@code null} needle or an empty needle
     * produces no output.
     *
     * @param haystack the full string to search in
     * @param needle the substring to look for
     */
    public static void kmpMatcher(final String haystack, final String needle) {
        // Nothing meaningful to report for missing input or an empty pattern;
        // without this guard needle.charAt(0) below would throw.
        if (haystack == null || needle == null || needle.isEmpty()) {
            return;
        }

        final int m = haystack.length();
        final int n = needle.length();
        final int[] pi = computePrefixFunction(needle);
        int q = 0; // number of needle characters currently matched
        for (int i = 0; i < m; i++) {
            // On a mismatch fall back to the longest border of the matched prefix.
            while (q > 0 && haystack.charAt(i) != needle.charAt(q)) {
                q = pi[q - 1];
            }

            if (haystack.charAt(i) == needle.charAt(q)) {
                q++;
            }

            if (q == n) {
                System.out.println("Pattern starts: " + (i + 1 - n));
                // Keep the longest border so overlapping matches are found too.
                q = pi[q - 1];
            }
        }
    }

    /**
     * Computes the prefix function of {@code p}: {@code pi[i]} is the length of
     * the longest proper prefix of {@code p[0..i]} that is also a suffix of it.
     *
     * @param p the pattern; may be empty, in which case an empty array is returned
     * @return the prefix function, with the same length as {@code p}
     */
    private static int[] computePrefixFunction(final String p) {
        final int n = p.length();
        // Java zero-initialises int arrays, so pi[0] is already 0; not writing it
        // explicitly keeps this method safe for an empty pattern.
        final int[] pi = new int[n];
        int q = 0;
        for (int i = 1; i < n; i++) {
            while (q > 0 && p.charAt(q) != p.charAt(i)) {
                q = pi[q - 1];
            }

            if (p.charAt(q) == p.charAt(i)) {
                q++;
            }

            pi[i] = q;
        }
        return pi;
    }
}
import java.util.ArrayList;
import java.util.List;

/**
 * Implementation of the Knuth-Morris-Pratt (KMP) string matching algorithm.
 * KMP searches for occurrences of a pattern within a text by using a
 * pre-computed failure function (the LPS array) to avoid re-comparing
 * characters that are already known to match.
 */
final class KnuthMorrisPratt {
    private KnuthMorrisPratt() {
    }

    /**
     * Computes the Longest Proper Prefix which is also Suffix (LPS) array for
     * the given pattern: {@code lps[i]} is the length of the longest proper
     * prefix of {@code pattern[0..i]} that is also a suffix of it.
     *
     * @param pattern the pattern to compute the LPS array for
     * @return the LPS array, with the same length as {@code pattern}
     *         (empty for an empty pattern)
     * @throws NullPointerException if {@code pattern} is {@code null}
     */
    public static int[] computeLps(final String pattern) {
        final int n = pattern.length();
        // Java zero-initialises int arrays, so lps[0] is already 0; not writing it
        // explicitly keeps this method safe for an empty pattern.
        final int[] lps = new int[n];
        int len = 0; // length of the border of the prefix processed so far
        int i = 1;
        while (i < n) {
            if (pattern.charAt(i) == pattern.charAt(len)) {
                len++;
                lps[i] = len;
                i++;
            } else if (len != 0) {
                // Retry with the next shorter border; i is deliberately not advanced.
                len = lps[len - 1];
            } else {
                lps[i] = 0;
                i++;
            }
        }
        return lps;
    }

    /**
     * Searches for all (possibly overlapping) occurrences of the pattern in the text.
     *
     * @param text the text to search in
     * @param pattern the pattern to search for
     * @return the starting indices of every occurrence, in ascending order;
     *         empty if either argument is {@code null} or the pattern is empty
     */
    public static List<Integer> search(final String text, final String pattern) {
        final List<Integer> occurrences = new ArrayList<>();
        if (pattern == null || pattern.isEmpty() || text == null) {
            return occurrences;
        }

        final int[] lps = computeLps(pattern);
        final int n = text.length();
        final int m = pattern.length();
        int i = 0; // position in text
        int j = 0; // number of pattern characters currently matched
        while (i < n) {
            if (text.charAt(i) == pattern.charAt(j)) {
                i++;
                j++;
                if (j == m) {
                    occurrences.add(i - j);
                    // Keep the longest border so overlapping matches are found too.
                    j = lps[j - 1];
                }
            } else if (j != 0) {
                // Fall back within the pattern without moving the text cursor.
                j = lps[j - 1];
            } else {
                i++;
            }
        }
        return occurrences;
    }
}