-
Notifications
You must be signed in to change notification settings - Fork 20k
refactor: redesign StringMatchFiniteAutomata
#5222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
vil02
merged 17 commits into
TheAlgorithms:master
from
samuelfac:cleanup_StringMatchFiniteAutomata
Jun 17, 2024
Merged
Changes from 4 commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
7d13207
refactor
9505edb
add test
4a8eff6
fix clang
6f9c5df
fix pmd
bdd4d62
remove main method
9fa6d68
refactor searchPattern with private class
24df2bb
fix checkstyle
639ba9b
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
samuelfac bef6769
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
samuelfac 08337d1
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
samuelfac c618113
Update src/main/java/com/thealgorithms/others/StringMatchFiniteAutoma…
samuelfac 1b5115b
Update src/main/java/com/thealgorithms/others/StringMatchFiniteAutoma…
samuelfac a46a5a0
Update src/main/java/com/thealgorithms/others/StringMatchFiniteAutoma…
samuelfac e106e64
fix clang
43227e8
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
samuelfac 841a7ac
tests: add more test cases
vil02 52287ba
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
vil02 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
118 changes: 78 additions & 40 deletions
118
src/main/java/com/thealgorithms/others/StringMatchFiniteAutomata.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,80 +1,118 @@ | ||
package com.thealgorithms.others; | ||
|
||
/** | ||
* @author Prateek Kumar Oraon (https://github.com/prateekKrOraon) | ||
*/ | ||
import java.util.Scanner; | ||
import java.util.Set; | ||
import java.util.TreeSet; | ||
|
||
/**
 * String matching with a deterministic
 * <a href="https://en.wikipedia.org/wiki/Finite-state_machine">finite automaton</a>.
 *
 * <p>The pattern is compiled into a transition table with one row per state, where state
 * {@code s} means "the last {@code s} characters read equal the first {@code s} characters of
 * the pattern". The text is then consumed one character at a time; whenever the automaton
 * reaches the state equal to the pattern length, a match ending at the current character is
 * recorded. The table is built per call and no state is kept between calls.
 *
 * @author <a href="https://github.com/prateekKrOraon">Prateek Kumar Oraon</a>
 */
public final class StringMatchFiniteAutomata {

    /** Number of character codes (0 to 255) that get a precomputed column in the transition table. */
    private static final int CHARS = 256;

    // Utility class: not meant to be instantiated.
    private StringMatchFiniteAutomata() {
    }

    /**
     * Reads a text line and a pattern line from standard input and prints every index at which
     * {@link #searchPattern(String, String)} reports a match, one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try (Scanner scanner = new Scanner(System.in)) {
            System.out.println("Enter text:");
            String text = scanner.nextLine();

            System.out.println("Enter pattern:");
            String pattern = scanner.nextLine();

            searchPattern(text, pattern).forEach(System.out::println);
        }
    }

    /**
     * Searches for all occurrences (overlapping ones included) of the pattern in the text.
     *
     * <p>For an empty pattern the automaton is always in its accepting state, so a match is
     * recorded after every text character, i.e. the result is {@code 1..text.length()}.
     *
     * @param text    the text to search within
     * @param pattern the pattern to search for
     * @return the start indices of the matches, in ascending order; empty if there is none
     */
    public static Set<Integer> searchPattern(String text, String pattern) {
        Set<Integer> indexFound = new TreeSet<>();
        int patternLength = pattern.length();
        int[][] transitions = computeFiniteAutomata(pattern, patternLength);

        int state = 0; // nothing matched yet
        for (int i = 0; i < text.length(); i++) {
            char symbol = text.charAt(i);
            // Characters outside the tabulated range have no column; compute their transition
            // directly instead of indexing past the end of the row.
            state = symbol < CHARS ? transitions[state][symbol] : getNextState(pattern, patternLength, state, symbol);

            if (state == patternLength) {
                indexFound.add(i - patternLength + 1);
            }
        }
        return indexFound;
    }

    /**
     * Builds the transition table for the given pattern.
     *
     * @param pattern       the pattern to preprocess
     * @param patternLength the length of the pattern
     * @return a table with {@code patternLength + 1} rows and {@link #CHARS} columns where
     *         {@code table[state][x]} is the state reached from {@code state} on character {@code x}
     */
    private static int[][] computeFiniteAutomata(String pattern, int patternLength) {
        int[][] table = new int[patternLength + 1][CHARS];
        for (int state = 0; state <= patternLength; ++state) {
            for (int x = 0; x < CHARS; ++x) {
                table[state][x] = getNextState(pattern, patternLength, state, x);
            }
        }
        return table;
    }

    /**
     * Computes the state reached from {@code state} after reading character {@code x}.
     *
     * @param pattern       the pattern being matched
     * @param patternLength the length of the pattern
     * @param state         the current state (number of pattern characters matched so far)
     * @param x             the character read, as a character code
     * @return the length of the longest pattern prefix that is a suffix of the first
     *         {@code state} pattern characters followed by {@code x}
     */
    private static int getNextState(String pattern, int patternLength, int state, int x) {
        // The character extends the current match.
        if (state < patternLength && x == pattern.charAt(state)) {
            return state + 1;
        }

        // Otherwise fall back to the longest shorter prefix: it must end in x, and its first
        // ns - 1 characters must equal the last ns - 1 characters matched before x.
        for (int ns = state; ns > 0; ns--) {
            if (pattern.charAt(ns - 1) == x && pattern.regionMatches(0, pattern, state - ns + 1, ns - 1)) {
                return ns;
            }
        }

        // No prefix of the pattern ends with x here.
        return 0;
    }
}
30 changes: 30 additions & 0 deletions
30
src/test/java/com/thealgorithms/others/StringMatchFiniteAutomataTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package com.thealgorithms.others; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
import java.util.Set; | ||
import java.util.stream.Stream; | ||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.Arguments; | ||
import org.junit.jupiter.params.provider.MethodSource; | ||
|
||
class StringMatchFiniteAutomataTest { | ||
|
||
@ParameterizedTest | ||
@MethodSource("provideTestCases") | ||
void searchPattern(String text, String pattern, Set<Integer> expectedOutput) { | ||
assertEquals(expectedOutput, StringMatchFiniteAutomata.searchPattern(text, pattern)); | ||
} | ||
|
||
private static Stream<Arguments> provideTestCases() { | ||
return Stream.of(Arguments.of("abcbcabc", "abc", Set.of(0, 5)), // | ||
Arguments.of("", "abc", Set.of()), // | ||
samuelfac marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Arguments.of("abcdefg", "xyz", Set.of()), // | ||
Arguments.of("abcde", "", Set.of(1, 2, 3, 4, 5)), // | ||
Arguments.of("abcabcabc", "abc", Set.of(0, 3, 6)), // | ||
Arguments.of("abcabcabc", "abcabcabc", Set.of(0)), // | ||
Arguments.of("aaabbbaaa", "aaa", Set.of(0, 6)), // | ||
Arguments.of("abcdefg", "efg", Set.of(4)) // | ||
); | ||
} | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.