-
Notifications
You must be signed in to change notification settings - Fork 20k
refactor: redesign StringMatchFiniteAutomata
#5222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
vil02
merged 17 commits into
TheAlgorithms:master
from
samuelfac:cleanup_StringMatchFiniteAutomata
Jun 17, 2024
Merged
Changes from 15 commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
7d13207
refactor
9505edb
add test
4a8eff6
fix clang
6f9c5df
fix pmd
bdd4d62
remove main method
9fa6d68
refactor searchPattern with private class
24df2bb
fix checkstyle
639ba9b
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
samuelfac bef6769
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
samuelfac 08337d1
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
samuelfac c618113
Update src/main/java/com/thealgorithms/others/StringMatchFiniteAutomata.java
samuelfac 1b5115b
Update src/main/java/com/thealgorithms/others/StringMatchFiniteAutomata.java
samuelfac a46a5a0
Update src/main/java/com/thealgorithms/others/StringMatchFiniteAutomata.java
samuelfac e106e64
fix clang
43227e8
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
samuelfac 841a7ac
tests: add more test cases
vil02 52287ba
Merge branch 'master' into cleanup_StringMatchFiniteAutomata
vil02 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
141 changes: 93 additions & 48 deletions
141
src/main/java/com/thealgorithms/others/StringMatchFiniteAutomata.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,80 +1,125 @@ | ||
package com.thealgorithms.others; | ||
|
||
import java.util.Set; | ||
import java.util.TreeSet; | ||
|
||
/** | ||
* @author Prateek Kumar Oraon (https://github.com/prateekKrOraon) | ||
* A class to perform string matching using <a href="https://en.wikipedia.org/wiki/Finite-state_machine">finite automata</a>. | ||
* | ||
* @author <a href="https://github.com/prateekKrOraon">Prateek Kumar Oraon</a> | ||
*/ | ||
import java.util.Scanner; | ||
|
||
// An implementation of string matching using finite automata | ||
public final class StringMatchFiniteAutomata { | ||
private StringMatchFiniteAutomata() { | ||
} | ||
|
||
public static final int CHARS = 256; | ||
public static int[][] fa; | ||
public static Scanner scanner = null; | ||
|
||
public static void main(String[] args) { | ||
scanner = new Scanner(System.in); | ||
System.out.println("Enter String"); | ||
String text = scanner.nextLine(); | ||
System.out.println("Enter pattern"); | ||
String pat = scanner.nextLine(); | ||
|
||
searchPat(text, pat); | ||
// Constants | ||
private static final int CHARS = Character.MAX_VALUE + 1; // Total number of characters in the input alphabet | ||
|
||
scanner.close(); | ||
// Private constructor to prevent instantiation | ||
private StringMatchFiniteAutomata() { | ||
} | ||
|
||
public static void searchPat(String text, String pat) { | ||
int m = pat.length(); | ||
int n = text.length(); | ||
|
||
fa = new int[m + 1][CHARS]; | ||
|
||
computeFA(pat, m, fa); | ||
|
||
int state = 0; | ||
for (int i = 0; i < n; i++) { | ||
state = fa[state][text.charAt(i)]; | ||
|
||
if (state == m) { | ||
System.out.println("Pattern found at index " + (i - m + 1)); | ||
/** | ||
* Searches for the pattern in the given text using finite automata. | ||
* | ||
* @param text The text to search within. | ||
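* @param pattern The pattern to search for. | ||
* @return The sorted set of indices in {@code text} at which an occurrence of {@code pattern} starts; empty if there is none. | ||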
*/ | ||
public static Set<Integer> searchPattern(final String text, final String pattern) { | ||
final var stateTransitionTable = computeStateTransitionTable(pattern); | ||
FiniteAutomata finiteAutomata = new FiniteAutomata(stateTransitionTable); | ||
|
||
Set<Integer> indexFound = new TreeSet<>(); | ||
for (int i = 0; i < text.length(); i++) { | ||
finiteAutomata.consume(text.charAt(i)); | ||
|
||
if (finiteAutomata.getState() == pattern.length()) { | ||
indexFound.add(i - pattern.length() + 1); | ||
} | ||
} | ||
return indexFound; | ||
} | ||
|
||
// Computes finite automata for the pattern | ||
public static void computeFA(String pat, int m, int[][] fa) { | ||
for (int state = 0; state <= m; ++state) { | ||
/** | ||
* Computes the finite automata table for the given pattern. | ||
* | ||
* @param pattern The pattern to preprocess. | ||
* @return The state transition table. | ||
*/ | ||
private static int[][] computeStateTransitionTable(final String pattern) { | ||
final int patternLength = pattern.length(); | ||
int[][] stateTransitionTable = new int[patternLength + 1][CHARS]; | ||
|
||
for (int state = 0; state <= patternLength; ++state) { | ||
for (int x = 0; x < CHARS; ++x) { | ||
fa[state][x] = getNextState(pat, m, state, x); | ||
stateTransitionTable[state][x] = getNextState(pattern, patternLength, state, x); | ||
} | ||
} | ||
|
||
return stateTransitionTable; | ||
} | ||
|
||
public static int getNextState(String pat, int m, int state, int x) { | ||
// if current state is less than length of pattern | ||
// and input character of pattern matches the character in the alphabet | ||
// then automata goes to next state | ||
if (state < m && x == pat.charAt(state)) { | ||
/** | ||
* Gets the next state for the finite automata. | ||
* | ||
* @param pattern The pattern being matched. | ||
* @param patternLength The length of the pattern. | ||
* @param state The current state. | ||
* @param x The current character from the input alphabet. | ||
* @return The next state. | ||
*/ | ||
private static int getNextState(final String pattern, final int patternLength, final int state, final int x) { | ||
// If the current state is less than the length of the pattern | ||
// and the character matches the pattern character, go to the next state | ||
if (state < patternLength && x == pattern.charAt(state)) { | ||
return state + 1; | ||
} | ||
|
||
// Check for the highest prefix which is also a suffix | ||
for (int ns = state; ns > 0; ns--) { | ||
if (pat.charAt(ns - 1) == x) { | ||
if (pattern.charAt(ns - 1) == x) { | ||
boolean match = true; | ||
for (int i = 0; i < ns - 1; i++) { | ||
if (pat.charAt(i) != pat.charAt(state - ns + i + 1)) { | ||
if (pattern.charAt(i) != pattern.charAt(state - ns + i + 1)) { | ||
match = false; | ||
break; | ||
} | ||
|
||
if (i == ns - 1) { | ||
return ns; | ||
} | ||
} | ||
if (match) { | ||
return ns; | ||
} | ||
} | ||
} | ||
|
||
// If no prefix which is also a suffix is found, return 0 | ||
return 0; | ||
} | ||
|
||
/** | ||
* A class representing the finite automaton used for pattern matching. | ||
*/ | ||
private static final class FiniteAutomata { | ||
private int state = 0; | ||
private final int[][] stateTransitionTable; | ||
|
||
private FiniteAutomata(int[][] stateTransitionTable) { | ||
this.stateTransitionTable = stateTransitionTable; | ||
} | ||
|
||
/** | ||
* Consumes an input character and transitions to the next state. | ||
* | ||
* @param input The input character. | ||
*/ | ||
private void consume(final char input) { | ||
state = stateTransitionTable[state][input]; | ||
} | ||
|
||
/** | ||
* Gets the current state of the finite automaton. | ||
* | ||
* @return The current state. | ||
*/ | ||
private int getState() { | ||
return state; | ||
} | ||
} | ||
} |
23 changes: 23 additions & 0 deletions
23
src/test/java/com/thealgorithms/others/StringMatchFiniteAutomataTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package com.thealgorithms.others; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
import java.util.Set; | ||
import java.util.stream.Stream; | ||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.Arguments; | ||
import org.junit.jupiter.params.provider.MethodSource; | ||
|
||
class StringMatchFiniteAutomataTest { | ||
|
||
@ParameterizedTest | ||
@MethodSource("provideTestCases") | ||
void searchPattern(String text, String pattern, Set<Integer> expectedOutput) { | ||
assertEquals(expectedOutput, StringMatchFiniteAutomata.searchPattern(text, pattern)); | ||
} | ||
|
||
private static Stream<Arguments> provideTestCases() { | ||
return Stream.of(Arguments.of("abcbcabc", "abc", Set.of(0, 5)), Arguments.of("", "abc", Set.of()), Arguments.of("abcdefg", "xyz", Set.of()), Arguments.of("abcde", "", Set.of(1, 2, 3, 4, 5)), Arguments.of("abcabcabc", "abc", Set.of(0, 3, 6)), Arguments.of("abcabcabc", "abcabcabc", Set.of(0)), | ||
Arguments.of("aaabbbaaa", "aaa", Set.of(0, 6)), Arguments.of("abcdefg", "efg", Set.of(4))); | ||
} | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.