24
24
import java .util .ArrayList ;
25
25
import java .util .List ;
26
26
import java .util .Objects ;
27
+ import java .util .stream .Collectors ;
27
28
28
29
/**
29
30
* @author Michele Rastelli
@@ -38,26 +39,89 @@ private static String stringToHex(String str) {
38
39
return hex .toString ();
39
40
}
40
41
42
+ private static String hexToString (String hex ) {
43
+ final StringBuilder result = new StringBuilder ();
44
+ for (int i = 0 ; i < hex .length () - 1 ; i += 2 ) {
45
+ String tempInHex = hex .substring (i , (i + 2 ));
46
+ int decimal = Integer .parseInt (tempInHex , 16 );
47
+ result .append ((char ) decimal );
48
+ }
49
+ return result .toString ();
50
+ }
51
+
41
52
/**
 * Creates properties with an empty stopword list and the hex flag set to {@code true}.
 */
public StopwordsAnalyzerProperties() {
    this.hex = true;
    this.stopwords = new ArrayList<>();
}
44
56
45
- private List <String > stopwords ;
57
+ private final List <String > stopwords ;
58
+ private final boolean hex ;
46
59
47
60
/**
48
- * @return array of hex-encoded strings that describe the tokens to be discarded.
61
+ * @return list of hex-encoded strings that describe the tokens to be discarded.
62
+ * @deprecated use {@link #getStopwordsAsHexList()} instead
49
63
*/
64
+ @ Deprecated
50
65
public List <String > getStopwords () {
51
- return stopwords ;
66
+ return getStopwordsAsHexList ();
67
+ }
68
+
69
+ /**
70
+ * @return list of verbatim strings that describe the tokens to be discarded.
71
+ */
72
+ public List <String > getStopwordsAsStringList () {
73
+ if (hex ) {
74
+ return stopwords .stream ()
75
+ .map (StopwordsAnalyzerProperties ::hexToString )
76
+ .collect (Collectors .toList ());
77
+ } else {
78
+ return stopwords ;
79
+ }
52
80
}
53
81
82
+ /**
83
+ * @return list of hex-encoded strings that describe the tokens to be discarded.
84
+ */
85
+ public List <String > getStopwordsAsHexList () {
86
+ if (hex ) {
87
+ return stopwords ;
88
+ } else {
89
+ return stopwords .stream ()
90
+ .map (StopwordsAnalyzerProperties ::stringToHex )
91
+ .collect (Collectors .toList ());
92
+ }
93
+ }
94
+
95
+ /**
96
+ * @return if false each string in {@link #stopwords} is used as verbatim, if true as hex-encoded.
97
+ */
98
+ public boolean getHex () {
99
+ return hex ;
100
+ }
101
+
102
+ /**
103
+ * @param value stopword as verbatim string
104
+ * @return this
105
+ */
54
106
public StopwordsAnalyzerProperties addStopwordAsString (final String value ) {
55
- stopwords .add (stringToHex (value ));
107
+ if (hex ) {
108
+ stopwords .add (stringToHex (value ));
109
+ } else {
110
+ stopwords .add (value );
111
+ }
56
112
return this ;
57
113
}
58
114
115
+ /**
116
+ * @param value stopword as hex string
117
+ * @return this
118
+ */
59
119
public StopwordsAnalyzerProperties addStopwordAsHex (final String value ) {
60
- stopwords .add (value );
120
+ if (hex ) {
121
+ stopwords .add (value );
122
+ } else {
123
+ stopwords .add (hexToString (value ));
124
+ }
61
125
return this ;
62
126
}
63
127
@@ -66,11 +130,11 @@ public boolean equals(Object o) {
66
130
if (this == o ) return true ;
67
131
if (o == null || getClass () != o .getClass ()) return false ;
68
132
StopwordsAnalyzerProperties that = (StopwordsAnalyzerProperties ) o ;
69
- return Objects .equals (stopwords , that .stopwords );
133
+ return hex == that . hex && Objects .equals (stopwords , that .stopwords );
70
134
}
71
135
72
136
@ Override
73
137
public int hashCode () {
74
- return Objects .hash (stopwords );
138
+ return Objects .hash (stopwords , hex );
75
139
}
76
140
}
0 commit comments