Skip to content

Commit f77a9b4

Browse files
committed
Sort morphological features in case-insensitive alphabetical order, which better fits what is expected in UD datasets
1 parent cb50801 commit f77a9b4

File tree

1 file changed

+21
-3
lines changed

1 file changed

+21
-3
lines changed

src/edu/stanford/nlp/trees/ud/CoNLLUFeatures.java

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,23 @@
1515
* which is necessary for the CoNLLU format
1616
*/
1717
public class CoNLLUFeatures extends TreeMap<String, String> {
18+
public static class LowercaseComparator implements Comparator<String> {
19+
public int compare(String x, String y) {
20+
if (x == null && y == null) {
21+
return 0;
22+
}
23+
if (x == null) {
24+
return -1;
25+
}
26+
if (y == null) {
27+
return 1;
28+
}
29+
return x.compareToIgnoreCase(y);
30+
}
31+
}
32+
33+
static final LowercaseComparator comparator = new LowercaseComparator();
34+
1835
/**
1936
* Parses the value of the feature column in a CoNLL-U file
2037
* and stores them in this map (a sorted {@code TreeMap}) with the feature names as keys
@@ -24,7 +41,7 @@ public class CoNLLUFeatures extends TreeMap<String, String> {
2441
* @return A {@code TreeMap<String,String>} with the feature values.
2542
*/
2643
public CoNLLUFeatures(String featureString) {
27-
super();
44+
super(comparator);
2845

2946
if (!featureString.equals("_")) {
3047
String[] featValPairs = featureString.split("\\|");
@@ -36,11 +53,12 @@ public CoNLLUFeatures(String featureString) {
3653
}
3754

3855
public CoNLLUFeatures(Map<String, String> features) {
39-
super(features);
56+
super(comparator);
57+
putAll(features);
4058
}
4159

4260
public CoNLLUFeatures() {
43-
super();
61+
super(comparator);
4462
}
4563

4664

0 commit comments

Comments
 (0)