Skip to content

Commit 96a46e5

Browse files
authored
Reduce errors in ICU4J Collator executor results (#185)
* Fix parsing for Collator for ICU4J executor
* Refactor ICU4J executor run configs
* Add tests for the ICU4J executor for collator
* Update ICU4J executor code for Collator to directly call the superclass method
* Ensure that ICU4J runs Collator with normalization turned on
* Remove unit test based on upstream test data gen bug re: double escaping
1 parent 7e91327 commit 96a46e5

File tree

3 files changed

+42
-38
lines changed

3 files changed

+42
-38
lines changed

executors/icu4j/73/executor-icu4j/src/main/java/org/unicode/conformance/testtype/collator/CollatorTester.java

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -39,27 +39,8 @@ public ITestTypeInputJson inputMapToJson(Map<String, Object> inputMapData) {
3939

4040
result.locale = (String) inputMapData.get("locale", null);
4141

42-
boolean ignorePunctuation = false;
43-
Optional<Object> ignorePunctuationStr = inputMapData.get("ignorePunctuation");
44-
try {
45-
if (ignorePunctuationStr.isPresent()) {
46-
ignorePunctuation = Boolean.parseBoolean((String) ignorePunctuationStr.get());
47-
}
48-
} catch (Exception e) {
49-
// do nothing, default is false
50-
}
51-
result.ignorePunctuation = ignorePunctuation;
52-
53-
int line = 0;
54-
Optional<Object> lineStr = inputMapData.get("line");
55-
try {
56-
if (lineStr.isPresent()) {
57-
line = Integer.parseInt((String) lineStr.get());
58-
}
59-
} catch (Exception e) {
60-
// do nothing, default is 0
61-
}
62-
result.line = line;
42+
result.ignorePunctuation = (boolean) inputMapData.get("ignorePunctuation", false);
43+
result.line = (int) ((double) inputMapData.get("line", 0.0));
6344

6445
result.compare_type = (String) inputMapData.get("compare_type", null);
6546
result.test_description = (String) inputMapData.get("test_description", null);
@@ -158,7 +139,7 @@ public Collator getCollatorForInput(CollatorInputJson input) {
158139

159140
if (input.locale == null) {
160141
if (input.rules == null) {
161-
result = (RuleBasedCollator) RuleBasedCollator.getInstance();
142+
result = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
162143
} else {
163144
try {
164145
result = new RuleBasedCollator(input.rules);
@@ -168,7 +149,7 @@ public Collator getCollatorForInput(CollatorInputJson input) {
168149
}
169150
} else {
170151
ULocale locale = ULocale.forLanguageTag(input.locale);
171-
result = (RuleBasedCollator) RuleBasedCollator.getInstance(locale);
152+
result = (RuleBasedCollator) Collator.getInstance(locale);
172153
if (input.rules != null) {
173154
String defaultRules = result.getRules();
174155
String newRules = defaultRules + input.rules;
@@ -180,6 +161,10 @@ public Collator getCollatorForInput(CollatorInputJson input) {
180161
}
181162
}
182163

164+
// ensure that ICU performs decomposition before collation in order to get proper results,
165+
// per documentation: https://unicode-org.github.io/icu-docs/apidoc/dev/icu4j/com/ibm/icu/text/Collator.html
166+
result.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
167+
183168
if (input.ignorePunctuation) {
184169
result.setAlternateHandlingShifted(true);
185170
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package org.unicode.conformance.collator;
2+
3+
import static org.junit.Assert.assertTrue;
4+
5+
import org.junit.Test;
6+
import org.unicode.conformance.testtype.collator.CollatorOutputJson;
7+
import org.unicode.conformance.testtype.collator.CollatorTester;
8+
9+
/**
 * Unit tests that drive {@link CollatorTester} with raw JSON test-case strings, passed to
 * {@code CollatorTester.INSTANCE.getStructuredOutputFromInputStr}, and check the {@code result}
 * field of the returned {@link CollatorOutputJson}.
 *
 * <p>Both inputs supply {@code line} as a JSON number and {@code ignorePunctuation} as a JSON
 * boolean (not as quoted strings).
 */
public class CollatorTest {
10+
11+
  /**
   * Runs a single collation test case with {@code ignorePunctuation} set to {@code true} and
   * expects {@code output.result} to be {@code true}.
   */
@Test
12+
public void testIgnorePunctuation() {
13+
String testInput =
14+
"{\"label\":\"0001424\",\"s1\":\" ?\",\"s2\":\" ?\",\"line\":59,\"ignorePunctuation\":true}";
15+
16+
CollatorOutputJson output =
17+
(CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
18+
19+
    // NOTE(review): presumably `result` reports whether s1 and s2 collate in the expected order
    // - confirm against CollatorOutputJson.
assertTrue(output.result);
20+
}
21+
22+
  /**
   * Runs a test case whose input line begins with a tab and whose {@code s1}/{@code s2} values
   * are written as literal (non-escaped) characters, and expects {@code output.result} to be
   * {@code true}.
   *
   * <p>NOTE(review): the s1/s2 literals contain non-ASCII combining characters that may not have
   * survived copy/paste intact - verify against the upstream test data.
   */
@Test
23+
public void testNonEscaped() {
24+
String testInput =
25+
"\t{\"label\":\"0006747\",\"s1\":\"̴?\",\"s2\":\"̴̲\",\"line\":5382,\"ignorePunctuation\":true}";
26+
27+
CollatorOutputJson output =
28+
(CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
29+
30+
assertTrue(output.result);
31+
}
32+
33+
}

run_config.json

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -247,21 +247,7 @@
247247
"icu_version": "icu73",
248248
"exec": "icu4j",
249249
"test_type": [
250-
"collation_short"
251-
],
252-
"per_execution": 10000
253-
}
254-
},
255-
{
256-
"prereq": {
257-
"name": "mvn-icu4j-73-shaded",
258-
"version": "73",
259-
"command": "mvn -f ../executors/icu4j/73/executor-icu4j/pom.xml package"
260-
},
261-
"run": {
262-
"icu_version": "icu73",
263-
"exec": "icu4j",
264-
"test_type": [
250+
"collation_short",
265251
"lang_names",
266252
"likely_subtags",
267253
"number_fmt"

0 commit comments

Comments
 (0)