Skip to content

Commit

Permalink
Reduce errors in ICU4J Collator executor results (#185)
Browse files Browse the repository at this point in the history
* Fix parsing for Collator for ICU4J executor

* Refactor ICU4J executor run configs

* Add tests for the ICU4J executor for collator

* Update ICU4J executor code for Collator to directly call the superclass method

* Ensure that ICU4J runs Collator with normalization turned on

* Remove unit test based on upstream test data gen bug re: double escaping
  • Loading branch information
echeran authored Mar 4, 2024
1 parent 7e91327 commit 96a46e5
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,27 +39,8 @@ public ITestTypeInputJson inputMapToJson(Map<String, Object> inputMapData) {

result.locale = (String) inputMapData.get("locale", null);

boolean ignorePunctuation = false;
Optional<Object> ignorePunctuationStr = inputMapData.get("ignorePunctuation");
try {
if (ignorePunctuationStr.isPresent()) {
ignorePunctuation = Boolean.parseBoolean((String) ignorePunctuationStr.get());
}
} catch (Exception e) {
// do nothing, default is false
}
result.ignorePunctuation = ignorePunctuation;

int line = 0;
Optional<Object> lineStr = inputMapData.get("line");
try {
if (lineStr.isPresent()) {
line = Integer.parseInt((String) lineStr.get());
}
} catch (Exception e) {
// do nothing, default is 0
}
result.line = line;
result.ignorePunctuation = (boolean) inputMapData.get("ignorePunctuation", false);
result.line = (int) ((double) inputMapData.get("line", 0.0));

result.compare_type = (String) inputMapData.get("compare_type", null);
result.test_description = (String) inputMapData.get("test_description", null);
Expand Down Expand Up @@ -158,7 +139,7 @@ public Collator getCollatorForInput(CollatorInputJson input) {

if (input.locale == null) {
if (input.rules == null) {
result = (RuleBasedCollator) RuleBasedCollator.getInstance();
result = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
} else {
try {
result = new RuleBasedCollator(input.rules);
Expand All @@ -168,7 +149,7 @@ public Collator getCollatorForInput(CollatorInputJson input) {
}
} else {
ULocale locale = ULocale.forLanguageTag(input.locale);
result = (RuleBasedCollator) RuleBasedCollator.getInstance(locale);
result = (RuleBasedCollator) Collator.getInstance(locale);
if (input.rules != null) {
String defaultRules = result.getRules();
String newRules = defaultRules + input.rules;
Expand All @@ -180,6 +161,10 @@ public Collator getCollatorForInput(CollatorInputJson input) {
}
}

// ensure that ICU performs decomposition before collation in order to get proper results,
// per documentation: https://unicode-org.github.io/icu-docs/apidoc/dev/icu4j/com/ibm/icu/text/Collator.html
result.setDecomposition(Collator.CANONICAL_DECOMPOSITION);

if (input.ignorePunctuation) {
result.setAlternateHandlingShifted(true);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.unicode.conformance.collator;

import static org.junit.Assert.assertTrue;

import org.junit.Test;
import org.unicode.conformance.testtype.collator.CollatorOutputJson;
import org.unicode.conformance.testtype.collator.CollatorTester;

/**
 * Tests that pass a single JSON test-case string to {@link CollatorTester} and check the
 * {@code result} field of the structured output it returns.
 */
public class CollatorTest {

  /**
   * Runs a test case whose JSON sets {@code ignorePunctuation} to {@code true} and expects the
   * output's {@code result} flag to be {@code true}.
   */
  @Test
  public void testIgnorePunctuation() {
    String json =
        "{\"label\":\"0001424\",\"s1\":\" ?\",\"s2\":\" ?\",\"line\":59,\"ignorePunctuation\":true}";

    assertTrue(collate(json).result);
  }

  /**
   * Runs a test case whose JSON line starts with a tab and whose {@code s1}/{@code s2} values
   * are written as literal characters rather than escape sequences, and expects the output's
   * {@code result} flag to be {@code true}.
   */
  @Test
  public void testNonEscaped() {
    String json =
        "\t{\"label\":\"0006747\",\"s1\":\"̴?\",\"s2\":\"̴̲\",\"line\":5382,\"ignorePunctuation\":true}";

    assertTrue(collate(json).result);
  }

  /**
   * Hands the raw JSON test case to the shared {@link CollatorTester} instance and casts the
   * structured output to the collator-specific output type.
   *
   * @param json one test case, serialized as a JSON object string
   * @return the output produced for that test case
   */
  private static CollatorOutputJson collate(String json) {
    return (CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(json);
  }
}
16 changes: 1 addition & 15 deletions run_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -247,21 +247,7 @@
"icu_version": "icu73",
"exec": "icu4j",
"test_type": [
"collation_short"
],
"per_execution": 10000
}
},
{
"prereq": {
"name": "mvn-icu4j-73-shaded",
"version": "73",
"command": "mvn -f ../executors/icu4j/73/executor-icu4j/pom.xml package"
},
"run": {
"icu_version": "icu73",
"exec": "icu4j",
"test_type": [
"collation_short",
"lang_names",
"likely_subtags",
"number_fmt"
Expand Down

0 comments on commit 96a46e5

Please sign in to comment.