Skip to content

Commit

Permalink
Parallelize test execution and schema checks using Python multiprocessing (#162)
Browse files Browse the repository at this point in the history

* Adding ICU4C running collation tests - first try

* Cache ICU4C binaries in GH and locally, only if they don't exist

* Install JSON-C dependency if not installed at beginning of CI or e2e script

* Fix bash if condition syntax

* CPP Collation much better now

* Parallelize validations with schema

* Add parallel processing for test generation

---------

Co-authored-by: Elango Cheran <[email protected]>
  • Loading branch information
sven-oly and echeran authored Jan 19, 2024
1 parent acde9b9 commit e7c959e
Show file tree
Hide file tree
Showing 15 changed files with 544 additions and 143 deletions.
221 changes: 187 additions & 34 deletions executors/cpp/coll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <unicode/locid.h>
#include <unicode/utypes.h>
#include <unicode/coll.h>
#include <unicode/tblcoll.h>
#include <unicode/ucol.h>
#include <unicode/unistr.h>
#include <unicode/ustring.h>
Expand All @@ -36,12 +37,14 @@ using std::string;
using icu::Locale;
using icu::UnicodeString;
using icu::Collator;
using icu::RuleBasedCollator;

const string error_message = "error";

/**
* test_collator -- process JSON inputs, run comparator, return result
*/
const string test_collator(json_object *json_in) //
{
const string test_collator(json_object *json_in) {
UErrorCode status = U_ZERO_ERROR;

json_object *label_obj = json_object_object_get(json_in, "label");
Expand All @@ -52,27 +55,59 @@ const string test_collator(json_object *json_in) //

string string1 = json_object_get_string(str1);
string string2 = json_object_get_string(str2);
// cout << "s1 = " << string1 << " s2 = " << string2 << endl;

UnicodeString us1 = UnicodeString(string1.c_str()).unescape();
UnicodeString us2 = UnicodeString(string2.c_str()).unescape();

// Check unescaped versions.
char char_out1[1000] = "";
char char_out2[1000] = "";
int32_t chars_out = us1.extract(char_out1, 1000, nullptr, status);
chars_out = us2.extract(char_out2, 1000, nullptr, status);
// Does this conversion preserve the data?
UnicodeString us1 = UnicodeString::fromUTF8(string1); //(string1.c_str()).unescape();
UnicodeString us2 = UnicodeString::fromUTF8(string2); // .c_str()).unescape();

// cout << "us1 = " << char_out1 << " us2 = " << char_out2 << endl;
string test_result;

json_object *locale_obj = json_object_object_get(json_in, "locale");
const char *locale_string;
if (locale_obj) {
locale_string = json_object_get_string(locale_obj);
} else {
locale_string = nullptr;
locale_string = "und";
}

// Comparison type
json_object *compare_type_obj = json_object_object_get(json_in, "compare_type");
string compare_type_string = "";
if (compare_type_obj) {
compare_type_string = json_object_get_string(compare_type_obj);
}

// Strength of comparison
Collator::ECollationStrength strength_type = Collator::PRIMARY;
string strength_string = "";

json_object *strength_obj = json_object_object_get(json_in, "strength");
if (strength_obj) {
strength_string = json_object_get_string(strength_obj);
if (strength_string == "primary") {
strength_type = Collator::PRIMARY;
}
else if (strength_string == "secondary") {
strength_type = Collator:: SECONDARY;
}
else if (strength_string == "tertiary") {
strength_type = Collator::TERTIARY;
}
else if (strength_string == "quaternary") {
strength_type = Collator::QUATERNARY;
}
else if (strength_string == "IDENTICAL") {
strength_type = Collator::IDENTICAL;
}
}

// Check for rule-based collation
json_object *rules_obj = json_object_object_get(json_in, "rules");
string rules_string = "";
if (rules_obj) {
rules_string = json_object_get_string(rules_obj);
}
UnicodeString uni_rules = UnicodeString::fromUTF8(rules_string);

// Allow for different levels or types of comparison.
json_object *compare_type = json_object_object_get(json_in, "compare_type");
Expand All @@ -86,36 +121,154 @@ const string test_collator(json_object *json_in) //
const int32_t unspecified_length = -1;
bool coll_result = true;

// The json test output.
json_object *return_json = json_object_new_object();
json_object_object_add(return_json, "label", label_obj);

bool no_error = true;
int uni_result;
// Create a C++ collator and try it.
Collator *uni_coll = Collator::createInstance(Locale(locale_string), status);
if (ignore_obj) {
uni_coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);

Collator *uni_coll = nullptr;
RuleBasedCollator *rb_coll = nullptr;

if (rules_string != "") {
char uni_rules_out[1000] = "";
int32_t rule_chars_out = uni_rules.extract(uni_rules_out, 1000, nullptr, status);
cout << "# RULES string: " << rules_string << endl;
// !!! rb_coll = new RuleBasedCollator(uni_rules, strength_type, status);
rb_coll = new RuleBasedCollator(uni_rules, status);
if (U_FAILURE(status)) {
test_result = error_message.c_str();
// TODO: report the error in creating the instance
cout << "# Error in making RuleBasedCollator: " << label_string << " : " << test_result << endl;

json_object_object_add(return_json,
"error", json_object_new_string("creat rule based collator"));
no_error = false;
}

// cout << "# Calling rb_coll compare" << endl;
uni_result = rb_coll->compare(us1, us2, status);
if (U_FAILURE(status)) {
test_result = error_message.c_str();

json_object_object_add(return_json,
"error", json_object_new_string("error in rb_coll->compare"));
no_error = false;
cout << "# Error in rb_coll->compare: " << label_string << " : " << test_result << endl;
}
// Don't need this anymore.
delete rb_coll;
}
else {
// Not a rule-based collator.
if (locale_string == "") {
uni_coll = Collator::createInstance(status);
} else {
cout << "# Locale set to " << locale_string << endl;
uni_coll = Collator::createInstance(Locale(locale_string), status);
}

int uni_result = uni_coll->compare(us1, us2);
// cout << "UNI_RESULT = " << uni_result << endl;
if (U_FAILURE(status)) {
test_result = error_message.c_str();
json_object_object_add(return_json,
"error", json_object_new_string("error creating collator instance"));
no_error = false;
cout << "# Error in createInstance: " << label_string << " : " << test_result << endl;
}

// The json test output.
json_object *return_json = json_object_new_object();
json_object_object_add(return_json, "label", label_obj);
if (strength_obj) {
// !!!
cout << "# Collator strength = " << strength_string << endl;
uni_coll->setStrength(strength_type);
}

int64_t numeric_result = int64_t(uni_result);
if (uni_result == UCOL_GREATER) {
coll_result = false;
if (ignore_obj) {
const bool ignore_punctuation_bool = json_object_get_boolean(ignore_obj);
if (ignore_punctuation_bool) {
uni_coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
if (U_FAILURE(status)) {
test_result = error_message.c_str();
json_object_object_add(return_json,
"error", json_object_new_string("error setAttribute"));
no_error = false;
cout << "# Error in setAttribute: " << label_string << " : " << test_result << endl;
}
}
}

// Include data compared in the failing test
json_object_object_add(
return_json, "s1", json_object_new_string(string1.c_str()));
json_object_object_add(
return_json, "s2", json_object_new_string(string2.c_str()));
// Just to check the result.
UColAttributeValue alternate_value = uni_coll->getAttribute(UCOL_ALTERNATE_HANDLING, status);

// What was the actual returned value?
json_object_object_add(
return_json, "compare", json_object_new_int64(numeric_result));
// Try two different APIs
int uni_result_utf8 = uni_coll->compareUTF8(string1, string2, status);
// This one seems to work better.
uni_result = uni_coll->compare(us1, us2, status);

if (uni_result != uni_result_utf8) {
cout << "# UNI_COLL COMPARE Unicode String " << uni_result << " ";
cout << "# UNI_COLL COMPARE UTF8 String " << uni_result_utf8 << endl;
cout << "# ******* results different in " << label_string << endl;
}

if (U_FAILURE(status)) {
json_object_object_add(return_json,
"error", json_object_new_string("error in uni_coll_compare"));
no_error = false;
cout << "## Error in uni_coll->compare: " << label_string << " : " << error_message.c_str() << endl;
}
if (uni_coll) {
UColAttributeValue alternate_value = uni_coll->getAttribute(UCOL_ALTERNATE_HANDLING, status);
}
delete uni_coll;
}

json_object_object_add(
return_json, "result", json_object_new_boolean(coll_result));
if (no_error) {
int64_t numeric_result = int64_t(uni_result);
if (uni_result == UCOL_GREATER) {
coll_result = false;

cout << "# UNI_RESULT: " << label_string << " " << uni_result <<
" s1: " << string1 << " s2: " << string2 << endl;

// Check unescaped versions.
char char_out1[1000] = "";
char char_out2[1000] = "";
int32_t chars_out = us1.extract(char_out1, 1000, nullptr, status);
if (U_FAILURE(status)) {
test_result = error_message.c_str();
json_object_object_add(return_json,
"error", json_object_new_string("error extracting us1"));
cout << "# Error in us1.extract: " << label_string << " : " << test_result << endl;
}

int32_t chars_out2 = us2.extract(char_out2, 1000, nullptr, status);
if (U_FAILURE(status)) {
test_result = error_message.c_str();
// TODO: report the error in creating the instance
test_result = error_message.c_str();
json_object_object_add(return_json,
"error", json_object_new_string("error extracting us2"));
cout << "# Error in us2.extract: " << label_string << " : " << test_result << endl;
}

// Include data compared in the failing test
json_object_object_add(
return_json, "s1", json_object_new_string(string1.c_str()));
json_object_object_add(
return_json, "s2", json_object_new_string(string2.c_str()));

// What was the actual returned value?
json_object_object_add(
return_json, "compare", json_object_new_int64(numeric_result));
} else {
coll_result = true;
}

json_object_object_add(
return_json, "result", json_object_new_boolean(coll_result));
}

return json_object_to_json_string(return_json);
}
2 changes: 1 addition & 1 deletion executors/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ int main(int argc, const char** argv)
// TODO: get from the array of supported tests
json_object *tests_supported = json_object_new_object();
json_object *test_array = json_object_new_array();
for (int index = 0; index < 5; index ++) {
for (int index = 0; index < 4; index ++) {
json_object_array_add(test_array,
json_object_new_string(supported_tests[index].c_str()));
}
Expand Down
2 changes: 1 addition & 1 deletion executors/dart_web/out/executor.js
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ rl.on('line', function (line) {

if ('error' in outputLine) {
// To get the attention of the driver
console.log("#!! ERROR in NODE call: " + JSON.stringify(outputLine));
console.log("#!! ERROR in DART_WEB: " + test_type + ": " + JSON.stringify(outputLine));
}

// Send result to stdout for verification
Expand Down
30 changes: 28 additions & 2 deletions executors/test_strings.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@
{"test_type": "collation_short", "label":"0001377","s1":"\\u0009?","s2":"\\u000a!","line":12,"ignorePunctuation":true}
{"test_type": "collation_short", "label":"00008","s1":"ä","s2":"ä","compare_type":"=","test_description":" simple CEs & expansions","rules":"&\\x01\n<<<\\u0300\n&9<\\x00\n&\\uA00A\\uA00B=\\uA002\n&\\uA00A\\uA00B\\u00050005=\\uA003"}

=======
{"test_type": "collation_short", "label": "0011232", "s1": "<a", "s2": "<A", "line": 9867, "ignorePunctuation": true}
{"test_type": "collation_short", "label": "0011232_Aa", "s1": "<A", "s2": "<a", "line": 9867, "ignorePunctuation": true}
{"test_type": "collation_short", "label": "0011232_aA", "s1": "a", "s2": "A", "line": 9867, "ignorePunctuation": true}

{"test_type": "lang_names", "label": "01", "language_label": "en", "locale_label": "af"}
{"test_type": "lang_names", "label": "01", "language_label": "", "locale_label": "fr"}
{"test_type": "lang_names", "label": "01", "language_label": "de", "locale_label": "fr"}
Expand Down Expand Up @@ -120,7 +123,6 @@

{"label":"0598","locale":"zh-TW","skeleton":"compact-short percent sign-accounting-except-zero","input":"91827.3645","options":{"notation":"compact","compactDisplay":"short","style":"unit","unit":"percent","signDisplay":"exceptZero","currencySign":"accounting"}, "test_type": "number_fmt"}
{"label":"0598","locale":"zh-TW","skeleton":"compact-short percent sign-accounting-except-zero","input":"9182734567890","options":{"notation":"compact","compactDisplay":"short","style":"unit","unit":"percent","signDisplay":"exceptZero","currencySign":"accounting"}, "test_type": "number_fmt"}
>>>>>>> 8eca805e8c4cd69552153c7137eeea111144d528


{"label":"0598","locale":"zh-TW","skeleton":"compact-short percent sign-accounting-except-zero","input":"91827.3645","options":{"notation":"compact","compactDisplay":"short","style":"unit","unit":"percent","signDisplay":"exceptZero","currencySign":"accounting"}, "test_type": "number_fmt"}
Expand All @@ -131,6 +133,8 @@
{"test_type": "number_fmt", "label":"2062_long","locale":"es-MX","skeleton":"currency/EUR unit-width-long .000","input":"91827.3645","options":{"style":"currency","currencyDisplay":"narrowSymbol","currency":"EUR","unitDisplay":"long","maximumFractionDigits":3,"minimumFractionDigits":3}}
{"test_type": "number_fmt", "label": "0219", "locale": "zh-TW", "skeleton": "scientific/+ee/sign-always percent precision-integer", "input": "0", "options": {"notation": "scientific", "style": "unit", "unit": "percent", "maximumFractionDigits": 0, "minimumFractionDigits": 0, "roundingType": "fractionDigits"}}

{"test_type": "number_fmt", "label": "0219", "locale": "zh-TW", "skeleton": "scientific/+ee/sign-always", "input": "1234.01", "options": {"notation": "scientific", "style": "unit", "unit": "percent", "maximumFractionDigits": 0, "minimumFractionDigits": 0, "roundingType": "fractionDigits"}}

{"test_type": "number_fmt","label": "0054", "locale": "es-MX", "skeleton": "scientific/+ee/sign-always percent unit-width-narrow", "input": "0", "options": {"notation": "scientific", "conformanceExponent": "+ee", "conformanceSign": "always", "style": "unit", "unit": "percent", "unitDisplay": "narrow", "currencyDisplay": "narrowSymbol", "maximumFractionDigits": 6}}

// Check significant digits
Expand All @@ -154,9 +158,31 @@
{"test_type": "number_fmt", "label":"0648-no-max-fraction","locale":"es-MX","skeleton":"compact-short percent decimal-always","input":"0","options":{"notation":"compact","compactDisplay":"short","style":"unit","unit":"percent","conformanceDecimalAlways":true}}
{"test_type": "number_fmt", "label":"0649","locale":"es-MX","skeleton":"compact-short percent decimal-always","input":"91827.3645","options":{"notation":"compact","compactDisplay":"short","style":"unit","unit":"percent","conformanceDecimalAlways":true,"maximumFractionDigits":2}}
{"test_type": "number_fmt", "label":"0649","locale":"es-MX","skeleton":"compact-short percent decimal-always","input":"91827.3645","options":{"notation":"compact","compactDisplay":"short","style":"unit","unit":"percent","conformanceDecimalAlways":true}}

{"test_type": "number_fmt", "label":"5913","op":"format", "locale": "en", "skeleton":".00","input":"123456789","options":{"roundingMode":"halfEven","minimumIntegerDigits":1,"maximumFractionDigits":0,"useGrouping":false}}
{"test_type": "number_fmt", "label":"5913","op":"format", "locale":"es-MX", "skeleton":"percent .##/@@@+","input":"123456789.9876543210","options":{"roundingMode":"halfEven","minimumIntegerDigits":1,"maximumFractionDigits":0,"useGrouping":false}}

{"test_type": "number_fmt", "label":"5913","op":"format", "locale": "en", "skeleton":".0","input":"123456789","options":{"roundingMode":"halfEven","minimumIntegerDigits":1,"maximumFractionDigits":0,"useGrouping":false}}
{"test_type": "number_fmt", "label":"5913","op":"format", "locale":"es-MX", "skeleton":"percent .##/@@@+","input":"123456789.9876543210","options":{"roundingMode":"halfEven","minimumIntegerDigits":1,"maximumFractionDigits":0,"useGrouping":false}}

// Skeletons
{"test_type": "number_fmt", "label":"s1", "pattern": "0.0000E0", "skeleton": "scientific/+e .0000/@+", "input": "1234.01", "options":{}}}
{"test_type": "number_fmt", "label":"s2", "locale": "en", "skeleton": "integer-width/##00 ./@+", "input":"1.91", "options":{}}}
{"test_type": "number_fmt", "label":"s", "pattern":"#.#", "skeleton": ".#/@@+", "input":"12.3456", "options":{}}
{"test_type": "number_fmt", "label":"s", "pattern":"@@@", "skeleton": "@@@", "input":"12.3456", "options":{}}
{"test_type": "number_fmt", "label":"s", "pattern":"@@@", "skeleton": "@@@ group-off", "input":"123456", "options":{}}
{"test_type": "number_fmt", "label":"s", "pattern":"@@@", "skeleton": "@@@ group-off", "input":"0.00123456", "options":{}}

{"test_type": "number_fmt", "label":"s", "pattern":"@@###", "skeleton": "@@### group-off", "input":"123456000", "options":{}}
{"test_type": "number_fmt", "label":"s", "pattern":"@@###", "skeleton": "@@### group-off", "input":".00123456000", "options":{}}
{"test_type": "number_fmt", "label":"s", "pattern":"@@###", "skeleton": "@@### group-off", "input":".00123", "options":{}}

{"test_type": "number_fmt", "label":"s", "pattern":"@@@@E0", "skeleton": "scientific/+e .0000/@@+", "input":".00123", "options":{}}
{"test_type": "number_fmt", "label":"s", "pattern":"0.0##E0", "skeleton": "scientific/+e .##/@@+ integer-width/*000", "input":".00123e110", "options":{}}

{"test_type": "number_fmt", "label":"LONG", "pattern":"#", "skeleton": "@+ group-off", "input":"10000000000000000000000000000000000000000000000000001", "options":{}}
10000000000000000000000000000000000000000000000000001
10000000000000000000000000000000000000000000000000000

# LOCALE_INFO
{"test_type":"likely_subtags", "option":"maximize", "locale":"en", "label":"en_max"}
Expand Down
1 change: 0 additions & 1 deletion genData100.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ then
bash setup.sh
fi

export TEMP_DIR=TEMP_DATA
export TEST_LIMIT=100

export TEMP_DIR=TEMP_DATA_100
Expand Down
2 changes: 2 additions & 0 deletions schema/check_generated_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import logging
import logging.config
import multiprocessing as mp
import os.path
import sys

Expand Down Expand Up @@ -45,6 +46,7 @@ def main(args):
logging.debug('test types = %s', ALL_TEST_TYPES)

validator = schema_validator.ConformanceSchemaValidator()

# Todo: use setters to initialize validator
validator.schema_base = '.'
validator.test_data_base = test_data_path
Expand Down
Loading

0 comments on commit e7c959e

Please sign in to comment.