Skip to content

Commit

Permalink
Issue #173 Fixing lots of issues with schemas (#316)
Browse files Browse the repository at this point in the history
* Issue #173 Fixing lots of issues with schemas

* DDT Schema processing. Much better now

* Updating info printed out. Adding some ICU67 data, too

* Fix Dart syntax problem

* Remove dart_web update

* Removing ICU67 data

* Adding back run_config.json

* Resetting run_config.json

* Add schema for message_fmt2 test output

* Fix typo in Verify

* Fix duplicate line for rdt

* Set schema failures to exit hard. Clean up code warnings

* Fix details on schema validation

* Fix failure in catching non-fatal exception

* Update schema/schema_validator.py

Co-authored-by: Elango Cheran <[email protected]>

* Update schema/schema_validator.py

Co-authored-by: Elango Cheran <[email protected]>

* Actually stops the processing on bad schema files

---------

Co-authored-by: Elango Cheran <[email protected]>
  • Loading branch information
sven-oly and echeran authored Oct 7, 2024
1 parent bc0fdb9 commit ee6d46e
Show file tree
Hide file tree
Showing 39 changed files with 897 additions and 264 deletions.
2 changes: 1 addition & 1 deletion executors/rust/1.3/src/relativedatetime_fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ pub fn run_relativedatetimeformat_test(json_obj: &Value) -> Result<Value, String
return Ok(json!({
"error": "Number system not supported",
"error_msg": numbering_system,
"error_detail": format!("{data_locale:?}"),
"error_detail": {"locale": format!("{data_locale:?}")},
"label": label,
"unsupported": "non-Latn numbering system",
}));
Expand Down
34 changes: 34 additions & 0 deletions genVerifyAll.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash

# Part of the generateDataAndRun.sh file that
# runs the verifier.
# Exit immediately if any command fails.
set -e

# Rotate log files
logrotate -s logrotate.state logrotate.conf

##########
# Setup (generate) test data & expected values
##########

# Ensure that ICU4C binaries have been downloaded locally
if [[ ! -d gh-cache ]]
then
bash setup.sh
fi

# Generates all new test data
# First positional argument overrides the default run configuration file.
source_file=${1:-'run_config.json'}

# ...

# Verify everything
# NOTE(review): $TEMP_DIR and $all_execs_json are not set in the visible
# portion of this script — they are presumably defined in the elided
# section above (or inherited from generateDataAndRun.sh); confirm.
mkdir -p $TEMP_DIR/testReports
pushd verifier

# Collect the union of all test_type values across every run entry in the
# config, as a space-separated list for verifier.py.
all_test_types=$(jq '.[].run.test_type' ../$source_file | jq -s '.' | jq 'add' | jq 'unique' | jq -r 'join(" ")')
all_execs=$(jq -r 'join(" ")' <<< $all_execs_json)
python3 verifier.py --file_base ../$TEMP_DIR --exec $all_execs --test_type $all_test_types

popd

4 changes: 2 additions & 2 deletions run_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,10 @@
"test_type": [
"collation_short",
"datetime_fmt",
"number_fmt",
"lang_names",
"list_fmt",
"likely_subtags",
"number_fmt",
"list_fmt",
"plural_rules",
"rdt_fmt"
],
Expand Down
30 changes: 13 additions & 17 deletions schema/check_generated_data.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
# Checks test data generated against schema in Conformance Testing
# For ICU Conformance project, Data Driven Testing
import argparse

from datetime import datetime
import glob
import json


import logging
import logging.config
import multiprocessing as mp
import os.path
import sys

import schema_validator
import schema_files
from schema_files import SCHEMA_FILE_MAP
from schema_files import ALL_TEST_TYPES


def main(args):
logging.config.fileConfig("../logging.conf")

Expand All @@ -34,13 +32,12 @@ def main(args):

# TODO: get ICU versions
icu_versions = []
test_type_set = set()
if os.path.exists(test_data_path):
check_path = os.path.join(test_data_path, 'icu*')
icu_dirs = glob.glob(check_path)
logging.debug('ICU DIRECTORIES = %s', icu_dirs)
for dir in icu_dirs:
icu_versions.append(os.path.basename(dir))
for dir_name in icu_dirs:
icu_versions.append(os.path.basename(dir_name))

logging.debug('ICU directories = %s', icu_versions)
logging.debug('test types = %s', ALL_TEST_TYPES)
Expand All @@ -53,19 +50,15 @@ def main(args):
validator.icu_versions = sorted(icu_versions)
validator.test_types = ALL_TEST_TYPES
validator.debug = 1
schema_base = '.'
schema_data_results = []
schema_count = 0

all_results = validator.validate_test_data_with_schema()
logging.info(' %d results for generated test data', len(all_results))

schema_errors = 0
schema_errors = []
failed_validations = []
passed_validations = []
schema_count = len(all_results)
for result in all_results:
#print(result)
if result['result']:
passed_validations.append(result)
else:
Expand All @@ -85,26 +78,29 @@ def main(args):
try:
summary_data = json.dumps(summary_json)
except BaseException as error:
logging.error('json.dumps Summary data problem: %s, ')
logging.error('json.dumps Summary data problem: %s at %s', error, error)
exit(1)

output_filename = os.path.join(test_data_path, 'test_data_validation_summary.json')
try:
output_filename = os.path.join(test_data_path, 'test_data_validation_summary.json')
file_out = open(output_filename, mode='w', encoding='utf-8')
file_out.write(summary_data)
file_out.close()
except BaseException as error:
logging.warning('Error: %s. Cannot save validation summary in file %s', err, output_filename)

schema_errors.append(output_filename)
logging.fatal('Error: %s. Cannot save validation summary in file %s', error, output_filename)
exit(1)

if schema_errors:
logging.critical('Test data file files: %d fail out of %d:',
len(schema_errors, schema_count))
len(schema_errors), schema_count)
for failure in schema_errors:
logging.critical(' %s', failure)
exit(1)
else:
logging.info("All %d generated test data files match with schema", schema_count)
exit(0)


if __name__ == "__main__":
main(sys.argv)
46 changes: 28 additions & 18 deletions schema/check_schemas.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
# Schema checker for the schemas in Conformance Testing
# For ICU Conformance project, Data Driven Testing
import argparse

from datetime import datetime
import glob
import json

import logging
import logging.config
import multiprocessing as mp
from multiprocessing.dummy import Pool
import multiprocessing
import os.path
import sys

import schema_validator
from schema_files import ALL_TEST_TYPES

class ValidateSchema():

class ValidateSchema:
def __init__(self, schema_base='.'):
self.schema_base = schema_base
logging.config.fileConfig("../logging.conf")
Expand All @@ -35,7 +37,6 @@ def save_schema_validation_summary(self, validation_status):
'description': 'Results of checking schema files for correct syntax',
'when_processed': datetime.now().strftime('%Y-%m-%d T%H%M%S.%f'),
'schema_validation_base': self.schema_base,
'when_processed': datetime.now().strftime('%Y-%m-%d T%H%M%S.%f'),
'validations': {
'failed': failed_validations,
'passed': passed_validations
Expand All @@ -48,26 +49,31 @@ def save_schema_validation_summary(self, validation_status):
logging.error('%s: Cannot create JSON summary: %s', err, summary_json)
return None

output_filename = os.path.join(self.schema_base, 'schema_validation_summary.json')
try:
output_filename = os.path.join(self.schema_base, 'schema_validation_summary.json')
file_out = open(output_filename, mode='w', encoding='utf-8')
file_out.write(summary_data)
file_out.close()
except BaseException as error:
logging.warning('Error: %s. Cannot save validation summary in file %s', err, output_filename)
logging.warning('Error: %s. Cannot save validation summary in file %s', error, output_filename)
return None

return output_filename


def parallel_validate_schema(validator, file_names):
    """Validate schema files in parallel across all available CPUs.

    Args:
        validator: object providing validate_schema_file(file_path), which
            is mapped over file_names in a worker pool.
        file_names: list of schema file paths to validate.

    Returns:
        A list with one validation result per file, or None when the pool
        could not return results (callers treat a falsy result as fatal).
    """
    num_processors = multiprocessing.cpu_count()
    logging.info('Schema validation: %s processors for %s schema validations',
                 num_processors, len(file_names))

    result = None
    # Context manager guarantees the worker pool is terminated and joined
    # even if result collection fails.
    with multiprocessing.Pool(num_processors) as processor_pool:
        try:
            result = processor_pool.map(validator.validate_schema_file, file_names)
        except multiprocessing.pool.MaybeEncodingError as error:
            # Don't swallow the failure silently; record why results could
            # not be transferred back from the workers.
            logging.error('Schema validation pool failed to return results: %s',
                          error)
    return result

def main(args):
logger = logging.Logger("TEST SCHEMAS LOGGER")
Expand Down Expand Up @@ -95,10 +101,14 @@ def main(args):
schema_test_json_files = os.path.join(schema_test_base, '*.json')
schema_file_names = glob.glob(schema_test_json_files)
schema_file_paths.extend(schema_file_names)

results = parallel_validate_schema(validator, schema_file_paths)
if not results:
# This should stop the whole thing!
exit(1)

for outcome in results:
result, err, file_path = outcome[0], outcome[1], outcome[2]
result, err, file_path, test_type = outcome[0], outcome[1], outcome[2], outcome[3]
schema_file = os.path.basename(file_path)
validation_status.append({"test_type": test_type,
"schema_path": file_path,
Expand All @@ -110,19 +120,19 @@ def main(args):
logging.error('Bad Schema at %s', schema_file)
schema_count += 1

ok = val_schema.save_schema_validation_summary(validation_status)
output_filename = val_schema.save_schema_validation_summary(validation_status)

if schema_errors:
logging.error('SCHEMA: %d fail out of %d:',
len(schema_errors), schema_count)
len(schema_errors), schema_count)
for failure in schema_errors:
logging.error(' %s', failure)
exit(1)
else:
logging.info("All %d schema are valid", schema_count)
logging.info("All %d schema are valid in file %s", schema_count, output_filename)
exit(0)


# Add validation results to test data with validation.
# Add validation results to test data with validation.
if __name__ == "__main__":
main(sys.argv)
55 changes: 29 additions & 26 deletions schema/check_test_output.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
# Run schema validation on all test outputs for all tests.

# For ICU Conformance project, Data Driven Testing
import argparse
from datetime import datetime
import glob
import json


import logging
import logging.config
import os.path
import sys

import schema_validator
import schema_files
from schema_files import SCHEMA_FILE_MAP
from schema_files import ALL_TEST_TYPES


def main(args):
logging.config.fileConfig("../logging.conf")

Expand All @@ -36,6 +34,7 @@ def main(args):
executor_set = set()
icu_version_set = set()
test_type_set = set()
json_files = []
if os.path.exists(test_output_path):
executor_path = os.path.join(test_output_path, '*')
executor_paths = glob.glob(executor_path)
Expand All @@ -50,15 +49,15 @@ def main(args):
json_files = glob.glob(test_output_json_path)

for file in json_files:
test_file_prefix = os.path.splitext(os.path.basename(file))[0]
try:
test_file_prefix = os.path.splitext(os.path.basename(file))[0]
test_type = schema_files.TEST_FILE_TO_TEST_TYPE_MAP[test_file_prefix]
test_type_set.add(test_type)
except BaseException as err:
logging.debug('No file (%s) during schema check output: %s', file, err
)
for dir in icu_dirs:
icu_version_set.add(os.path.basename(dir))
for dir_name in icu_dirs:
icu_version_set.add(os.path.basename(dir_name))

icu_versions = sorted(list(icu_version_set))
logging.debug('ICU directories = %s', icu_versions)
Expand All @@ -73,14 +72,21 @@ def main(args):
validator.test_types = list(test_type_set)
validator.executors = list(executor_set)
validator.debug = 1
schema_base = '.'
schema_data_results = []
schema_count = 0

all_results = validator.validate_test_output_with_schema()
all_results, test_validation_plans = validator.validate_test_output_with_schema()
logging.info(' %d results for test output', len(all_results))

schema_errors = 0
# Check if any files in the expected list were not validated.
test_paths = set()
for plan in test_validation_plans:
test_paths.add(plan['test_result_file'])

for json_file in json_files:
if json_file not in test_paths:
logging.fatal('JSON file %s was not verified against a schema', json_file)
# Bail out right away!
exit(1)

failed_validations = []
passed_validations = []
schema_count = len(all_results)
Expand All @@ -103,32 +109,29 @@ def main(args):
}
}
except BaseException as error:
summary_json = {}
logging.fatal('Cannot create summary_json %s', error)
exit(1)

# Create outputs from these results.
try:
summary_data = json.dumps(summary_json)
except TypeError as err :
logging.error('Error: %s\n Cannot dump JSON for %s: ',
err, summary_json)
except TypeError as err:
logging.fatal('Error: %s\n Cannot dump JSON for %s', err, summary_json)
exit(1)

output_filename = os.path.join(test_output_path, 'test_output_validation_summary.json')
try:
output_filename = os.path.join(test_output_path, 'test_output_validation_summary.json')
file_out = open(output_filename, mode='w', encoding='utf-8')
file_out.write(summary_data)
file_out.close()
except BaseException as error:
logging.warning('Error: %s. Cannot save validation summary in file %s', error, output_filename)
logging.fatal('Error: %s. Cannot save validation summary in file %s', error, output_filename)
# Don't continue after this problem.
exit(1)

logging.info("All %d test output files match with schema", schema_count)
return

if schema_errors:
logging.error('Test data file files: %d fail out of %d:',
len(schema_errors, schema_count))
for failure in schema_errors:
logging.error(' %s', failure)
exit(1)
else:
logging.info("All %d test output files match with schema", schema_count)
exit(0)

if __name__ == "__main__":
main(sys.argv)
Expand Down
Loading

0 comments on commit ee6d46e

Please sign in to comment.