diff --git a/.gitignore b/.gitignore
index 4c5688f3..76db0be8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,8 @@ executors/rust/target/
 executors/cpp/executor
 executors/dart_native/bin/executor/
 
+gh-cache/
+
 schema/schema_validation_summary.json
 
 **/__pycache__/
diff --git a/setup.sh b/setup.sh
index da0760c8..ff427318 100755
--- a/setup.sh
+++ b/setup.sh
@@ -7,6 +7,7 @@ set -e
 dpkg --list | grep libjson-c-dev || error_code=$?
 if [[ $error_code -ne 0 ]]
 then
+    sudo apt-get update
     sudo apt-get install libjson-c-dev
 fi
 
@@ -73,11 +74,11 @@ function download_74_2() {
 
 
  pushd gh-cache
-    
+
  download_71_1
  download_72_1
  download_73_1
  download_74_1
  download_74_2
- 
+
  popd
diff --git a/testgen/generators/__init__.py b/testgen/generators/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/testgen/generators/base.py b/testgen/generators/base.py
new file mode 100644
index 00000000..132bb9c9
--- /dev/null
+++ b/testgen/generators/base.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+from abc import ABC, abstractmethod
+import json
+import logging
+import logging.config
+import math
+import os
+import requests
+
+
+class DataGenerator(ABC):
+    def __init__(self, icu_version, run_limit=None):
+        # Logging is configured from a path relative to the current working directory.
+        logging.config.fileConfig("../logging.conf")
+
+        # run_limit, if set, is the maximum number of tests generated for each.
+        self.icu_version, self.run_limit = icu_version, run_limit
+
+    @abstractmethod
+    def process_test_data(self):
+        """Generate this generator's test data; every subclass must implement it."""
+
+    def saveJsonFile(self, filename, data, indent=None):
+        """Dump data as JSON into the icu_version directory, always closing the file."""
+        output_path = os.path.join(self.icu_version, filename)
+        with open(output_path, "w", encoding="UTF-8") as output_file:
+            json.dump(data, output_file, indent=indent)
+
+    def getTestDataFromGitHub(self, datafile_name, version):
+        """Fetch an ICU test data file from GitHub; return its text, or None.
+
+        version is "LATEST" for main, else the suffix of a "maint/maint-" branch.
+        Failures are logged and return None; KeyboardInterrupt/SystemExit propagate.
+        """
+        pattern0 = "https://raw.githubusercontent.com/unicode-org/icu/"
+        if version == "LATEST":
+            ver_string = "main"
+        else:
+            ver_string = "maint/maint-%s" % version
+        pattern1 = "/icu4c/source/test/testdata/"
+        url = pattern0 + ver_string + pattern1 + datafile_name
+        try:
+            # Bounded wait, so an unresponsive server cannot hang generation.
+            r = requests.get(url, timeout=30)
+        except requests.RequestException as err:
+            logging.warning(
+                "Warning: cannot load data %s for version %s. Error = %s",
+                datafile_name,
+                version,
+                err,
+            )
+            return None
+        if r.status_code != 200:
+            logging.warning("Cannot load version %s of file %s", version, datafile_name)
+            return None
+        return r.text
+
+    def sample_tests(self, all_tests):
+        """Return all_tests thinned by a fixed stride to about run_limit items.
+
+        Nothing is dropped when run_limit is None, zero or negative, or when
+        the list already fits. The stride is rounded down, so slightly more
+        than run_limit items may be returned.
+        """
+        if not self.run_limit or self.run_limit < 0 or len(all_tests) <= self.run_limit:
+            return all_tests
+        return all_tests[:: len(all_tests) // self.run_limit]
+
+    def readFile(self, filename, version=""):
+        """Return the text of filename (inside directory version, if given) or None."""
+        # If version is provided, it refers to a subdirectory containing the test source
+        path = os.path.join(version, filename) if version else filename
+        try:
+            with open(path, "r", encoding="utf-8") as testdata:
+                return testdata.read()
+        except (OSError, ValueError) as err:
+            # ValueError covers undecodable bytes; Ctrl-C is no longer swallowed.
+            logging.warning("** READ: Error = %s", err)
+            return None
+
+    def computeMaxDigitsForCount(self, count):
+        return math.ceil(math.log10(1 + count))  # e.g. 0 -> 0, 9 -> 1, 10 -> 2
diff --git a/testgen/generators/collation_short.py b/testgen/generators/collation_short.py
new file mode 100644
index 00000000..24881130
--- /dev/null
+++ b/testgen/generators/collation_short.py
@@ -0,0 +1,413 @@
+# -*- coding: utf-8 -*-
+import re
+import logging
+from generators.base import DataGenerator
+
+reblankline = re.compile(r"^\s*$")
+
+
+class CollationShortGenerator(DataGenerator):
+    def process_test_data(self):
+        """Merge the three collation sources into one test and one verify file."""
+        json_test = {"test_type": "collation_short", "tests": [], "data_errors": []}
+        json_verify = {"test_type": "collation_short", "verifications": []}
+        self.insert_collation_header([json_test, json_verify])
+
+        data_error_list = []
+        # NOTE(review): start_count is assigned here but never read afterwards.
+        start_count = 0
+
+        # Data from more complex tests in github's unicode-org/icu repository
+        # icu4c/source/test/testdata/collationtest.txt
+        test_complex, verify_complex, encode_errors = self.generateCollTestData2(
+            "collationtest.txt", self.icu_version, start_count=len(json_test["tests"])
+        )
+
+        if verify_complex:
+            json_verify["verifications"].extend(verify_complex)
+
+        if test_complex:
+            json_test["tests"].extend(test_complex)
+
+        data_error_list.extend(encode_errors)
+
+        # Collation ignoring punctuation; labels continue from the tests so far
+        test_ignorable, verify_ignorable, data_errors = (
+            self.generateCollTestDataObjects(
+                "CollationTest_SHIFTED_SHORT.txt",
+                self.icu_version,
+                ignorePunctuation=True,
+                start_count=len(json_test["tests"]),
+            )
+        )
+
+        json_test["tests"].extend(test_ignorable)
+        json_verify["verifications"].extend(verify_ignorable)
+        data_error_list.extend(data_errors)
+
+        # Collation considering punctuation; labels continue from the tests so far
+        test_nonignorable, verify_nonignorable, data_errors = (
+            self.generateCollTestDataObjects(
+                "CollationTest_NON_IGNORABLE_SHORT.txt",
+                self.icu_version,
+                ignorePunctuation=False,
+                start_count=len(json_test["tests"]),
+            )
+        )
+
+        # Add the last set of tests, then resample the combined list as needed
+        json_test["tests"].extend(test_nonignorable)
+        json_test["tests"] = self.sample_tests(json_test["tests"])
+        data_error_list.extend(data_errors)
+
+        # Store data errors with the tests
+        json_test["data_errors"] = data_error_list
+        # Verifications are completed and then sampled the same way as the tests
+        json_verify["verifications"].extend(verify_nonignorable)
+        json_verify["verifications"] = self.sample_tests(json_verify["verifications"])
+        # TODO: Store data errors with the tests
+
+        # And write the files
+        self.saveJsonFile("collation_test.json", json_test)
+        self.saveJsonFile("collation_verify.json", json_verify)
+
+    def insert_collation_header(self, test_objs):
+        # Stamp each output object, in place, with the shared scenario and description.
+        description = "UCA conformance test. Compare the first data string with the second and with strength = identical level (using S3.10). If the second string is greater than the first string, then stop with an error."
+        for obj in test_objs:
+            obj["Test scenario"] = "collation_short"
+            obj["description"] = description
+
+    def generateCollTestData2(self, filename, icu_version, start_count=0):
+        """Build collation tests from a rule-driven file such as collationtest.txt.
+
+        Line handling, as implemented below:
+          "@ root", "@ locale TAG", "@ rules": reset locale, rules, attributes
+              and strength ("@ rules" also collects the rule lines that follow)
+          "** test:TEXT": description attached to the following tests
+          "% NAME=VALUE": attribute attached to the following tests
+          "* compare": starts a block of "RELATION STRING" lines; each yields a
+              test of the previous string (initially "") against the new one
+
+        Args:
+            filename: name of the test source file.
+            icu_version: directory the file is read from.
+            start_count: number used for the first generated label.
+
+        Returns:
+            (test_list, verify_list, encode_errors). encode_errors holds
+            [line index, line] for comparisons dropped because a string had
+            unpaired surrogates. All three are empty if the file cannot be read.
+        """
+        label_num = start_count
+
+        test_list = []
+        verify_list = []
+        encode_errors = []
+
+        rawcolltestdata = self.readFile(filename, icu_version)
+        if not rawcolltestdata:
+            return test_list, verify_list, encode_errors
+
+        raw_testdata_list = rawcolltestdata.splitlines()
+        max_digits = 1 + self.computeMaxDigitsForCount(
+            len(raw_testdata_list)
+        )  # Approximate
+
+        # Raw strings, so regex escapes are not read as invalid string escapes.
+        recommentline = re.compile(r"^[\ufeff\s]*#(.*)")
+        root_locale = re.compile(r"@ root")
+        locale_string = re.compile(r"@ locale (\S+)")
+        test_line = re.compile(r"^\*\* test:(.*)")
+        rule_header_pattern = re.compile(r"^@ rules")
+        strength_pattern = re.compile(r"% strength=(\S)")
+        compare_pattern = re.compile(r"^\* compare(.*)")
+        # Compare operator (=, <, <1, <2, <3, <4), the string, optional comment
+        comparison_pattern = re.compile(r"(\S+)\s+(\S+)\s*(\#?.*)")
+        attribute_test = re.compile(r"^\% (\S+)\s*=\s*(\S+)")
+
+        # Collator settings carried from section headers into the tests
+        locale = ""
+        rules = []
+        attributes = []
+        strength = None
+        test_description = ""
+        string1 = ""
+        string2 = ""
+
+        line_number = 0
+        num_lines = len(raw_testdata_list)
+        while line_number < num_lines:
+            line_in = raw_testdata_list[line_number]
+            line_number += 1
+
+            # Ignore comment and blank lines
+            if recommentline.match(line_in) or reblankline.match(line_in):
+                continue
+
+            if root_locale.match(line_in):
+                # Reset the parameters for collation
+                locale = "und"
+                rules = []
+                attributes = []
+                strength = None
+                continue
+
+            locale_match = locale_string.match(line_in)
+            if locale_match:
+                # Reset the parameters for collation, keeping the requested
+                # locale (it used to be cleared again right after being set).
+                locale = locale_match.group(1)
+                rules = []
+                attributes = []
+                strength = None
+                continue
+
+            # Find "** test" section
+            is_test = test_line.match(line_in)
+            if is_test:
+                test_description = is_test.group(1)
+                continue
+
+            # Handle rules, to be applied in subsequent tests
+            if rule_header_pattern.match(line_in):
+                # Same reset as "@ root"; the rule lines are collected below.
+                locale = "und"
+                rules = []
+                attributes = []
+                strength = None
+
+                # Read rule lines until a "*" line is found,
+                # skipping comment and empty lines
+                while line_number < num_lines:
+                    line_in = raw_testdata_list[line_number]
+                    if len(line_in) == 0 or line_in[0] == "#":
+                        line_number += 1
+                        continue
+                    if line_in[0] == "*":
+                        break
+                    # Remove any comments in the line preceded by '#'
+                    comment_start = line_in.find("#")
+                    if comment_start >= 0:
+                        line_in = line_in[0:comment_start]
+                    rules.append(line_in.strip())
+                    line_number += 1
+                continue
+
+            is_strength = strength_pattern.match(line_in)
+            if is_strength:
+                strength = is_strength.group(1)
+
+            if compare_pattern.match(line_in):
+                # Initialize string1 to the empty string.
+                string1 = ""
+                # line_number already points at the line after "* compare", so
+                # read first and advance afterwards; advancing first would drop
+                # the first comparison of every block.
+                while line_number < num_lines:
+                    line_in = raw_testdata_list[line_number]
+                    if line_in[0:1] in ("*", "@", "%"):
+                        # Start of the next section ("@" and "%" lines are not
+                        # comparisons either); the outer loop handles it.
+                        break
+                    line_index = line_number
+                    line_number += 1
+
+                    if len(line_in) == 0 or line_in[0] == "#":
+                        continue
+
+                    is_comparison = comparison_pattern.match(line_in)
+                    if not is_comparison:
+                        # Not a "RELATION STRING" line, so nothing to test.
+                        continue
+
+                    compare_type = is_comparison.group(1)
+                    compare_string = is_comparison.group(2)
+                    compare_comment = is_comparison.group(3)
+                    try:
+                        # unicode_escape reads bytes as Latin-1, so characters
+                        # beyond it are turned into escapes first; plain UTF-8
+                        # bytes would come back as mojibake.
+                        string2 = compare_string.encode(
+                            "latin-1", "backslashreplace"
+                        ).decode("unicode_escape")
+                    except UnicodeError as err:
+                        logging.error(
+                            "%s: line: %d. PROBLEM ENCODING", err, line_index
+                        )
+                        continue
+
+                    # A label is consumed even when the case is skipped below.
+                    label = str(label_num).rjust(max_digits, "0")
+                    label_num += 1
+
+                    # If either string has unpaired surrogates, ignore the case and record it.
+                    if self.check_unpaired_surrogate_in_string(
+                        string1
+                    ) or self.check_unpaired_surrogate_in_string(string2):
+                        encode_errors.append([line_index, line_in])
+                    else:
+                        test_case = {
+                            "label": label,
+                            "s1": string1,
+                            "s2": string2,
+                        }
+
+                        # Add info to the test case.
+                        if locale:
+                            test_case["locale"] = locale
+                        test_case["compare_type"] = compare_type
+                        if test_description:
+                            test_case["test_description"] = test_description
+                        if compare_comment:
+                            test_case["compare_comment"] = compare_comment
+                        if rules:
+                            test_case["rules"] = "".join(rules)
+                        if attributes:
+                            # Copy: the running list keeps growing for later tests.
+                            test_case["attributes"] = list(attributes)
+                        if strength:
+                            test_case["strength"] = strength
+
+                        test_list.append(test_case)
+                        # We always expect True as the result
+                        verify_list.append({"label": label, "verify": True})
+
+                    # Keep this for the next comparison test
+                    string1 = string2
+                continue
+
+            is_attribute = attribute_test.match(line_in)
+            if is_attribute:
+                attributes.append([is_attribute.group(1), is_attribute.group(2)])
+                continue
+
+        if encode_errors:
+            logging.warning(
+                "!! %s File has %s ENCODING ERRORS: %s",
+                filename,
+                len(encode_errors),
+                encode_errors,
+            )
+        return test_list, verify_list, encode_errors
+
+    def generateCollTestDataObjects(
+        self, filename, icu_version, ignorePunctuation, start_count=0
+    ):
+        """Build pairwise collation tests from a conformance data file.
+
+        Each data line after the file's first line is paired with the data line
+        before it. Returns (test_list, verify_list, data_errors); data_errors
+        holds [tests generated so far, line] for lines parseCollTestData rejects.
+        All three are empty if the file cannot be read.
+        """
+        test_list = []
+        verify_list = []
+        data_errors = []  # Items with malformed Unicode
+
+        rawcolltestdata = self.readFile(filename, icu_version)
+        if not rawcolltestdata:
+            return test_list, verify_list, data_errors
+
+        raw_testdata_list = rawcolltestdata.splitlines()
+
+        # Raw string, so "\s" is a regex escape, not an invalid string escape.
+        recommentline = re.compile(r"^\s*#")
+
+        max_digits = 1 + self.computeMaxDigitsForCount(
+            len(raw_testdata_list)
+        )  # Approximately correct
+        count = start_count
+
+        prev = None
+        # The file's first line is skipped; numbering starts at 1 on the second.
+        for line_number, item in enumerate(raw_testdata_list[1:], start=1):
+            if recommentline.match(item) or reblankline.match(item):
+                continue
+
+            # It's a data line: get its code points as a string.
+            # ("current" rather than "next", which would shadow the builtin.)
+            current = self.parseCollTestData(item)
+            if not prev:
+                # Just getting started.
+                prev = current
+                continue
+
+            if not current:
+                # Problem with the data input, e.g. a surrogate code point.
+                data_errors.append([len(test_list), item])
+                continue
+
+            label = str(count).rjust(max_digits, "0")
+            new_test = {"label": label, "s1": prev, "s2": current, "line": line_number}
+            if ignorePunctuation:
+                new_test["ignorePunctuation"] = True
+            test_list.append(new_test)
+            verify_list.append({"label": label, "verify": True})
+
+            prev = current  # set up for next pair
+            count += 1
+
+        logging.info("Coll Test: %d lines processed", len(test_list))
+        if data_errors:
+            logging.warning(
+                "!! %s File has %s DATA ERRORS: %s",
+                filename,
+                len(data_errors),
+                data_errors,
+            )
+
+        return test_list, verify_list, data_errors
+
+    def parseCollTestData(self, testdata):
+        # Turn a line of hex code points into a string; None if any is a surrogate.
+        decoded = testdata.encode().decode("unicode_escape")
+        pieces = []
+        for hex_code in re.findall(r"[0-9a-fA-F]{4,6}", decoded):
+            code_point = int(hex_code, 16)
+            if 0xD800 <= code_point <= 0xDFFF:
+                # A surrogate code point makes the whole line unusable.
+                return None
+            pieces.append(self.stringifyCode(code_point))
+
+        return "".join(pieces)
+
+    def stringifyCode(self, cp):
+        # Control characters, DEL, '"' and '\' become a literal \uXXXX escape; others a character.
+        needs_escape = cp < 0x20 or cp in (0x22, 0x5C, 127)
+        if needs_escape:
+            return "\\u" + format(cp, "04x")
+
+        try:
+            return chr(cp)
+        except ValueError:
+            # NOTE(review): returns the int unchanged, which a later str.join rejects.
+            return cp
+
+    # The two single-class patterns are kept as class attributes; the check
+    # below uses only unpaired_surrogate_pattern.
+    high_surrogate_pattern = re.compile(r"([\ud800-\udbff])")
+    low_surrogate_pattern = re.compile(r"([\udc00-\udfff])")
+    # A high surrogate not followed by a low one, or a low not preceded by a high.
+    unpaired_surrogate_pattern = re.compile(
+        r"[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]"
+    )
+
+    def check_unpaired_surrogate_in_string(self, text):
+        """Return True if text contains a surrogate that is not part of a pair.
+
+        A pair is a high surrogate (U+D800..U+DBFF) immediately followed by a
+        low surrogate (U+DC00..U+DFFF). Adjacency is checked, not just counts:
+        a low surrogate followed by a high one has one of each, yet neither is
+        paired, so such text is reported as unpaired.
+
+        Args:
+            text: the string to inspect.
+
+        Returns:
+            True if any high surrogate lacks a directly following low
+            surrogate, or any low surrogate lacks a directly preceding high
+            surrogate; False otherwise, including for text with no surrogates.
+        """
+        # search() stops at the first offending code point, if there is one.
+        return self.unpaired_surrogate_pattern.search(text) is not None
diff --git a/testgen/generators/lang_names.py b/testgen/generators/lang_names.py
new file mode 100644
index 00000000..5abbf83c
--- /dev/null
+++ b/testgen/generators/lang_names.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+import os
+import json
+import re
+import logging
+from generators.base import DataGenerator
+
+reblankline = re.compile(r"^\s*$")
+
+
+class LangNamesGenerator(DataGenerator):
+    json_test = {"test_type": "lang_names"}
+    json_verify = {"test_type": "lang_names"}
+
+    def process_test_data(self):
+        """Generate the language display-name test and verify JSON files.
+
+        Reads languageNameTable.txt from the icu_version directory and writes
+        lang_name_test_file.json and lang_name_verify_file.json there.
+
+        Returns True on success, or None when the source file cannot be read.
+        """
+        self.languageNameDescr()
+        filename = "languageNameTable.txt"
+        rawlangnametestdata = self.readFile(filename, self.icu_version)
+
+        if not rawlangnametestdata:
+            return None
+
+        # TODO: add standard vs. dialect vs. alternate names
+        self.generateLanguageNameTestDataObjects(rawlangnametestdata)
+        # Write both files through the shared base-class helper.
+        self.saveJsonFile("lang_name_test_file.json", self.json_test, indent=1)
+        self.saveJsonFile("lang_name_verify_file.json", self.json_verify, indent=1)
+        return True
+
+    def languageNameDescr(self):
+        """Reset json_test (with its header) and json_verify on this instance.
+
+        Both are rebound to fresh dicts so later writes never mutate the class-level defaults.
+        """
+        test_id = "lang_names"
+        self.json_test = {
+            "test_type": test_id,
+            "Test scenario": test_id,
+            "description": "Language display name test cases. The first code declares the language whose display name is requested while the second code declares the locale to display the language name in.",
+            "source": {
+                "repository": "conformance-test",
+                "version": "trunk",
+                "url": "No URL yet.",
+                "source_version": "unspecified",
+            },
+        }
+        self.json_verify = {"test_type": test_id}
+
+    def generateLanguageNameTestDataObjects(self, rawtestdata):
+        """Fill json_test["tests"] and json_verify["verifications"] from rawtestdata."""
+        # Raw string, so "\s" is a regex escape, not an invalid string escape.
+        recommentline = re.compile(r"^\s*#")
+        count = 0
+        jtests = []
+        jverify = []
+
+        # Compute max size needed for label number
+        test_lines = rawtestdata.splitlines()
+        max_digits = self.computeMaxDigitsForCount(len(test_lines))
+        for item in test_lines:
+            if recommentline.match(item) or reblankline.match(item):
+                continue
+
+            test_data = self.parseLanguageNameData(item)
+            if test_data is None:
+                logging.debug(
+                    "  LanguageNames (%s): Line '%s' not recognized as valid test data entry",
+                    self.icu_version,
+                    item,
+                )
+                continue
+
+            label = str(count).rjust(max_digits, "0")
+            test_json = {
+                "label": label,
+                "language_label": test_data[0],
+                "locale_label": test_data[1],
+            }
+            jtests.append(test_json)
+            jverify.append({"label": label, "verify": test_data[2]})
+            count += 1
+
+        self.json_test["tests"] = self.sample_tests(jtests)
+        self.json_verify["verifications"] = self.sample_tests(jverify)
+
+        logging.info("LangNames Test (%s): %d lines processed", self.icu_version, count)
+
+    def parseLanguageNameData(self, rawtestdata):
+        # Split "language;locale;display name" into a 3-tuple, or None if the
+        # line does not contain that shape.
+        found = re.search(r"(\w*);(\w*);(.*)", rawtestdata)
+        if found is None:
+            return None
+
+        language_label, locale_label, expected_name = found.groups()
+        return (language_label, locale_label, expected_name)
diff --git a/testgen/generators/likely_subtags.py b/testgen/generators/likely_subtags.py
new file mode 100644
index 00000000..ba2d59f4
--- /dev/null
+++ b/testgen/generators/likely_subtags.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+import logging
+from generators.base import DataGenerator
+
+
+class LikelySubtagsGenerator(DataGenerator):
+    def process_test_data(self):
+        """Generate the likely-subtags test and verify files from likelySubtags.txt.
+
+        Each data line holds up to four ";"-separated tags:
+        Source; AddLikely; RemoveFavorScript; RemoveFavorRegion.
+        A missing or empty third tag defaults to the second, and a missing or
+        empty fourth tag to the third. Every line yields three tests on the
+        source tag: maximize, minimize and minimizeFavorRegion.
+
+        Writes likely_subtags_test.json and likely_subtags_verify.json through
+        saveJsonFile.
+
+        Returns None, both on success and when the source file cannot be read.
+        """
+        filename = "likelySubtags.txt"
+        file_version = "2023-08-17, https://github.com/unicode-org/cldr/pull/3176"
+        raw_likely_subtags_data = self.readFile(filename, self.icu_version)
+        if not raw_likely_subtags_data:
+            return None
+
+        json_test = {
+            "test_type": "likely_subtags",
+            "source_file": filename,
+            "source_version": file_version,
+            "tests": [],
+        }
+        json_verify = {
+            "test_type": "likely_subtags",
+            "source_file": filename,
+            "source_version": file_version,
+        }
+        # NOTE(review): the verify key is spelled "Test Scenario" while the test
+        # key is "Test scenario" - confirm whether the difference is intended.
+        scenario = "likely_subtags"
+        json_test["Test scenario"] = scenario
+        json_verify["Test Scenario"] = scenario
+
+        # Generate the test and verify json
+        testlines = raw_likely_subtags_data.splitlines()
+        max_digits = self.computeMaxDigitsForCount(len(testlines))
+        # Option requested for each expected-result column, in column order:
+        # AddLikely, RemoveFavorScript, RemoveFavorRegion.
+        options = ("maximize", "minimize", "minimizeFavorRegion")
+
+        count = 0
+        test_list = []
+        verify_list = []
+        for line in testlines:
+            # Ignore blank and # comment lines
+            if not line or line.startswith("#"):
+                continue
+
+            # Split at ";" and ignore whitespace
+            tags = [tag.strip() for tag in line.split(";")]
+
+            # Normalize to 4 tags:
+            # Source; AddLikely; RemoveFavorScript; RemoveFavorRegion
+            # Short lines are padded with empty tags; an empty third or fourth
+            # tag then takes the value of the tag before it.
+            tags.extend([""] * (4 - len(tags)))
+            tags[2] = tags[2] or tags[1]
+            tags[3] = tags[3] or tags[2]
+
+            source = tags[0]
+
+            # One test per option, each with its own label
+            for option, expected in zip(options, tags[1:4]):
+                label = str(count).rjust(max_digits, "0")
+                test_list.append(
+                    {"label": label, "locale": source, "option": option}
+                )
+                verify_list.append({"label": label, "verify": expected})
+                count += 1
+
+        # Add to the test and verify json data
+        json_test["tests"] = self.sample_tests(test_list)
+        json_verify["verifications"] = self.sample_tests(verify_list)
+
+        # Output the files including the json dump
+        self.saveJsonFile("likely_subtags_test.json", json_test)
+        self.saveJsonFile("likely_subtags_verify.json", json_verify)
+        logging.info(
+            "Likely Subtags Test (%s): %d lines processed", self.icu_version, count
+        )
+        return
diff --git a/testgen/generators/number_fmt.py b/testgen/generators/number_fmt.py
new file mode 100644
index 00000000..f2d799fc
--- /dev/null
+++ b/testgen/generators/number_fmt.py
@@ -0,0 +1,481 @@
+# -*- coding: utf-8 -*-
+import os
+import json
+import logging
+import re
+from generators.base import DataGenerator
+
+# Matches a line that is empty or contains only whitespace.
+reblankline = re.compile(r"^\s*$")
+
+# Values to be formatted in number format tests
+NUMBERS_TO_TEST = ["0", "91827.3645", "-0.22222"]
+
+# Positions of the locale tags within each parsed number permutation entry.
+# This selects es-MX, zh-TW, bn-BD
+NUMBERFORMAT_LOCALE_INDICES = [3, 7, 11]
+
+
+class NumberFmtGenerator(DataGenerator):
+    def process_test_data(self):
+        """Generate the number format test and verification JSON files.
+
+        Reads numberpermutationtest.txt and dcfmtest.txt for self.icu_version.
+        Nothing is written when the permutation data is unavailable; the decimal
+        format data is optional and only extends the generated lists.
+        """
+        rawdcmlfmttestdata = self.readFile("dcfmtest.txt", self.icu_version)
+        if rawdcmlfmttestdata:
+            # Drop a leading UTF-8 byte order mark held as three characters.
+            rawdcmlfmttestdata = rawdcmlfmttestdata.removeprefix("\xef\xbb\xbf")
+
+        filename = "numberpermutationtest.txt"
+        rawnumfmttestdata = self.readFile(filename, self.icu_version)
+        if not rawnumfmttestdata:
+            return
+
+        test_list, verify_list, count = self.generateNumberFmtTestDataObjects(
+            rawnumfmttestdata
+        )
+        if rawdcmlfmttestdata:
+            # Optional data: extend the lists only when dcfmtest.txt was read.
+            dcml_tests, dcml_verify, count = self.generateDcmlFmtTestDataObjects(
+                rawdcmlfmttestdata, count
+            )
+            test_list = test_list + dcml_tests
+            verify_list = verify_list + dcml_verify
+
+        json_test, json_verify = self.insertNumberFmtDescr(test_list, verify_list)
+        json_test["tests"] = self.sample_tests(json_test["tests"])
+        json_verify["verifications"] = self.sample_tests(
+            json_verify["verifications"]
+        )
+
+        self.saveJsonFile("num_fmt_test_file.json", json_test)
+        self.saveJsonFile("num_fmt_verify_file.json", json_verify, indent=1)
+
+        logging.info(
+            "NumberFormat Test (%s): %s tests created", self.icu_version, count
+        )
+
+    def generateNumberFmtTestDataObjects(self, rawtestdata, count=0):
+        """Build test and verify entries from number permutation test data.
+
+        Every parsed entry produces one test per (locale, number) pair, with
+        labels numbered sequentially from count.
+        Returns 2 lists JSON-formatted plus the next unused count:
+        all_tests_list, verify_list, count
+        """
+        original_count = count
+        entry_types = {
+            "compact-short": "notation",
+            "scientific/+ee/sign-always": "notation",
+            "percent": "unit",
+            "currency/EUR": "unit",  ## TODO: Change the unit
+            "measure-unit/length-meter": "unit",
+            "measure-unit/length-furlong": "unit",
+            "unit-width-narrow": "unit-width",
+            "unit-width-full-name": "unit-width",
+            "precision-integer": "precision",
+            ".000": "precision",
+            ".##/@@@+": "precision",
+            "@@": "precision",
+            "rounding-mode-floor": "rounding-mode",
+            "integer-width/##00": "integer-width",
+            "scale/0.5": "scale",
+            "group-on-aligned": "grouping",
+            "latin": "symbols",
+            "sign-accounting-except-zero": "sign-display",
+            "decimal-always": "decimal-separator-display",
+        }
+        test_list = self.parseNumberFmtTestData(rawtestdata)
+
+        all_tests_list = []
+        verify_list = []
+
+        expected_count = (
+            len(test_list) * len(NUMBERFORMAT_LOCALE_INDICES) * len(NUMBERS_TO_TEST)
+            + count
+        )
+        max_digits = self.computeMaxDigitsForCount(expected_count)
+
+        for test_options in test_list:
+            # The first three specify the formatting; unknown entries raise KeyError.
+            # Example: compact-short percent unit-width-full-name
+            part1 = entry_types[test_options[0]]
+            part2 = entry_types[test_options[1]]
+            part3 = entry_types[test_options[2]]
+            skeleton_parts = list(test_options[0:3])
+
+            # TODO: use combinations of part1, part2, and part3 to generate options.
+            # Locales are in element 3, 7, and 11 of parsed structure.
+
+            for locale_idx in NUMBERFORMAT_LOCALE_INDICES:
+                for number_idx, number in enumerate(NUMBERS_TO_TEST):
+                    label = str(count).rjust(max_digits, "0")
+                    # The expected results follow their locale tag in order.
+                    expected = test_options[locale_idx + 1 + number_idx]
+                    verify_list.append({"label": label, "verify": expected})
+
+                    try:
+                        options_dict = self.mapFmtSkeletonToECMA402(skeleton_parts)
+                    except KeyError as error:
+                        logging.warning(
+                            "Looking up Skeletons: %s [0-2] = %s, %s %s",
+                            error,
+                            test_options[0],
+                            test_options[1],
+                            test_options[2],
+                        )
+                        # Continue with no mapped options rather than an unbound
+                        # or stale value left over from an earlier iteration.
+                        options_dict = {}
+                    if not options_dict:
+                        logging.warning("$$$ OPTIONS not found for %s", label)
+                    # TODO: Look at the items in the options_dict to resolve conflicts and set up things better.
+                    resolved_options_dict = self.resolveOptions(
+                        options_dict, test_options
+                    )
+
+                    # All the tests in JSON form, with the options included
+                    all_tests_list.append(
+                        {
+                            "label": label,
+                            "locale": test_options[locale_idx],
+                            "skeleton": " ".join(skeleton_parts),
+                            "input": number,
+                            "options": resolved_options_dict,
+                        }
+                    )
+                    count += 1
+        logging.info(
+            "  generateNumberFmtTestDataObjects gives %d tests",
+            (count - original_count),
+        )
+        return all_tests_list, verify_list, count
+
+    def parseNumberFmtTestData(self, rawtestdata):
+        """Return a 15-field tuple for every permutation test in rawtestdata."""
+        return re.findall(
+            r"([\w/@\+\-\#\.]+) ([\w/@\+\-\#\.]+) ([\w/@\+\-\#\.]+)\n\s*(\w\w\-\w\w)\n\s*(.*?)\n\s*(.*?)\n\s*(.*?)\n\s*(\w\w\-\w\w)\n\s*(.*?)\n\s*(.*?)\n\s*(.*?)\n\s*(\w\w\-\w\w)\n\s*(.*?)\n\s*(.*?)\n\s*(.*?)\n",
+            rawtestdata,
+        )
+
+    # Count is the starting point for the values
+    # Use older Decimal Format specifications
+    # Source data: https://github.com/unicode-org/icu/blob/main/icu4c/source/test/testdata/dcfmtest.txt
+    def generateDcmlFmtTestDataObjects(self, rawtestdata, count=0):
+        """Build test and verify entries from decimal format test data.
+
+        Only "format" lines are used: the first line, comment lines, blank
+        lines, "parse" lines and lines that fail to parse are all skipped.
+
+        Args:
+            rawtestdata: full text of the decimal format test file.
+            count: number used for the first label; later labels count up.
+
+        Returns:
+            (all_tests_list, verify_list, count) where count is the next
+            unused label number.
+        """
+        original_count = count
+        recommentline = re.compile(r"^\s*#")
+        test_list = rawtestdata.splitlines()
+
+        all_tests_list = []
+        verify_list = []
+
+        # Transforming patterns to skeletons
+        pattern_to_skeleton = {
+            "0.0000E0": "scientific .0000/@",
+            "00": "integer-width/##00 group-off",
+            # '0.00': '.##/@@@',  # TODO: Fix this skeleton
+            "@@@": "@@@ group-off",
+            "@@###": "@@### group-off",
+            "#": "@ group-off",
+            "@@@@E0": "scientific/+e .0000/@@+",
+            "0.0##@E0": "scientific/+e .##/@@+",
+            "0005": "integer-width/0000 precision-increment/0005",
+        }
+
+        expected_total = len(test_list) + count
+        max_digits = self.computeMaxDigitsForCount(expected_total)
+
+        for item in test_list[1:]:
+            # Skip comment and blank lines.
+            if recommentline.match(item) or reblankline.match(item):
+                continue
+            # Ignore parse for now.
+            if item.startswith("parse"):
+                continue
+
+            pattern, round_mode, test_input, expected = self.parseDcmlFmtTestData(
+                item
+            )
+            if pattern is None:
+                continue
+
+            rounding_mode = self.mapRoundingToECMA402(round_mode)
+            if not rounding_mode:
+                # Default if not specified
+                rounding_mode = self.mapRoundingToECMA402("halfeven")
+            label = str(count).rjust(max_digits, "0")
+
+            entry = {
+                "label": label,
+                "op": "format",
+                "pattern": pattern,
+            }
+            # TODO!!: Look up the patterns to make skeletons
+            skeleton = pattern_to_skeleton.get(pattern)
+            if skeleton:
+                # Patterns without a known skeleton simply omit the key.
+                entry["skeleton"] = skeleton
+            entry["input"] = test_input
+
+            json_part = self.mapFmtSkeletonToECMA402([pattern])
+            resolved_options_dict = self.resolveOptions(json_part, None)
+            # None of these old patterns use groupings
+            resolved_options_dict["useGrouping"] = False
+
+            # Resolved options are merged last so they win over roundingMode.
+            entry["options"] = {
+                "roundingMode": rounding_mode,
+                **resolved_options_dict,
+            }
+
+            all_tests_list.append(entry)
+            verify_list.append({"label": label, "verify": expected})
+            count += 1
+
+        logging.info(
+            "  generateDcmlFmtTestDataObjects gives %d tests", (count - original_count)
+        )
+        return all_tests_list, verify_list, count
+
+    def parseDcmlFmtTestData(self, rawtestdata):
+        """Parse one "format" line of the decimal format test data.
+
+        Returns (pattern, rounding mode, input, expected) as strings, or four
+        None values (after logging a warning) when the line does not match.
+        """
+        reformat = re.compile(
+            r"format +([\d.E@\#]+) +(default|ceiling|floor|down|up|halfeven|halfdown|halfup|unnecessary) +\"(-?[\d.E]+)\" +\"(-?[\d.E]+|Inexact)\""
+        )
+        # TODO: ignore 'parse' line
+        # search() reports a non-match by returning None rather than raising,
+        # so no exception handler is needed around it.
+        test_match = reformat.search(rawtestdata)
+        if not test_match:
+            logging.warning("No test match with rawtestdata = %s", rawtestdata)
+            return None, None, None, None
+        # Groups: pattern, rounding mode, input, expected output.
+        return test_match.groups()
+
+    def mapFmtSkeletonToECMA402(self, options):
+        """Merge the ECMA-402 style options of every entry in options into a dict."""
+        # Known skeleton entries and older decimal format patterns.
+        ecma402_map = {
+            "compact-short": {"notation": "compact", "compactDisplay": "short"},
+            "scientific/+ee/sign-always": {
+                "notation": "scientific",
+                "conformanceExponent": "+ee",
+                "conformanceSign": "always",
+            },
+            # Percent with word "percent":
+            "percent": {"style": "unit", "unit": "percent"},  # "style": "percent",
+            "currency/EUR": {
+                "style": "currency",
+                "currencyDisplay": "symbol",
+                "currency": "EUR",
+            },
+            "measure-unit/length-meter": {"style": "unit", "unit": "meter"},
+            "measure-unit/length-furlong": {"style": "unit", "unit": "furlong"},
+            "unit-width-narrow": {
+                "unitDisplay": "narrow",
+                "currencyDisplay": "narrowSymbol",
+            },
+            "unit-width-full-name": {"unitDisplay": "long", "currencyDisplay": "name"},
+            # "unit-width-full-name": {"unitDisplay": "long"},
+            "precision-integer": {
+                "maximumFractionDigits": 0,
+                "minimumFractionDigits": 0,
+                "roundingType": "fractionDigits",
+            },
+            ".000": {"maximumFractionDigits": 3, "minimumFractionDigits": 3},
+            # Use maximumFractionDigits: 2, maximumSignificantDigits: 3, roundingPriority: "morePrecision"
+            ".##/@@@+": {
+                "maximumFractionDigits": 2,
+                "maximumSignificantDigits": 3,
+                "roundingPriority": "morePrecision",
+            },
+            "@@": {"maximumSignificantDigits": 2, "minimumSignificantDigits": 2},
+            "rounding-mode-floor": {"roundingMode": "floor"},
+            "integer-width/##00": {
+                "maximumIntegerDigits": 4,
+                "minimumIntegerDigits": 2,
+            },
+            "group-on-aligned": {"useGrouping": True},
+            "latin": {"numberingSystem": "latn"},
+            "sign-accounting-except-zero": {
+                "signDisplay": "exceptZero",
+                "currencySign": "accounting",
+            },
+            # These are all patterns...
+            "0.0000E0": {
+                "notation": "scientific",
+                "minimumIntegerDigits": 1,
+                "minimumFractionDigits": 4,
+                "maximumFractionDigits": 4,
+            },
+            "00": {"minimumIntegerDigits": 2, "maximumFractionDigits": 0},
+            "#.#": {"maximumFractionDigits": 1},
+            "@@@": {"minimumSignificantDigits": 3, "maximumSignificantDigits": 3},
+            "@@###": {"minimumSignificantDigits": 2, "maximumSignificantDigits": 5},
+            "@@@@E0": {
+                "notation": "scientific",
+                "minimumSignificantDigits": 4,
+                "maximumSignificantDigits": 4,
+            },
+            "0.0##E0": {
+                "notation": "scientific",
+                "minimumIntegerDigits": 1,
+                "minimumFractionDigits": 1,
+                "maximumFractionDigits": 3,
+            },
+            "00.##E0": {
+                "notation": "scientific",
+                "minimumIntegerDigits": 2,
+                "minimumFractionDigits": 1,
+                "maximumFractionDigits": 3,
+            },
+            "0005": {"minimumIntegerDigits": 2},
+            "0.00": {
+                "minimumIntegerDigits": 1,
+                "minimumFractionDigits": 2,
+                "maximumFractionDigits": 2,
+            },
+            "0.000E0": {
+                "notation": "scientific",
+                "minimumIntegerDigits": 1,
+                "minimumFractionDigits": 3,
+                "maximumFractionDigits": 3,
+            },
+            "0.0##": {
+                "minimumIntegerDigits": 1,
+                "minimumFractionDigits": 1,
+                "maximumFractionDigits": 3,
+            },
+            "#": {"minimumIntegerDigits": 1, "maximumFractionDigits": 0},
+            "0.#E0": {
+                "notation": "scientific",
+                "minimumIntegerDigits": 1,
+                "maximumFractionDigits": 1,
+            },
+            "0.##E0": {
+                "notation": "scientific",
+                "minimumIntegerDigits": 1,
+                "maximumFractionDigits": 2,
+            },
+            ".0E0": {
+                "notation": "scientific",
+                "minimumFractionDigits": 1,
+                "maximumFractionDigits": 1,
+            },
+            ".0#E0": {
+                "notation": "scientific",
+                "minimumFractionDigits": 1,
+                "maximumFractionDigits": 2,
+            },
+            "@@@@@@@@@@@@@@@@@@@@@@@@@": {
+                "minimumSignificantDigits": 21,
+                "maximumSignificantDigits": 21,
+            },
+            "0.0": {
+                "minimumIntegerDigits": 1,
+                "minimumFractionDigits": 1,
+                "maximumFractionDigits": 1,
+            },
+        }
+
+        options_dict = {}
+        # Which combinations of skeleton entries need modification?
+        # Look at the expected output...
+        for o in options:
+            if o == "scale/0.5":
+                options_dict["conformanceScale"] = o[6:]
+            elif o == "decimal-always":
+                options_dict["conformanceDecimalAlways"] = True
+            else:
+                # Unknown entries raise KeyError for the caller to handle.
+                options_dict.update(ecma402_map[o])
+
+        # TODO: resolve some combinations of entries that are in conflict
+        return options_dict
+
+    def mapRoundingToECMA402(self, rounding):
+        """Return the ECMA-402 rounding mode name for a test data rounding keyword."""
+        return {
+            "default": "halfEven",
+            "halfeven": "halfEven",
+            "halfodd": "none",
+            "halfdown": "halfTrunc",
+            "halfup": "halfExpand",
+            "down": "trunc",
+            "up": "expand",
+            "halfceiling": "halfCeil",
+            "halffloor": "halfFloor",
+            "floor": "floor",
+            "ceiling": "ceil",
+            "unnecessary": "unnecessary",
+        }[rounding]
+
+    def resolveOptions(self, raw_options, skeleton_list):
+        """Resolve conflicts with options before they go into a test's options.
+
+        Args:
+            raw_options: dict of ECMA-402 style options; it is not modified.
+            skeleton_list: sequence searched for the "percent" and
+                "unit-width-full-name" entries, or None to skip that step.
+
+        Returns:
+            A new dict holding the resolved options.
+        """
+        # TODO: fix all the potential conflicts
+        # Work on a copy so the caller's dict is never changed behind its back.
+        resolved = dict(raw_options)
+        if "maximumFractionDigits" in resolved:
+            # minimumSignificantDigits is dropped when both are present.
+            resolved.pop("minimumSignificantDigits", None)
+        elif (
+            resolved.get("notation") != "compact"
+            and resolved.get("style") != "currency"
+        ):
+            # Set up default maximumFractionDigits if not compact or currency
+            resolved["maximumFractionDigits"] = 6
+
+        if skeleton_list and "percent" in skeleton_list:
+            resolved["style"] = "unit"
+            resolved["unit"] = "percent"
+        if skeleton_list and "unit-width-full-name" in skeleton_list:
+            resolved["currencyDisplay"] = "name"
+            resolved["unitDisplay"] = "long"
+        return resolved
+
+    def insertNumberFmtDescr(self, tests_obj, verify_obj):
+        """Wrap the tests and verifications in their descriptive JSON headers.
+
+        Returns JSON data for tests and verification: test_data, verify_data
+        """
+        # Both documents start with the same two identifying keys.
+        header = {"Test scenario": "number_fmt", "test_type": "number_fmt"}
+        test_data = header | {
+            "description": "Number formatter test cases. The skeleton entry corresponds to the formatting specification used by ICU while the option entries adhere to ECMA-402 syntax.",
+            "source": {"repository": "icu", "version": "trunk"},
+            "url": "https://raw.githubusercontent.com/unicode-org/icu/main/icu4c/source/test/testdata/numberpermutationtest.txt",
+            "tests": tests_obj,
+        }
+        verify_data = header | {
+            "verifications": verify_obj,
+        }
+        return test_data, verify_data
diff --git a/testgen/testdata_gen.py b/testgen/testdata_gen.py
index cab97432..0bd066ce 100644
--- a/testgen/testdata_gen.py
+++ b/testgen/testdata_gen.py
@@ -1,25 +1,17 @@
 # -*- coding: utf-8 -*-
-
 import argparse
-import json
 import logging
 import logging.config
-import math
 import multiprocessing as mp
-import os
 import re
-import requests
 from enum import Enum
 
-reblankline = re.compile('^\s*$')
-
-# Global constants
-# Values to be formatted in number format tests
-NUMBERS_TO_TEST = ['0', '91827.3645', '-0.22222']
+from generators.collation_short import CollationShortGenerator
+from generators.lang_names import LangNamesGenerator
+from generators.likely_subtags import LikelySubtagsGenerator
+from generators.number_fmt import NumberFmtGenerator
 
-# Which locales are selected for this testing.
-# This selects es-MX, zh-TW, bn-BD
-NUMBERFORMAT_LOCALE_INDICES = [3, 7, 11]
+reblankline = re.compile(r"^\s*$")
 
 
 class TestType(str, Enum):
@@ -1006,28 +998,29 @@ def insertNumberFmtDescr(tests_obj, verify_obj):
 
 
 def setupArgs():
-    parser = argparse.ArgumentParser(prog='testdata_gen')
-    parser.add_argument('--icu_versions', nargs='*', default=[])
+    parser = argparse.ArgumentParser(prog="testdata_gen")
+    parser.add_argument("--icu_versions", nargs="*", default=[])
     all_test_types = [t.value for t in TestType]
-    parser.add_argument('--test_types', nargs='*', choices=all_test_types, default=all_test_types)
+    parser.add_argument(
+        "--test_types", nargs="*", choices=all_test_types, default=all_test_types
+    )
     # -1 is no limit
-    parser.add_argument('--run_limit', nargs='?', type=int, default=-1)
+    parser.add_argument("--run_limit", nargs="?", type=int, default=-1)
     new_args = parser.parse_args()
     return new_args
 
 
 def generate_versioned_data_parallel(args):
     num_processors = mp.cpu_count()
-    logging.info('Test data generation: %s processors for %s plans' , num_processors, len(args.icu_versions))
+    logging.info(
+        "Test data generation: %s processors for %s plans",
+        num_processors,
+        len(args.icu_versions),
+    )
 
     version_data = []
     for icu_version in args.icu_versions:
-        version_data.append(
-            {
-                'icu_version': icu_version,
-                'args': args
-            }
-        )
+        version_data.append({"icu_version": icu_version, "args": args})
 
     processor_pool = mp.Pool(num_processors)
     with processor_pool as p:
@@ -1037,32 +1030,36 @@ def generate_versioned_data_parallel(args):
 
 
 def generate_versioned_data(version_info):
-    new_args = version_info['args']
-    icu_version = version_info['icu_version']
-    data_generator = generateData(icu_version)
-    data_generator.run_limit = new_args.run_limit
+    args = version_info["args"]
+    icu_version = version_info["icu_version"]
 
-    logging.info('Generating .json files for data driven testing. ICU_VERSION requested = %s',
-                 icu_version)
+    logging.info(
+        "Generating .json files for data driven testing. ICU_VERSION requested = %s",
+        icu_version,
+    )
 
-    if len(new_args.test_types) < len(TestType):
-        logging.info('(Only generating %s)', ', '.join(new_args.test_types))
+    if len(args.test_types) < len(TestType):
+        logging.info("(Only generating %s)", ", ".join(args.test_types))
 
-    if TestType.NUMBER_FMT in new_args.test_types:
-        data_generator.processNumberFmtTestData()
+    if TestType.COLLATION_SHORT in args.test_types:
+        # This is slow
+        generator = CollationShortGenerator(icu_version, args.run_limit)
+        generator.process_test_data()
 
-    if TestType.COLLATION_SHORT in new_args.test_types:
+    if TestType.LANG_NAMES in args.test_types:
         # This is slow
-        data_generator.processCollationTestData()
+        generator = LangNamesGenerator(icu_version, args.run_limit)
+        generator.process_test_data()
 
-    if TestType.LIKELY_SUBTAGS in new_args.test_types:
-        data_generator.processLikelySubtagsData()
+    if TestType.LIKELY_SUBTAGS in args.test_types:
+        generator = LikelySubtagsGenerator(icu_version, args.run_limit)
+        generator.process_test_data()
 
-    if TestType.LANG_NAMES in new_args.test_types:
-        # This is slow
-        data_generator.processLangNameTestData()
+    if TestType.NUMBER_FMT in args.test_types:
+        generator = NumberFmtGenerator(icu_version, args.run_limit)
+        generator.process_test_data()
 
-    logging.info('++++ Data generation for %s is complete.', icu_version)
+    logging.info("++++ Data generation for %s is complete.", icu_version)
 
 
 def main():
@@ -1075,5 +1072,5 @@ def main():
     generate_versioned_data_parallel(new_args)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()