From cb2c9a02aa7b2c43678dc92ac9423214e8f51216 Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Mon, 9 Sep 2024 12:48:39 +0400 Subject: [PATCH] Add missing tokenizers (#2877) --- output/openapi/elasticsearch-openapi.json | 231 +++++++--- .../elasticsearch-serverless-openapi.json | 231 +++++++--- output/schema/schema-serverless.json | 399 ++++++++++++------ output/schema/schema.json | 207 +++++++-- output/typescript/types.ts | 25 +- specification/_types/analysis/analyzers.ts | 2 +- specification/_types/analysis/nori-plugin.ts | 34 ++ specification/_types/analysis/tokenizers.ts | 83 ++-- 8 files changed, 906 insertions(+), 306 deletions(-) create mode 100644 specification/_types/analysis/nori-plugin.ts diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 846b23d746..4d9b7c4d0e 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -74744,6 +74744,9 @@ { "$ref": "#/components/schemas/_types.analysis:CharGroupTokenizer" }, + { + "$ref": "#/components/schemas/_types.analysis:ClassicTokenizer" + }, { "$ref": "#/components/schemas/_types.analysis:EdgeNGramTokenizer" }, @@ -74760,14 +74763,23 @@ "$ref": "#/components/schemas/_types.analysis:NGramTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:NoriTokenizer" + "$ref": "#/components/schemas/_types.analysis:PathHierarchyTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:PathHierarchyTokenizer" + "$ref": "#/components/schemas/_types.analysis:PatternTokenizer" + }, + { + "$ref": "#/components/schemas/_types.analysis:SimplePatternTokenizer" + }, + { + "$ref": "#/components/schemas/_types.analysis:SimplePatternSplitTokenizer" }, { "$ref": "#/components/schemas/_types.analysis:StandardTokenizer" }, + { + "$ref": "#/components/schemas/_types.analysis:ThaiTokenizer" + }, { "$ref": "#/components/schemas/_types.analysis:UaxEmailUrlTokenizer" }, @@ -74775,13 +74787,13 @@ "$ref": 
"#/components/schemas/_types.analysis:WhitespaceTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:KuromojiTokenizer" + "$ref": "#/components/schemas/_types.analysis:IcuTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:PatternTokenizer" + "$ref": "#/components/schemas/_types.analysis:KuromojiTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:IcuTokenizer" + "$ref": "#/components/schemas/_types.analysis:NoriTokenizer" } ] }, @@ -74824,6 +74836,30 @@ } } }, + "_types.analysis:ClassicTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "classic" + ] + }, + "max_token_length": { + "type": "number" + } + }, + "required": [ + "type" + ] + } + ] + }, "_types.analysis:EdgeNGramTokenizer": { "allOf": [ { @@ -74857,8 +74893,7 @@ "required": [ "type", "max_gram", - "min_gram", - "token_chars" + "min_gram" ] } ] @@ -74974,13 +75009,12 @@ "required": [ "type", "max_gram", - "min_gram", - "token_chars" + "min_gram" ] } ] }, - "_types.analysis:NoriTokenizer": { + "_types.analysis:PathHierarchyTokenizer": { "allOf": [ { "$ref": "#/components/schemas/_types.analysis:TokenizerBase" @@ -74991,23 +75025,23 @@ "type": { "type": "string", "enum": [ - "nori_tokenizer" + "path_hierarchy" ] }, - "decompound_mode": { - "$ref": "#/components/schemas/_types.analysis:NoriDecompoundMode" + "buffer_size": { + "$ref": "#/components/schemas/_spec_utils:Stringifiedinteger" }, - "discard_punctuation": { - "type": "boolean" + "delimiter": { + "type": "string" }, - "user_dictionary": { + "replacement": { "type": "string" }, - "user_dictionary_rules": { - "type": "array", - "items": { - "type": "string" - } + "reverse": { + "$ref": "#/components/schemas/_spec_utils:Stringifiedboolean" + }, + "skip": { + "$ref": "#/components/schemas/_spec_utils:Stringifiedinteger" } }, "required": [ @@ -75016,7 +75050,7 @@ } ] }, - 
"_types.analysis:PathHierarchyTokenizer": { + "_types.analysis:PatternTokenizer": { "allOf": [ { "$ref": "#/components/schemas/_types.analysis:TokenizerBase" @@ -75027,23 +75061,65 @@ "type": { "type": "string", "enum": [ - "path_hierarchy" + "pattern" ] }, - "buffer_size": { - "$ref": "#/components/schemas/_spec_utils:Stringifiedinteger" - }, - "delimiter": { + "flags": { "type": "string" }, - "replacement": { + "group": { + "type": "number" + }, + "pattern": { "type": "string" + } + }, + "required": [ + "type" + ] + } + ] + }, + "_types.analysis:SimplePatternTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "simple_pattern" + ] }, - "reverse": { - "$ref": "#/components/schemas/_spec_utils:Stringifiedboolean" + "pattern": { + "type": "string" + } + }, + "required": [ + "type" + ] + } + ] + }, + "_types.analysis:SimplePatternSplitTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "simple_pattern_split" + ] }, - "skip": { - "$ref": "#/components/schemas/_spec_utils:Stringifiedinteger" + "pattern": { + "type": "string" } }, "required": [ @@ -75076,6 +75152,27 @@ } ] }, + "_types.analysis:ThaiTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "thai" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, "_types.analysis:UaxEmailUrlTokenizer": { "allOf": [ { @@ -75124,6 +75221,31 @@ } ] }, + "_types.analysis:IcuTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "icu_tokenizer" + ] + }, + "rule_files": { + "type": "string" + } + }, + 
"required": [ + "type", + "rule_files" + ] + } + ] + }, "_types.analysis:KuromojiTokenizer": { "allOf": [ { @@ -75170,7 +75292,7 @@ } ] }, - "_types.analysis:PatternTokenizer": { + "_types.analysis:NoriTokenizer": { "allOf": [ { "$ref": "#/components/schemas/_types.analysis:TokenizerBase" @@ -75181,46 +75303,27 @@ "type": { "type": "string", "enum": [ - "pattern" + "nori_tokenizer" ] }, - "flags": { - "type": "string" + "decompound_mode": { + "$ref": "#/components/schemas/_types.analysis:NoriDecompoundMode" }, - "group": { - "type": "number" + "discard_punctuation": { + "type": "boolean" }, - "pattern": { + "user_dictionary": { "type": "string" - } - }, - "required": [ - "type" - ] - } - ] - }, - "_types.analysis:IcuTokenizer": { - "allOf": [ - { - "$ref": "#/components/schemas/_types.analysis:TokenizerBase" - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "icu_tokenizer" - ] }, - "rule_files": { - "type": "string" + "user_dictionary_rules": { + "type": "array", + "items": { + "type": "string" + } } }, "required": [ - "type", - "rule_files" + "type" ] } ] diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index c8d5f984e2..fe2784eaa4 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -48666,6 +48666,9 @@ { "$ref": "#/components/schemas/_types.analysis:CharGroupTokenizer" }, + { + "$ref": "#/components/schemas/_types.analysis:ClassicTokenizer" + }, { "$ref": "#/components/schemas/_types.analysis:EdgeNGramTokenizer" }, @@ -48682,14 +48685,23 @@ "$ref": "#/components/schemas/_types.analysis:NGramTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:NoriTokenizer" + "$ref": "#/components/schemas/_types.analysis:PathHierarchyTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:PathHierarchyTokenizer" + "$ref": 
"#/components/schemas/_types.analysis:PatternTokenizer" + }, + { + "$ref": "#/components/schemas/_types.analysis:SimplePatternTokenizer" + }, + { + "$ref": "#/components/schemas/_types.analysis:SimplePatternSplitTokenizer" }, { "$ref": "#/components/schemas/_types.analysis:StandardTokenizer" }, + { + "$ref": "#/components/schemas/_types.analysis:ThaiTokenizer" + }, { "$ref": "#/components/schemas/_types.analysis:UaxEmailUrlTokenizer" }, @@ -48697,13 +48709,13 @@ "$ref": "#/components/schemas/_types.analysis:WhitespaceTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:KuromojiTokenizer" + "$ref": "#/components/schemas/_types.analysis:IcuTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:PatternTokenizer" + "$ref": "#/components/schemas/_types.analysis:KuromojiTokenizer" }, { - "$ref": "#/components/schemas/_types.analysis:IcuTokenizer" + "$ref": "#/components/schemas/_types.analysis:NoriTokenizer" } ] }, @@ -48746,6 +48758,30 @@ } } }, + "_types.analysis:ClassicTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "classic" + ] + }, + "max_token_length": { + "type": "number" + } + }, + "required": [ + "type" + ] + } + ] + }, "_types.analysis:EdgeNGramTokenizer": { "allOf": [ { @@ -48779,8 +48815,7 @@ "required": [ "type", "max_gram", - "min_gram", - "token_chars" + "min_gram" ] } ] @@ -48896,13 +48931,12 @@ "required": [ "type", "max_gram", - "min_gram", - "token_chars" + "min_gram" ] } ] }, - "_types.analysis:NoriTokenizer": { + "_types.analysis:PathHierarchyTokenizer": { "allOf": [ { "$ref": "#/components/schemas/_types.analysis:TokenizerBase" @@ -48913,23 +48947,23 @@ "type": { "type": "string", "enum": [ - "nori_tokenizer" + "path_hierarchy" ] }, - "decompound_mode": { - "$ref": "#/components/schemas/_types.analysis:NoriDecompoundMode" + "buffer_size": { + "$ref": 
"#/components/schemas/_spec_utils:Stringifiedinteger" }, - "discard_punctuation": { - "type": "boolean" + "delimiter": { + "type": "string" }, - "user_dictionary": { + "replacement": { "type": "string" }, - "user_dictionary_rules": { - "type": "array", - "items": { - "type": "string" - } + "reverse": { + "$ref": "#/components/schemas/_spec_utils:Stringifiedboolean" + }, + "skip": { + "$ref": "#/components/schemas/_spec_utils:Stringifiedinteger" } }, "required": [ @@ -48938,7 +48972,7 @@ } ] }, - "_types.analysis:PathHierarchyTokenizer": { + "_types.analysis:PatternTokenizer": { "allOf": [ { "$ref": "#/components/schemas/_types.analysis:TokenizerBase" @@ -48949,23 +48983,65 @@ "type": { "type": "string", "enum": [ - "path_hierarchy" + "pattern" ] }, - "buffer_size": { - "$ref": "#/components/schemas/_spec_utils:Stringifiedinteger" - }, - "delimiter": { + "flags": { "type": "string" }, - "replacement": { + "group": { + "type": "number" + }, + "pattern": { "type": "string" + } + }, + "required": [ + "type" + ] + } + ] + }, + "_types.analysis:SimplePatternTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "simple_pattern" + ] }, - "reverse": { - "$ref": "#/components/schemas/_spec_utils:Stringifiedboolean" + "pattern": { + "type": "string" + } + }, + "required": [ + "type" + ] + } + ] + }, + "_types.analysis:SimplePatternSplitTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "simple_pattern_split" + ] }, - "skip": { - "$ref": "#/components/schemas/_spec_utils:Stringifiedinteger" + "pattern": { + "type": "string" } }, "required": [ @@ -48998,6 +49074,27 @@ } ] }, + "_types.analysis:ThaiTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": 
"object", + "properties": { + "type": { + "type": "string", + "enum": [ + "thai" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, "_types.analysis:UaxEmailUrlTokenizer": { "allOf": [ { @@ -49046,6 +49143,31 @@ } ] }, + "_types.analysis:IcuTokenizer": { + "allOf": [ + { + "$ref": "#/components/schemas/_types.analysis:TokenizerBase" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "icu_tokenizer" + ] + }, + "rule_files": { + "type": "string" + } + }, + "required": [ + "type", + "rule_files" + ] + } + ] + }, "_types.analysis:KuromojiTokenizer": { "allOf": [ { @@ -49092,7 +49214,7 @@ } ] }, - "_types.analysis:PatternTokenizer": { + "_types.analysis:NoriTokenizer": { "allOf": [ { "$ref": "#/components/schemas/_types.analysis:TokenizerBase" @@ -49103,46 +49225,27 @@ "type": { "type": "string", "enum": [ - "pattern" + "nori_tokenizer" ] }, - "flags": { - "type": "string" + "decompound_mode": { + "$ref": "#/components/schemas/_types.analysis:NoriDecompoundMode" }, - "group": { - "type": "number" + "discard_punctuation": { + "type": "boolean" }, - "pattern": { + "user_dictionary": { "type": "string" - } - }, - "required": [ - "type" - ] - } - ] - }, - "_types.analysis:IcuTokenizer": { - "allOf": [ - { - "$ref": "#/components/schemas/_types.analysis:TokenizerBase" - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "icu_tokenizer" - ] }, - "rule_files": { - "type": "string" + "user_dictionary_rules": { + "type": "array", + "items": { + "type": "string" + } } }, "required": [ - "type", - "rule_files" + "type" ] } ] diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index d717dca62c..f1df4f49f7 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -77343,7 +77343,7 @@ "name": "NoriDecompoundMode", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L74-L78" + "specLocation": 
"_types/analysis/nori-plugin.ts#L22-L26" }, { "kind": "interface", @@ -80462,7 +80462,7 @@ "name": "TokenChar", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L46-L53" + "specLocation": "_types/analysis/tokenizers.ts#L59-L66" }, { "codegenNames": [ @@ -83745,7 +83745,7 @@ "name": "Tokenizer", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L119-L121", + "specLocation": "_types/analysis/tokenizers.ts#L137-L139", "type": { "items": [ { @@ -83772,7 +83772,7 @@ "name": "TokenizerDefinition", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L123-L141", + "specLocation": "_types/analysis/tokenizers.ts#L141-L164", "type": { "items": [ { @@ -83782,6 +83782,13 @@ "namespace": "_types.analysis" } }, + { + "kind": "instance_of", + "type": { + "name": "ClassicTokenizer", + "namespace": "_types.analysis" + } + }, { "kind": "instance_of", "type": { @@ -83820,14 +83827,28 @@ { "kind": "instance_of", "type": { - "name": "NoriTokenizer", + "name": "PathHierarchyTokenizer", "namespace": "_types.analysis" } }, { "kind": "instance_of", "type": { - "name": "PathHierarchyTokenizer", + "name": "PatternTokenizer", + "namespace": "_types.analysis" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SimplePatternTokenizer", + "namespace": "_types.analysis" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SimplePatternSplitTokenizer", "namespace": "_types.analysis" } }, @@ -83838,6 +83859,13 @@ "namespace": "_types.analysis" } }, + { + "kind": "instance_of", + "type": { + "name": "ThaiTokenizer", + "namespace": "_types.analysis" + } + }, { "kind": "instance_of", "type": { @@ -83855,21 +83883,21 @@ { "kind": "instance_of", "type": { - "name": "KuromojiTokenizer", + "name": "IcuTokenizer", "namespace": "_types.analysis" } }, { "kind": "instance_of", "type": { - "name": "PatternTokenizer", + "name": "KuromojiTokenizer", "namespace": "_types.analysis" } }, { "kind": 
"instance_of", "type": { - "name": "IcuTokenizer", + "name": "NoriTokenizer", "namespace": "_types.analysis" } } @@ -83929,7 +83957,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L55-L59" + "specLocation": "_types/analysis/tokenizers.ts#L31-L38" }, { "kind": "interface", @@ -83950,7 +83978,42 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L26-L28" + "specLocation": "_types/analysis/tokenizers.ts#L27-L29" + }, + { + "inherits": { + "type": { + "name": "TokenizerBase", + "namespace": "_types.analysis" + } + }, + "kind": "interface", + "name": { + "name": "ClassicTokenizer", + "namespace": "_types.analysis" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "classic" + } + }, + { + "name": "max_token_length", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "_types/analysis/tokenizers.ts#L40-L46" }, { "inherits": { @@ -84008,7 +84071,8 @@ }, { "name": "token_chars", - "required": true, + "required": false, + "serverDefault": [], "type": { "kind": "array_of", "value": { @@ -84021,7 +84085,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L30-L36" + "specLocation": "_types/analysis/tokenizers.ts#L48-L57" }, { "inherits": { @@ -84056,7 +84120,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L61-L64" + "specLocation": "_types/analysis/tokenizers.ts#L68-L71" }, { "inherits": { @@ -84080,7 +84144,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L66-L68" + "specLocation": "_types/analysis/tokenizers.ts#L73-L75" }, { "inherits": { @@ -84104,7 +84168,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L70-L72" + "specLocation": "_types/analysis/tokenizers.ts#L77-L79" }, { "inherits": { @@ -84162,7 +84226,8 @@ }, { "name": "token_chars", - "required": true, + "required": false, + "serverDefault": [], "type": { "kind": "array_of", "value": { @@ 
-84175,7 +84240,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L38-L44" + "specLocation": "_types/analysis/tokenizers.ts#L81-L90" }, { "inherits": { @@ -84186,7 +84251,7 @@ }, "kind": "interface", "name": { - "name": "NoriTokenizer", + "name": "PathHierarchyTokenizer", "namespace": "_types.analysis" }, "properties": [ @@ -84195,33 +84260,42 @@ "required": true, "type": { "kind": "literal_value", - "value": "nori_tokenizer" + "value": "path_hierarchy" } }, { - "name": "decompound_mode", + "name": "buffer_size", "required": false, "type": { + "generics": [ + { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + ], "kind": "instance_of", "type": { - "name": "NoriDecompoundMode", - "namespace": "_types.analysis" + "name": "Stringified", + "namespace": "_spec_utils" } } }, { - "name": "discard_punctuation", + "name": "delimiter", "required": false, "type": { "kind": "instance_of", "type": { - "name": "boolean", + "name": "string", "namespace": "_builtins" } } }, { - "name": "user_dictionary", + "name": "replacement", "required": false, "type": { "kind": "instance_of", @@ -84232,21 +84306,47 @@ } }, { - "name": "user_dictionary_rules", + "name": "reverse", "required": false, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" + "generics": [ + { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } } + ], + "kind": "instance_of", + "type": { + "name": "Stringified", + "namespace": "_spec_utils" + } + } + }, + { + "name": "skip", + "required": false, + "type": { + "generics": [ + { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + ], + "kind": "instance_of", + "type": { + "name": "Stringified", + "namespace": "_spec_utils" } } } ], - "specLocation": "_types/analysis/tokenizers.ts#L80-L86" + "specLocation": "_types/analysis/tokenizers.ts#L92-L99" }, { "inherits": { @@ 
-84257,7 +84357,7 @@ }, "kind": "interface", "name": { - "name": "PathHierarchyTokenizer", + "name": "PatternTokenizer", "namespace": "_types.analysis" }, "properties": [ @@ -84266,42 +84366,33 @@ "required": true, "type": { "kind": "literal_value", - "value": "path_hierarchy" + "value": "pattern" } }, { - "name": "buffer_size", + "name": "flags", "required": false, "type": { - "generics": [ - { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - ], "kind": "instance_of", "type": { - "name": "Stringified", - "namespace": "_spec_utils" + "name": "string", + "namespace": "_builtins" } } }, { - "name": "delimiter", + "name": "group", "required": false, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "integer", + "namespace": "_types" } } }, { - "name": "replacement", + "name": "pattern", "required": false, "type": { "kind": "instance_of", @@ -84310,49 +84401,79 @@ "namespace": "_builtins" } } + } + ], + "specLocation": "_types/analysis/tokenizers.ts#L101-L106" + }, + { + "inherits": { + "type": { + "name": "TokenizerBase", + "namespace": "_types.analysis" + } + }, + "kind": "interface", + "name": { + "name": "SimplePatternTokenizer", + "namespace": "_types.analysis" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "simple_pattern" + } }, { - "name": "reverse", + "name": "pattern", "required": false, "type": { - "generics": [ - { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - ], "kind": "instance_of", "type": { - "name": "Stringified", - "namespace": "_spec_utils" + "name": "string", + "namespace": "_builtins" } } + } + ], + "specLocation": "_types/analysis/tokenizers.ts#L108-L111" + }, + { + "inherits": { + "type": { + "name": "TokenizerBase", + "namespace": "_types.analysis" + } + }, + "kind": "interface", + "name": { + "name": "SimplePatternSplitTokenizer", + 
"namespace": "_types.analysis" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "simple_pattern_split" + } }, { - "name": "skip", + "name": "pattern", "required": false, "type": { - "generics": [ - { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - ], "kind": "instance_of", "type": { - "name": "Stringified", - "namespace": "_spec_utils" + "name": "string", + "namespace": "_builtins" } } } ], - "specLocation": "_types/analysis/tokenizers.ts#L88-L95" + "specLocation": "_types/analysis/tokenizers.ts#L113-L116" }, { "inherits": { @@ -84387,7 +84508,31 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L104-L107" + "specLocation": "_types/analysis/tokenizers.ts#L118-L121" + }, + { + "inherits": { + "type": { + "name": "TokenizerBase", + "namespace": "_types.analysis" + } + }, + "kind": "interface", + "name": { + "name": "ThaiTokenizer", + "namespace": "_types.analysis" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "thai" + } + } + ], + "specLocation": "_types/analysis/tokenizers.ts#L123-L125" }, { "inherits": { @@ -84422,7 +84567,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L109-L112" + "specLocation": "_types/analysis/tokenizers.ts#L127-L130" }, { "inherits": { @@ -84457,7 +84602,42 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L114-L117" + "specLocation": "_types/analysis/tokenizers.ts#L132-L135" + }, + { + "inherits": { + "type": { + "name": "TokenizerBase", + "namespace": "_types.analysis" + } + }, + "kind": "interface", + "name": { + "name": "IcuTokenizer", + "namespace": "_types.analysis" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "icu_tokenizer" + } + }, + { + "name": "rule_files", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + 
"namespace": "_builtins" + } + } + } + ], + "specLocation": "_types/analysis/icu-plugin.ts#L30-L33" }, { "inherits": { @@ -84572,7 +84752,7 @@ }, "kind": "interface", "name": { - "name": "PatternTokenizer", + "name": "NoriTokenizer", "namespace": "_types.analysis" }, "properties": [ @@ -84581,33 +84761,33 @@ "required": true, "type": { "kind": "literal_value", - "value": "pattern" + "value": "nori_tokenizer" } }, { - "name": "flags", + "name": "decompound_mode", "required": false, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "NoriDecompoundMode", + "namespace": "_types.analysis" } } }, { - "name": "group", + "name": "discard_punctuation", "required": false, "type": { "kind": "instance_of", "type": { - "name": "integer", - "namespace": "_types" + "name": "boolean", + "namespace": "_builtins" } } }, { - "name": "pattern", + "name": "user_dictionary", "required": false, "type": { "kind": "instance_of", @@ -84616,44 +84796,23 @@ "namespace": "_builtins" } } - } - ], - "specLocation": "_types/analysis/tokenizers.ts#L97-L102" - }, - { - "inherits": { - "type": { - "name": "TokenizerBase", - "namespace": "_types.analysis" - } - }, - "kind": "interface", - "name": { - "name": "IcuTokenizer", - "namespace": "_types.analysis" - }, - "properties": [ - { - "name": "type", - "required": true, - "type": { - "kind": "literal_value", - "value": "icu_tokenizer" - } }, { - "name": "rule_files", - "required": true, + "name": "user_dictionary_rules", + "required": false, "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } } } } ], - "specLocation": "_types/analysis/icu-plugin.ts#L30-L33" + "specLocation": "_types/analysis/nori-plugin.ts#L28-L34" }, { "esQuirk": "This is a boolean that evolved into an enum. 
Boolean values should be accepted on reading, and\ntrue and false must be serialized as JSON booleans, or it may break Kibana (see elasticsearch-java#139)", diff --git a/output/schema/schema.json b/output/schema/schema.json index 778bc50378..ed3b4f523c 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -63623,7 +63623,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L55-L59" + "specLocation": "_types/analysis/tokenizers.ts#L31-L38" }, { "kind": "interface", @@ -63705,6 +63705,41 @@ ], "specLocation": "_types/analysis/analyzers.ts#L115-L119" }, + { + "kind": "interface", + "inherits": { + "type": { + "name": "TokenizerBase", + "namespace": "_types.analysis" + } + }, + "name": { + "name": "ClassicTokenizer", + "namespace": "_types.analysis" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "classic" + } + }, + { + "name": "max_token_length", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "_types/analysis/tokenizers.ts#L40-L46" + }, { "kind": "interface", "inherits": { @@ -64456,7 +64491,8 @@ }, { "name": "token_chars", - "required": true, + "required": false, + "serverDefault": [], "type": { "kind": "array_of", "value": { @@ -64469,7 +64505,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L30-L36" + "specLocation": "_types/analysis/tokenizers.ts#L48-L57" }, { "kind": "interface", @@ -66219,7 +66255,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L61-L64" + "specLocation": "_types/analysis/tokenizers.ts#L68-L71" }, { "kind": "interface", @@ -66848,7 +66884,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L66-L68" + "specLocation": "_types/analysis/tokenizers.ts#L73-L75" }, { "kind": "interface", @@ -67034,7 +67070,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L70-L72" + "specLocation": 
"_types/analysis/tokenizers.ts#L77-L79" }, { "kind": "interface", @@ -67265,7 +67301,8 @@ }, { "name": "token_chars", - "required": true, + "required": false, + "serverDefault": [], "type": { "kind": "array_of", "value": { @@ -67278,7 +67315,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L38-L44" + "specLocation": "_types/analysis/tokenizers.ts#L81-L90" }, { "kind": "interface", @@ -67362,7 +67399,7 @@ "name": "NoriDecompoundMode", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L74-L78" + "specLocation": "_types/analysis/nori-plugin.ts#L22-L26" }, { "kind": "interface", @@ -67471,7 +67508,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L80-L86" + "specLocation": "_types/analysis/nori-plugin.ts#L28-L34" }, { "kind": "type_alias", @@ -67665,7 +67702,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L88-L95" + "specLocation": "_types/analysis/tokenizers.ts#L92-L99" }, { "kind": "interface", @@ -67978,7 +68015,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L97-L102" + "specLocation": "_types/analysis/tokenizers.ts#L101-L106" }, { "kind": "interface", @@ -68718,6 +68755,76 @@ ], "specLocation": "_types/analysis/analyzers.ts#L329-L332" }, + { + "kind": "interface", + "inherits": { + "type": { + "name": "TokenizerBase", + "namespace": "_types.analysis" + } + }, + "name": { + "name": "SimplePatternSplitTokenizer", + "namespace": "_types.analysis" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "simple_pattern_split" + } + }, + { + "name": "pattern", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "_types/analysis/tokenizers.ts#L113-L116" + }, + { + "kind": "interface", + "inherits": { + "type": { + "name": "TokenizerBase", + "namespace": "_types.analysis" + } + }, + "name": { + "name": "SimplePatternTokenizer", + 
"namespace": "_types.analysis" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "simple_pattern" + } + }, + { + "name": "pattern", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "_types/analysis/tokenizers.ts#L108-L111" + }, { "kind": "interface", "name": { @@ -69061,7 +69168,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L104-L107" + "specLocation": "_types/analysis/tokenizers.ts#L118-L121" }, { "kind": "interface", @@ -69640,6 +69747,30 @@ ], "specLocation": "_types/analysis/analyzers.ts#L306-L310" }, + { + "kind": "interface", + "inherits": { + "type": { + "name": "TokenizerBase", + "namespace": "_types.analysis" + } + }, + "name": { + "name": "ThaiTokenizer", + "namespace": "_types.analysis" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "thai" + } + } + ], + "specLocation": "_types/analysis/tokenizers.ts#L123-L125" + }, { "kind": "enum", "members": [ @@ -69666,7 +69797,7 @@ "name": "TokenChar", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L46-L53" + "specLocation": "_types/analysis/tokenizers.ts#L59-L66" }, { "kind": "type_alias", @@ -70077,7 +70208,7 @@ "name": "Tokenizer", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L119-L121", + "specLocation": "_types/analysis/tokenizers.ts#L137-L139", "type": { "kind": "union_of", "items": [ @@ -70117,7 +70248,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L26-L28" + "specLocation": "_types/analysis/tokenizers.ts#L27-L29" }, { "kind": "type_alias", @@ -70125,7 +70256,7 @@ "name": "TokenizerDefinition", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L123-L141", + "specLocation": "_types/analysis/tokenizers.ts#L141-L164", "type": { "kind": "union_of", 
"items": [ @@ -70136,6 +70267,13 @@ "namespace": "_types.analysis" } }, + { + "kind": "instance_of", + "type": { + "name": "ClassicTokenizer", + "namespace": "_types.analysis" + } + }, { "kind": "instance_of", "type": { @@ -70174,14 +70312,28 @@ { "kind": "instance_of", "type": { - "name": "NoriTokenizer", + "name": "PathHierarchyTokenizer", "namespace": "_types.analysis" } }, { "kind": "instance_of", "type": { - "name": "PathHierarchyTokenizer", + "name": "PatternTokenizer", + "namespace": "_types.analysis" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SimplePatternTokenizer", + "namespace": "_types.analysis" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SimplePatternSplitTokenizer", "namespace": "_types.analysis" } }, @@ -70192,6 +70344,13 @@ "namespace": "_types.analysis" } }, + { + "kind": "instance_of", + "type": { + "name": "ThaiTokenizer", + "namespace": "_types.analysis" + } + }, { "kind": "instance_of", "type": { @@ -70209,21 +70368,21 @@ { "kind": "instance_of", "type": { - "name": "KuromojiTokenizer", + "name": "IcuTokenizer", "namespace": "_types.analysis" } }, { "kind": "instance_of", "type": { - "name": "PatternTokenizer", + "name": "KuromojiTokenizer", "namespace": "_types.analysis" } }, { "kind": "instance_of", "type": { - "name": "IcuTokenizer", + "name": "NoriTokenizer", "namespace": "_types.analysis" } } @@ -70381,7 +70540,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L109-L112" + "specLocation": "_types/analysis/tokenizers.ts#L127-L130" }, { "kind": "interface", @@ -70504,7 +70663,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L114-L117" + "specLocation": "_types/analysis/tokenizers.ts#L132-L135" }, { "kind": "interface", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 8a152dcd17..02aac32cdf 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -4409,6 +4409,11 @@ export interface AnalysisCjkAnalyzer { stopwords_path?: string } +export 
interface AnalysisClassicTokenizer extends AnalysisTokenizerBase { + type: 'classic' + max_token_length?: integer +} + export interface AnalysisCommonGramsTokenFilter extends AnalysisTokenFilterBase { type: 'common_grams' common_words?: string[] @@ -4495,7 +4500,7 @@ export interface AnalysisEdgeNGramTokenizer extends AnalysisTokenizerBase { custom_token_chars?: string max_gram: integer min_gram: integer - token_chars: AnalysisTokenChar[] + token_chars?: AnalysisTokenChar[] } export interface AnalysisElisionTokenFilter extends AnalysisTokenFilterBase { @@ -4838,7 +4843,7 @@ export interface AnalysisNGramTokenizer extends AnalysisTokenizerBase { custom_token_chars?: string max_gram: integer min_gram: integer - token_chars: AnalysisTokenChar[] + token_chars?: AnalysisTokenChar[] } export interface AnalysisNoriAnalyzer { @@ -5003,6 +5008,16 @@ export interface AnalysisSimpleAnalyzer { version?: VersionString } +export interface AnalysisSimplePatternSplitTokenizer extends AnalysisTokenizerBase { + type: 'simple_pattern_split' + pattern?: string +} + +export interface AnalysisSimplePatternTokenizer extends AnalysisTokenizerBase { + type: 'simple_pattern' + pattern?: string +} + export interface AnalysisSnowballAnalyzer { type: 'snowball' version?: VersionString @@ -5110,6 +5125,10 @@ export interface AnalysisThaiAnalyzer { stopwords_path?: string } +export interface AnalysisThaiTokenizer extends AnalysisTokenizerBase { + type: 'thai' +} + export type AnalysisTokenChar = 'letter' | 'digit' | 'whitespace' | 'punctuation' | 'symbol' | 'custom' export type AnalysisTokenFilter = string | AnalysisTokenFilterDefinition @@ -5126,7 +5145,7 @@ export interface AnalysisTokenizerBase { version?: VersionString } -export type AnalysisTokenizerDefinition = AnalysisCharGroupTokenizer | AnalysisEdgeNGramTokenizer | AnalysisKeywordTokenizer | AnalysisLetterTokenizer | AnalysisLowercaseTokenizer | AnalysisNGramTokenizer | AnalysisNoriTokenizer | AnalysisPathHierarchyTokenizer | 
AnalysisStandardTokenizer | AnalysisUaxEmailUrlTokenizer | AnalysisWhitespaceTokenizer | AnalysisKuromojiTokenizer | AnalysisPatternTokenizer | AnalysisIcuTokenizer +export type AnalysisTokenizerDefinition = AnalysisCharGroupTokenizer | AnalysisClassicTokenizer | AnalysisEdgeNGramTokenizer | AnalysisKeywordTokenizer | AnalysisLetterTokenizer | AnalysisLowercaseTokenizer | AnalysisNGramTokenizer | AnalysisPathHierarchyTokenizer | AnalysisPatternTokenizer | AnalysisSimplePatternTokenizer | AnalysisSimplePatternSplitTokenizer | AnalysisStandardTokenizer | AnalysisThaiTokenizer | AnalysisUaxEmailUrlTokenizer | AnalysisWhitespaceTokenizer | AnalysisIcuTokenizer | AnalysisKuromojiTokenizer | AnalysisNoriTokenizer export interface AnalysisTrimTokenFilter extends AnalysisTokenFilterBase { type: 'trim' diff --git a/specification/_types/analysis/analyzers.ts b/specification/_types/analysis/analyzers.ts index 38a51256ec..47da0e68fd 100644 --- a/specification/_types/analysis/analyzers.ts +++ b/specification/_types/analysis/analyzers.ts @@ -22,8 +22,8 @@ import { integer } from '@_types/Numeric' import { IcuAnalyzer } from './icu-plugin' import { KuromojiAnalyzer } from './kuromoji-plugin' import { Language, SnowballLanguage } from './languages' +import { NoriDecompoundMode } from './nori-plugin' import { StopWords } from './StopWords' -import { NoriDecompoundMode } from './tokenizers' export class CustomAnalyzer { type: 'custom' diff --git a/specification/_types/analysis/nori-plugin.ts b/specification/_types/analysis/nori-plugin.ts new file mode 100644 index 0000000000..b245996e72 --- /dev/null +++ b/specification/_types/analysis/nori-plugin.ts @@ -0,0 +1,34 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. 
licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { TokenizerBase } from './tokenizers'
+
+export enum NoriDecompoundMode {
+  discard,
+  none,
+  mixed
+}
+
+export class NoriTokenizer extends TokenizerBase {
+  type: 'nori_tokenizer'
+  decompound_mode?: NoriDecompoundMode
+  discard_punctuation?: boolean
+  user_dictionary?: string
+  user_dictionary_rules?: string[]
+}
diff --git a/specification/_types/analysis/tokenizers.ts b/specification/_types/analysis/tokenizers.ts
index 322a0098a1..38308bcbee 100644
--- a/specification/_types/analysis/tokenizers.ts
+++ b/specification/_types/analysis/tokenizers.ts
@@ -22,25 +22,38 @@ import { VersionString } from '@_types/common'
 import { integer } from '@_types/Numeric'
 import { IcuTokenizer } from './icu-plugin'
 import { KuromojiTokenizer } from './kuromoji-plugin'
+import { NoriTokenizer } from './nori-plugin'
 
 export class TokenizerBase {
   version?: VersionString
 }
 
-export class EdgeNGramTokenizer extends TokenizerBase {
-  type: 'edge_ngram'
-  custom_token_chars?: string
-  max_gram: integer
-  min_gram: integer
-  token_chars: TokenChar[]
+export class CharGroupTokenizer extends TokenizerBase {
+  type: 'char_group'
+  tokenize_on_chars: string[]
+  /**
+   * @server_default 255
+   */
+  max_token_length?: integer
 }
 
-export class NGramTokenizer extends TokenizerBase {
-  type: 'ngram'
+export class ClassicTokenizer extends TokenizerBase {
+  type: 'classic'
+  /**
+   * @server_default 255
+   */
+  
max_token_length?: integer +} + +export class EdgeNGramTokenizer extends TokenizerBase { + type: 'edge_ngram' custom_token_chars?: string max_gram: integer min_gram: integer - token_chars: TokenChar[] + /** + * @server_default [] + */ + token_chars?: TokenChar[] } export enum TokenChar { @@ -52,12 +65,6 @@ export enum TokenChar { custom } -export class CharGroupTokenizer extends TokenizerBase { - type: 'char_group' - tokenize_on_chars: string[] - max_token_length?: integer -} - export class KeywordTokenizer extends TokenizerBase { type: 'keyword' buffer_size: integer @@ -71,18 +78,15 @@ export class LowercaseTokenizer extends TokenizerBase { type: 'lowercase' } -export enum NoriDecompoundMode { - discard, - none, - mixed -} - -export class NoriTokenizer extends TokenizerBase { - type: 'nori_tokenizer' - decompound_mode?: NoriDecompoundMode - discard_punctuation?: boolean - user_dictionary?: string - user_dictionary_rules?: string[] +export class NGramTokenizer extends TokenizerBase { + type: 'ngram' + custom_token_chars?: string + max_gram: integer + min_gram: integer + /** + * @server_default [] + */ + token_chars?: TokenChar[] } export class PathHierarchyTokenizer extends TokenizerBase { @@ -101,11 +105,25 @@ export class PatternTokenizer extends TokenizerBase { pattern?: string } +export class SimplePatternTokenizer extends TokenizerBase { + type: 'simple_pattern' + pattern?: string +} + +export class SimplePatternSplitTokenizer extends TokenizerBase { + type: 'simple_pattern_split' + pattern?: string +} + export class StandardTokenizer extends TokenizerBase { type: 'standard' max_token_length?: integer } +export class ThaiTokenizer extends TokenizerBase { + type: 'thai' +} + export class UaxEmailUrlTokenizer extends TokenizerBase { type: 'uax_url_email' max_token_length?: integer @@ -126,16 +144,21 @@ export type Tokenizer = string | TokenizerDefinition */ export type TokenizerDefinition = | CharGroupTokenizer + | ClassicTokenizer | EdgeNGramTokenizer | 
KeywordTokenizer | LetterTokenizer | LowercaseTokenizer | NGramTokenizer - | NoriTokenizer | PathHierarchyTokenizer + | PatternTokenizer + | SimplePatternTokenizer + | SimplePatternSplitTokenizer | StandardTokenizer + | ThaiTokenizer | UaxEmailUrlTokenizer | WhitespaceTokenizer - | KuromojiTokenizer - | PatternTokenizer + // plugins | IcuTokenizer + | KuromojiTokenizer + | NoriTokenizer