From 8fd08a0603680513fa80043984681f2c77569445 Mon Sep 17 00:00:00 2001
From: István Zoltán Szabó <szabosteve@gmail.com>
Date: Tue, 18 Mar 2025 15:16:47 +0100
Subject: [PATCH 1/2] Adds PUT EIS inference details (#3987)

* Adds EIS inference details.

Co-authored-by: Jonathan Buttner <56361221+jonathan-buttner@users.noreply.github.com>
---
 output/openapi/elasticsearch-openapi.json     | 113 +++++++-
 .../elasticsearch-serverless-openapi.json     | 113 +++++++-
 output/schema/schema-serverless.json          | 241 ++++++++++++++++--
 output/schema/schema.json                     | 197 ++++++++++++++
 output/typescript/types.ts                    |  20 ++
 specification/_doc_ids/table.csv              |   1 +
 .../_json_spec/inference.put.eis.json         |  35 +++
 .../inference/put_eis/PutEisRequest.ts        |  82 ++++++
 .../inference/put_eis/PutEisResponse.ts       |  24 ++
 .../example/request/PutEisRequestExample.yaml |  11 +
 10 files changed, 797 insertions(+), 40 deletions(-)
 create mode 100644 specification/_json_spec/inference.put.eis.json
 create mode 100644 specification/inference/put_eis/PutEisRequest.ts
 create mode 100644 specification/inference/put_eis/PutEisResponse.ts
 create mode 100644 specification/inference/put_eis/example/request/PutEisRequestExample.yaml
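For reference, the route added below is exercised with a single PUT request whose
body carries the required `service` and `service_settings` fields. A minimal
TypeScript sketch using the standard fetch API (not any particular Elasticsearch
client); the host and the endpoint name are assumptions, and the model ID is
taken from the request example at the end of this patch:

// Sketch only: assumes an ES-module context (top-level await) and an
// unsecured cluster at localhost:9200.
const res = await fetch(
  'http://localhost:9200/_inference/chat_completion/chat-completion-endpoint',
  {
    method: 'PUT',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({
      service: 'elastic', // the only value the ServiceType enum allows
      service_settings: { model_id: 'rainbow-sprinkles' } // model_id is required
    })
  }
)
console.log(await res.json()) // an InferenceEndpointInfo object on success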

diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
index 6aece9a723..5d3f2d8a5c 100644
--- a/output/openapi/elasticsearch-openapi.json
+++ b/output/openapi/elasticsearch-openapi.json
@@ -17748,6 +17748,74 @@
         "x-state": "Added in 8.11.0"
       }
     },
+    "/_inference/{task_type}/{eis_inference_id}": {
+      "put": {
+        "tags": [
+          "inference"
+        ],
+        "summary": "Create an Elastic Inference Service (EIS) inference endpoint",
+        "description": "Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).",
+        "operationId": "inference-put-eis",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "task_type",
+            "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/inference.put_eis:EisTaskType"
+            },
+            "style": "simple"
+          },
+          {
+            "in": "path",
+            "name": "eis_inference_id",
+            "description": "The unique identifier of the inference endpoint.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/_types:Id"
+            },
+            "style": "simple"
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "service": {
+                    "$ref": "#/components/schemas/inference.put_eis:ServiceType"
+                  },
+                  "service_settings": {
+                    "$ref": "#/components/schemas/inference.put_eis:EisServiceSettings"
+                  }
+                },
+                "required": [
+                  "service",
+                  "service_settings"
+                ]
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+                }
+              }
+            }
+          }
+        },
+        "x-state": "Added in 8.12.0"
+      }
+    },
     "/_inference/{task_type}/{openai_inference_id}": {
       "put": {
         "tags": [
@@ -76860,6 +76928,42 @@
       "inference._types:ServiceSettings": {
         "type": "object"
       },
+      "inference.put_eis:EisTaskType": {
+        "type": "string",
+        "enum": [
+          "chat_completion"
+        ]
+      },
+      "inference.put_eis:ServiceType": {
+        "type": "string",
+        "enum": [
+          "elastic"
+        ]
+      },
+      "inference.put_eis:EisServiceSettings": {
+        "type": "object",
+        "properties": {
+          "model_id": {
+            "description": "The name of the model to use for the inference task.",
+            "type": "string"
+          },
+          "rate_limit": {
+            "$ref": "#/components/schemas/inference._types:RateLimitSetting"
+          }
+        },
+        "required": [
+          "model_id"
+        ]
+      },
+      "inference._types:RateLimitSetting": {
+        "type": "object",
+        "properties": {
+          "requests_per_minute": {
+            "description": "The number of requests allowed per minute.",
+            "type": "number"
+          }
+        }
+      },
       "inference.put_openai:OpenAITaskType": {
         "type": "string",
         "enum": [
@@ -76912,15 +77016,6 @@
           "model_id"
         ]
       },
-      "inference._types:RateLimitSetting": {
-        "type": "object",
-        "properties": {
-          "requests_per_minute": {
-            "description": "The number of requests allowed per minute.",
-            "type": "number"
-          }
-        }
-      },
       "inference.put_openai:OpenAITaskSettings": {
         "type": "object",
         "properties": {
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
index 170d015a7c..124dae311d 100644
--- a/output/openapi/elasticsearch-serverless-openapi.json
+++ b/output/openapi/elasticsearch-serverless-openapi.json
@@ -9719,6 +9719,74 @@
         "x-state": "Added in 8.11.0"
       }
     },
+    "/_inference/{task_type}/{eis_inference_id}": {
+      "put": {
+        "tags": [
+          "inference"
+        ],
+        "summary": "Create an Elastic Inference Service (EIS) inference endpoint",
+        "description": "Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).",
+        "operationId": "inference-put-eis",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "task_type",
+            "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/inference.put_eis:EisTaskType"
+            },
+            "style": "simple"
+          },
+          {
+            "in": "path",
+            "name": "eis_inference_id",
+            "description": "The unique identifier of the inference endpoint.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/_types:Id"
+            },
+            "style": "simple"
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "service": {
+                    "$ref": "#/components/schemas/inference.put_eis:ServiceType"
+                  },
+                  "service_settings": {
+                    "$ref": "#/components/schemas/inference.put_eis:EisServiceSettings"
+                  }
+                },
+                "required": [
+                  "service",
+                  "service_settings"
+                ]
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+                }
+              }
+            }
+          }
+        },
+        "x-state": "Added in 8.12.0"
+      }
+    },
     "/_inference/{task_type}/{openai_inference_id}": {
       "put": {
         "tags": [
@@ -48195,6 +48263,42 @@
       "inference._types:ServiceSettings": {
         "type": "object"
       },
+      "inference.put_eis:EisTaskType": {
+        "type": "string",
+        "enum": [
+          "chat_completion"
+        ]
+      },
+      "inference.put_eis:ServiceType": {
+        "type": "string",
+        "enum": [
+          "elastic"
+        ]
+      },
+      "inference.put_eis:EisServiceSettings": {
+        "type": "object",
+        "properties": {
+          "model_id": {
+            "description": "The name of the model to use for the inference task.",
+            "type": "string"
+          },
+          "rate_limit": {
+            "$ref": "#/components/schemas/inference._types:RateLimitSetting"
+          }
+        },
+        "required": [
+          "model_id"
+        ]
+      },
+      "inference._types:RateLimitSetting": {
+        "type": "object",
+        "properties": {
+          "requests_per_minute": {
+            "description": "The number of requests allowed per minute.",
+            "type": "number"
+          }
+        }
+      },
       "inference.put_openai:OpenAITaskType": {
         "type": "string",
         "enum": [
@@ -48247,15 +48351,6 @@
           "model_id"
         ]
       },
-      "inference._types:RateLimitSetting": {
-        "type": "object",
-        "properties": {
-          "requests_per_minute": {
-            "description": "The number of requests allowed per minute.",
-            "type": "number"
-          }
-        }
-      },
       "inference.put_openai:OpenAITaskSettings": {
         "type": "object",
         "properties": {
diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json
index 26a51f2716..9c0960916c 100644
--- a/output/schema/schema-serverless.json
+++ b/output/schema/schema-serverless.json
@@ -4713,6 +4713,51 @@
         }
       ]
     },
+    {
+      "availability": {
+        "serverless": {
+          "stability": "stable",
+          "visibility": "public"
+        },
+        "stack": {
+          "since": "8.12.0",
+          "stability": "stable",
+          "visibility": "public"
+        }
+      },
+      "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).",
+      "docId": "inference-api-put-eis",
+      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html",
+      "name": "inference.put_eis",
+      "privileges": {
+        "cluster": [
+          "manage_inference"
+        ]
+      },
+      "request": {
+        "name": "Request",
+        "namespace": "inference.put_eis"
+      },
+      "requestBodyRequired": false,
+      "requestMediaType": [
+        "application/json"
+      ],
+      "response": {
+        "name": "Response",
+        "namespace": "inference.put_eis"
+      },
+      "responseMediaType": [
+        "application/json"
+      ],
+      "urls": [
+        {
+          "methods": [
+            "PUT"
+          ],
+          "path": "/_inference/{task_type}/{eis_inference_id}"
+        }
+      ]
+    },
     {
       "availability": {
         "serverless": {
@@ -27292,6 +27337,98 @@
       },
       "specLocation": "inference/put/PutResponse.ts#L22-L24"
     },
+    {
+      "attachedBehaviors": [
+        "CommonQueryParameters"
+      ],
+      "body": {
+        "kind": "properties",
+        "properties": [
+          {
+            "description": "The type of service supported for the specified task type. In this case, `elastic`.",
+            "name": "service",
+            "required": true,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "ServiceType",
+                "namespace": "inference.put_eis"
+              }
+            }
+          },
+          {
+            "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.",
+            "name": "service_settings",
+            "required": true,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "EisServiceSettings",
+                "namespace": "inference.put_eis"
+              }
+            }
+          }
+        ]
+      },
+      "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).",
+      "inherits": {
+        "type": {
+          "name": "RequestBase",
+          "namespace": "_types"
+        }
+      },
+      "kind": "request",
+      "name": {
+        "name": "Request",
+        "namespace": "inference.put_eis"
+      },
+      "path": [
+        {
+          "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.",
+          "name": "task_type",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "EisTaskType",
+              "namespace": "inference.put_eis"
+            }
+          }
+        },
+        {
+          "description": "The unique identifier of the inference endpoint.",
+          "name": "eis_inference_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Id",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "query": [],
+      "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62"
+    },
+    {
+      "body": {
+        "kind": "value",
+        "value": {
+          "kind": "instance_of",
+          "type": {
+            "name": "InferenceEndpointInfo",
+            "namespace": "inference._types"
+          }
+        }
+      },
+      "kind": "response",
+      "name": {
+        "name": "Response",
+        "namespace": "inference.put_eis"
+      },
+      "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24"
+    },
     {
       "attachedBehaviors": [
         "CommonQueryParameters"
@@ -99831,6 +99968,32 @@
       ],
       "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106"
     },
+    {
+      "kind": "enum",
+      "members": [
+        {
+          "name": "chat_completion"
+        }
+      ],
+      "name": {
+        "name": "EisTaskType",
+        "namespace": "inference.put_eis"
+      },
+      "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66"
+    },
+    {
+      "kind": "enum",
+      "members": [
+        {
+          "name": "elastic"
+        }
+      ],
+      "name": {
+        "name": "ServiceType",
+        "namespace": "inference.put_eis"
+      },
+      "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70"
+    },
     {
       "kind": "enum",
       "members": [
@@ -120199,6 +120362,62 @@
       ],
       "specLocation": "inference/_types/Services.ts#L60-L89"
     },
+    {
+      "kind": "interface",
+      "name": {
+        "name": "EisServiceSettings",
+        "namespace": "inference.put_eis"
+      },
+      "properties": [
+        {
+          "description": "The name of the model to use for the inference task.",
+          "name": "model_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.",
+          "name": "rate_limit",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "RateLimitSetting",
+              "namespace": "inference._types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82"
+    },
+    {
+      "kind": "interface",
+      "name": {
+        "name": "RateLimitSetting",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "description": "The number of requests allowed per minute.",
+          "name": "requests_per_minute",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Services.ts#L95-L100"
+    },
     {
       "kind": "interface",
       "name": {
@@ -120286,28 +120505,6 @@
       ],
       "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L94-L136"
     },
-    {
-      "kind": "interface",
-      "name": {
-        "name": "RateLimitSetting",
-        "namespace": "inference._types"
-      },
-      "properties": [
-        {
-          "description": "The number of requests allowed per minute.",
-          "name": "requests_per_minute",
-          "required": false,
-          "type": {
-            "kind": "instance_of",
-            "type": {
-              "name": "integer",
-              "namespace": "_types"
-            }
-          }
-        }
-      ],
-      "specLocation": "inference/_types/Services.ts#L95-L100"
-    },
     {
       "kind": "interface",
       "name": {
diff --git a/output/schema/schema.json b/output/schema/schema.json
index 328e0564fe..72025c6792 100644
--- a/output/schema/schema.json
+++ b/output/schema/schema.json
@@ -9306,6 +9306,51 @@
         }
       ]
     },
+    {
+      "availability": {
+        "serverless": {
+          "stability": "stable",
+          "visibility": "public"
+        },
+        "stack": {
+          "since": "8.12.0",
+          "stability": "stable",
+          "visibility": "public"
+        }
+      },
+      "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).",
+      "docId": "inference-api-put-eis",
+      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html",
+      "name": "inference.put_eis",
+      "privileges": {
+        "cluster": [
+          "manage_inference"
+        ]
+      },
+      "request": {
+        "name": "Request",
+        "namespace": "inference.put_eis"
+      },
+      "requestBodyRequired": false,
+      "requestMediaType": [
+        "application/json"
+      ],
+      "response": {
+        "name": "Response",
+        "namespace": "inference.put_eis"
+      },
+      "responseMediaType": [
+        "application/json"
+      ],
+      "urls": [
+        {
+          "methods": [
+            "PUT"
+          ],
+          "path": "/_inference/{task_type}/{eis_inference_id}"
+        }
+      ]
+    },
     {
       "availability": {
         "serverless": {
@@ -149961,6 +150006,158 @@
       },
       "specLocation": "inference/put/PutResponse.ts#L22-L24"
     },
+    {
+      "kind": "interface",
+      "name": {
+        "name": "EisServiceSettings",
+        "namespace": "inference.put_eis"
+      },
+      "properties": [
+        {
+          "description": "The name of the model to use for the inference task.",
+          "name": "model_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.",
+          "name": "rate_limit",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "RateLimitSetting",
+              "namespace": "inference._types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82"
+    },
+    {
+      "kind": "enum",
+      "members": [
+        {
+          "name": "chat_completion"
+        }
+      ],
+      "name": {
+        "name": "EisTaskType",
+        "namespace": "inference.put_eis"
+      },
+      "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66"
+    },
+    {
+      "kind": "request",
+      "attachedBehaviors": [
+        "CommonQueryParameters"
+      ],
+      "body": {
+        "kind": "properties",
+        "properties": [
+          {
+            "description": "The type of service supported for the specified task type. In this case, `elastic`.",
+            "name": "service",
+            "required": true,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "ServiceType",
+                "namespace": "inference.put_eis"
+              }
+            }
+          },
+          {
+            "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.",
+            "name": "service_settings",
+            "required": true,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "EisServiceSettings",
+                "namespace": "inference.put_eis"
+              }
+            }
+          }
+        ]
+      },
+      "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).",
+      "inherits": {
+        "type": {
+          "name": "RequestBase",
+          "namespace": "_types"
+        }
+      },
+      "name": {
+        "name": "Request",
+        "namespace": "inference.put_eis"
+      },
+      "path": [
+        {
+          "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.",
+          "name": "task_type",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "EisTaskType",
+              "namespace": "inference.put_eis"
+            }
+          }
+        },
+        {
+          "description": "The unique identifier of the inference endpoint.",
+          "name": "eis_inference_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Id",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "query": [],
+      "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62"
+    },
+    {
+      "kind": "response",
+      "body": {
+        "kind": "value",
+        "value": {
+          "kind": "instance_of",
+          "type": {
+            "name": "InferenceEndpointInfo",
+            "namespace": "inference._types"
+          }
+        }
+      },
+      "name": {
+        "name": "Response",
+        "namespace": "inference.put_eis"
+      },
+      "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24"
+    },
+    {
+      "kind": "enum",
+      "members": [
+        {
+          "name": "elastic"
+        }
+      ],
+      "name": {
+        "name": "ServiceType",
+        "namespace": "inference.put_eis"
+      },
+      "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70"
+    },
     {
       "kind": "interface",
       "name": {
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
index 9bc05b0e33..8010d6e3ba 100644
--- a/output/typescript/types.ts
+++ b/output/typescript/types.ts
@@ -13225,6 +13225,26 @@ export interface InferencePutRequest extends RequestBase {
 
 export type InferencePutResponse = InferenceInferenceEndpointInfo
 
+export interface InferencePutEisEisServiceSettings {
+  model_id: string
+  rate_limit?: InferenceRateLimitSetting
+}
+
+export type InferencePutEisEisTaskType = 'chat_completion'
+
+export interface InferencePutEisRequest extends RequestBase {
+  task_type: InferencePutEisEisTaskType
+  eis_inference_id: Id
+  body?: {
+    service: InferencePutEisServiceType
+    service_settings: InferencePutEisEisServiceSettings
+  }
+}
+
+export type InferencePutEisResponse = InferenceInferenceEndpointInfo
+
+export type InferencePutEisServiceType = 'elastic'
+
 export interface InferencePutOpenaiOpenAIServiceSettings {
   api_key: string
   dimensions?: integer
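The generated types compose as follows; a sketch that type-checks against the
interfaces added above (the import path and the concrete values are assumptions):

import type { InferencePutEisRequest } from './output/typescript/types'

// task_type and service are narrowed to their single enum members,
// 'chat_completion' and 'elastic'; eis_inference_id is any Id string.
const request: InferencePutEisRequest = {
  task_type: 'chat_completion',
  eis_inference_id: 'chat-completion-endpoint',
  body: {
    service: 'elastic',
    service_settings: { model_id: 'rainbow-sprinkles' }
  }
}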
diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
index b4ea1b4724..865d3b4e33 100644
--- a/specification/_doc_ids/table.csv
+++ b/specification/_doc_ids/table.csv
@@ -318,6 +318,7 @@ inference-api-delete,https://www.elastic.co/guide/en/elasticsearch/reference/{br
 inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/get-inference-api.html
 inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html
 inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html
+inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elastic.html
 inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-openai.html
 inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html
 inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html
diff --git a/specification/_json_spec/inference.put.eis.json b/specification/_json_spec/inference.put.eis.json
new file mode 100644
index 0000000000..281758769e
--- /dev/null
+++ b/specification/_json_spec/inference.put.eis.json
@@ -0,0 +1,35 @@
+{
+  "inference.put_eis": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html",
+      "description": "Configure an EIS inference endpoint"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["application/json"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{task_type}/{eis_inference_id}",
+          "methods": ["PUT"],
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
+            },
+            "eis_inference_id": {
+              "type": "string",
+              "description": "The inference ID"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference endpoint's service settings"
+    }
+  }
+}
diff --git a/specification/inference/put_eis/PutEisRequest.ts b/specification/inference/put_eis/PutEisRequest.ts
new file mode 100644
index 0000000000..c788009a32
--- /dev/null
+++ b/specification/inference/put_eis/PutEisRequest.ts
@@ -0,0 +1,82 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { RateLimitSetting } from '@inference/_types/Services'
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+
+/**
+ * Create an Elastic Inference Service (EIS) inference endpoint.
+ *
+ * Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).
+ * @rest_spec_name inference.put_eis
+ * @availability stack since=8.12.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
+ * @doc_id inference-api-put-eis
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/{task_type}/{eis_inference_id}'
+      methods: ['PUT']
+    }
+  ]
+  path_parts: {
+    /**
+     * The type of the inference task that the model will perform.
+     * NOTE: The `chat_completion` task type only supports streaming and only through the _stream API.
+     */
+    task_type: EisTaskType
+    /**
+     * The unique identifier of the inference endpoint.
+     */
+    eis_inference_id: Id
+  }
+  body: {
+    /**
+     * The type of service supported for the specified task type. In this case, `elastic`.
+     */
+    service: ServiceType
+    /**
+     * Settings used to install the inference model. These settings are specific to the `elastic` service.
+     */
+    service_settings: EisServiceSettings
+  }
+}
+
+export enum EisTaskType {
+  chat_completion
+}
+
+export enum ServiceType {
+  elastic
+}
+
+export class EisServiceSettings {
+  /**
+   * The name of the model to use for the inference task.
+   */
+  model_id: string
+  /**
+   * This setting helps to minimize the number of rate limit errors returned.
+   * By default, the `elastic` service sets the number of requests allowed per minute to `240` for the `chat_completion` task type.
+   */
+  rate_limit?: RateLimitSetting
+}
diff --git a/specification/inference/put_eis/PutEisResponse.ts b/specification/inference/put_eis/PutEisResponse.ts
new file mode 100644
index 0000000000..d40639b031
--- /dev/null
+++ b/specification/inference/put_eis/PutEisResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { InferenceEndpointInfo } from '@inference/_types/Services'
+
+export class Response {
+  body: InferenceEndpointInfo
+}
diff --git a/specification/inference/put_eis/example/request/PutEisRequestExample.yaml b/specification/inference/put_eis/example/request/PutEisRequestExample.yaml
new file mode 100644
index 0000000000..573bc66d22
--- /dev/null
+++ b/specification/inference/put_eis/example/request/PutEisRequestExample.yaml
@@ -0,0 +1,11 @@
+summary: A chat completion task
+description: Run `PUT _inference/chat_completion/chat-completion-endpoint` to create an inference endpoint that performs a chat completion task.
+# method_request: "PUT _inference/chat_completion/chat-completion-endpoint"
+# type: "request"
+value: |-
+  {
+      "service": "elastic",
+      "service_settings": {
+          "model_id": "rainbow-sprinkles"
+      }
+  }
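On success the call returns an InferenceEndpointInfo (see PutEisResponse.ts
above). A hedged sketch of the expected body, assuming it echoes the stored
endpoint configuration plus its identifier and task type:

// Not taken from this patch; the exact field set is an assumption based on
// the InferenceEndpointInfo type referenced by the response definition.
const expectedResponse = {
  inference_id: 'chat-completion-endpoint',
  task_type: 'chat_completion',
  service: 'elastic',
  service_settings: { model_id: 'rainbow-sprinkles' }
}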

From 2cdb0b654d8d445d49b178805d84d8f3d01b88d9 Mon Sep 17 00:00:00 2001
From: István Zoltán Szabó <szabosteve@gmail.com>
Date: Tue, 18 Mar 2025 15:59:26 +0100
Subject: [PATCH 2/2] Update specification/_doc_ids/table.csv

---
 specification/_doc_ids/table.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
index 865d3b4e33..d338333d1e 100644
--- a/specification/_doc_ids/table.csv
+++ b/specification/_doc_ids/table.csv
@@ -318,7 +318,7 @@ inference-api-delete,https://www.elastic.co/guide/en/elasticsearch/reference/{br
 inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/get-inference-api.html
 inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html
 inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html
-inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elastic.html
+inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elastic.html
 inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-openai.html
 inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html
 inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html