From 8fd08a0603680513fa80043984681f2c77569445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= <szabosteve@gmail.com> Date: Tue, 18 Mar 2025 15:16:47 +0100 Subject: [PATCH 1/2] Adds PUT EIS inference details (#3987) * Adds EIS inference details. Co-authored-by: Jonathan Buttner <56361221+jonathan-buttner@users.noreply.github.com> --- output/openapi/elasticsearch-openapi.json | 113 +++++++- .../elasticsearch-serverless-openapi.json | 113 +++++++- output/schema/schema-serverless.json | 241 ++++++++++++++++-- output/schema/schema.json | 197 ++++++++++++++ output/typescript/types.ts | 20 ++ specification/_doc_ids/table.csv | 1 + .../_json_spec/inference.put.eis.json | 35 +++ .../inference/put_eis/PutEisRequest.ts | 82 ++++++ .../inference/put_eis/PutEisResponse.ts | 24 ++ .../example/request/PutEisRequestExample.yaml | 11 + 10 files changed, 797 insertions(+), 40 deletions(-) create mode 100644 specification/_json_spec/inference.put.eis.json create mode 100644 specification/inference/put_eis/PutEisRequest.ts create mode 100644 specification/inference/put_eis/PutEisResponse.ts create mode 100644 specification/inference/put_eis/example/request/PutEisRequestExample.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 6aece9a723..5d3f2d8a5c 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17748,6 +17748,74 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/{task_type}/{eis_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Elastic Inference Service (EIS) inference endpoint", + "description": "Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "operationId": "inference-put-eis", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_eis:EisTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "eis_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "service": { + "$ref": "#/components/schemas/inference.put_eis:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_eis:EisServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -76860,6 +76928,42 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_eis:EisTaskType": { + "type": "string", + "enum": [ + "chat_completion" + ] + }, + "inference.put_eis:ServiceType": { + "type": "string", + "enum": [ + "elastic" + ] + }, + "inference.put_eis:EisServiceSettings": { + "type": "object", + "properties": { + "model_id": { + "description": "The name of the 
model to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "model_id" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ @@ -76912,15 +77016,6 @@ "model_id" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_openai:OpenAITaskSettings": { "type": "object", "properties": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 170d015a7c..124dae311d 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9719,6 +9719,74 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/{task_type}/{eis_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Elastic Inference Service (EIS) inference endpoint", + "description": "Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "operationId": "inference-put-eis", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_eis:EisTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "eis_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "service": { + "$ref": "#/components/schemas/inference.put_eis:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_eis:EisServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -48195,6 +48263,42 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_eis:EisTaskType": { + "type": "string", + "enum": [ + "chat_completion" + ] + }, + "inference.put_eis:ServiceType": { + "type": "string", + "enum": [ + "elastic" + ] + }, + "inference.put_eis:EisServiceSettings": { + "type": "object", + "properties": { + "model_id": { + "description": "The name of the model to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "model_id" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } 
+ }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ @@ -48247,15 +48351,6 @@ "model_id" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_openai:OpenAITaskSettings": { "type": "object", "properties": { diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 26a51f2716..9c0960916c 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4713,6 +4713,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "docId": "inference-api-put-eis", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html", + "name": "inference.put_eis", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{eis_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -27292,6 +27337,98 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The type of service supported for the specified task type. In this case, `elastic`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `elastic` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + } + } + } + ] + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -99831,6 +99968,32 @@ ], "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" }, + { + "kind": "enum", + "members": [ + { + "name": "chat_completion" + } + ], + "name": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66" + }, + { + "kind": "enum", + "members": [ + { + "name": "elastic" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" + }, { "kind": "enum", "members": [ @@ -120199,6 +120362,62 @@ ], "specLocation": "inference/_types/Services.ts#L60-L89" }, + { + "kind": "interface", + "name": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + }, + "properties": [ + { + "description": "The name of the model to use for the inference task.", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82" + }, + { + "kind": "interface", + "name": { + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": 
"inference/_types/Services.ts#L95-L100" + }, { "kind": "interface", "name": { @@ -120286,28 +120505,6 @@ ], "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L94-L136" }, - { - "kind": "interface", - "name": { - "name": "RateLimitSetting", - "namespace": "inference._types" - }, - "properties": [ - { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/_types/Services.ts#L95-L100" - }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 328e0564fe..72025c6792 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9306,6 +9306,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "docId": "inference-api-put-eis", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html", + "name": "inference.put_eis", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{eis_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -149961,6 +150006,158 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "interface", + "name": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + }, + "properties": [ + { + "description": "The name of the model to use for the inference task.", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82" + }, + { + "kind": "enum", + "members": [ + { + "name": "chat_completion" + } + ], + "name": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The type of service supported for the specified task type. In this case, `elastic`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `elastic` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + } + } + } + ] + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "elastic" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 9bc05b0e33..8010d6e3ba 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13225,6 +13225,26 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferencePutEisEisServiceSettings { + model_id: string + rate_limit?: InferenceRateLimitSetting +} + +export type InferencePutEisEisTaskType = 'chat_completion' + +export interface InferencePutEisRequest extends RequestBase { + task_type: InferencePutEisEisTaskType + eis_inference_id: Id + body?: { + service: InferencePutEisServiceType + service_settings: InferencePutEisEisServiceSettings + } +} + +export type InferencePutEisResponse = InferenceInferenceEndpointInfo + +export type InferencePutEisServiceType = 'elastic' + export interface InferencePutOpenaiOpenAIServiceSettings { api_key: string dimensions?: integer diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index b4ea1b4724..865d3b4e33 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -318,6 +318,7 @@ inference-api-delete,https://www.elastic.co/guide/en/elasticsearch/reference/{br inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/get-inference-api.html inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html +inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elastic.html 
inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-openai.html inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html diff --git a/specification/_json_spec/inference.put.eis.json b/specification/_json_spec/inference.put.eis.json new file mode 100644 index 0000000000..281758769e --- /dev/null +++ b/specification/_json_spec/inference.put.eis.json @@ -0,0 +1,35 @@ +{ + "inference.put_eis": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html", + "description": "Configure an EIS inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{eis_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "eis_inference_id": { + "type": "string", + "description": "The inference ID" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's service settings" + } + } +} diff --git a/specification/inference/put_eis/PutEisRequest.ts b/specification/inference/put_eis/PutEisRequest.ts new file mode 100644 index 0000000000..c788009a32 --- /dev/null +++ b/specification/inference/put_eis/PutEisRequest.ts @@ -0,0 +1,82 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RateLimitSetting } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create an Elastic Inference Service (EIS) inference endpoint. + * + * Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS). + * @rest_spec_name inference.put_eis + * @availability stack since=8.12.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-eis + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{eis_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + * NOTE: The `chat_completion` task type only supports streaming and only through the _stream API. + */ + task_type: EisTaskType + /** + * The unique identifier of the inference endpoint. + */ + eis_inference_id: Id + } + body: { + /** + * The type of service supported for the specified task type. In this case, `elastic`. 
+ */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `elastic` service. + */ + service_settings: EisServiceSettings + } +} + +export enum EisTaskType { + chat_completion +} + +export enum ServiceType { + elastic +} + +export class EisServiceSettings { + /** + * The name of the model to use for the inference task. + */ + model_id: string + /** + * This setting helps to minimize the number of rate limit errors returned. + * By default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`. + */ + rate_limit?: RateLimitSetting +} diff --git a/specification/inference/put_eis/PutEisResponse.ts b/specification/inference/put_eis/PutEisResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_eis/PutEisResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_eis/example/request/PutEisRequestExample.yaml b/specification/inference/put_eis/example/request/PutEisRequestExample.yaml new file mode 100644 index 0000000000..573bc66d22 --- /dev/null +++ b/specification/inference/put_eis/example/request/PutEisRequestExample.yaml @@ -0,0 +1,11 @@ +summary: A chat completion task +description: Run `PUT _inference/chat_completion/chat-completion-endpoint` to create an inference endpoint to perform a chat completion task type. 
+# method_request: "PUT _inference/chat_completion/chat-completion-endpoint" +# type: "request" +value: |- + { + "service": "elastic", + "service_settings": { + "model_id": "rainbow-sprinkles" + } + } From 2cdb0b654d8d445d49b178805d84d8f3d01b88d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= <szabosteve@gmail.com> Date: Tue, 18 Mar 2025 15:59:26 +0100 Subject: [PATCH 2/2] Update specification/_doc_ids/table.csv --- specification/_doc_ids/table.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 865d3b4e33..d338333d1e 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -318,7 +318,7 @@ inference-api-delete,https://www.elastic.co/guide/en/elasticsearch/reference/{br inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/get-inference-api.html inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html -inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elastic.html +inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elastic.html inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-openai.html inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html
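
For illustration, a minimal TypeScript sketch of calling the new inference.put_eis API, assuming an @elastic/elasticsearch 8.x client. It uses the generic transport.request escape hatch rather than a typed inference.putEis helper, since the typed method only exists once clients are regenerated from this spec; the node URL, API-key auth, and endpoint name are placeholder assumptions, and the request body mirrors PutEisRequestExample.yaml.

import { Client } from '@elastic/elasticsearch'

const client = new Client({
  node: 'https://localhost:9200',            // assumption: local cluster URL
  auth: { apiKey: process.env.ES_API_KEY! }  // assumption: API-key auth
})

async function createEisEndpoint() {
  // PUT /_inference/{task_type}/{eis_inference_id}, as defined in this spec.
  // The body fields come straight from PutEisRequestExample.yaml.
  const endpoint = await client.transport.request({
    method: 'PUT',
    path: '/_inference/chat_completion/chat-completion-endpoint',
    body: {
      service: 'elastic',
      service_settings: {
        model_id: 'rainbow-sprinkles',
        // Optional: per EisServiceSettings, the `elastic` service defaults
        // chat_completion to 240 requests per minute when this is omitted.
        rate_limit: { requests_per_minute: 240 }
      }
    }
  })
  console.log(endpoint) // InferenceEndpointInfo for the new endpoint
}

createEisEndpoint().catch(console.error)

Because the `chat_completion` task type only supports streaming, an endpoint created this way is then invoked through POST /_inference/chat_completion/chat-completion-endpoint/_stream rather than the plain inference path.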