From 473f061e9e4beee06d141cad14af6a7f4ba723f2 Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 09:24:13 -0700 Subject: [PATCH] Add Azure AI Studio inference API (#4020) (cherry picked from commit 0eab22cb8125693e2ceb97917666473a085fb4e0) --- output/openapi/elasticsearch-openapi.json | 160 ++++ .../elasticsearch-serverless-openapi.json | 160 ++++ output/schema/schema-serverless.json | 898 ++++++++++++++++++ output/schema/schema.json | 349 +++++++ output/typescript/types.ts | 33 + specification/_doc_ids/table.csv | 3 + .../inference.put_azureaistudio.json | 35 + .../PutAzureAiStudioRequest.ts | 164 ++++ .../PutAzureAiStudioResponse.ts | 24 + .../PutAzureAiStudioRequestExample1.yaml | 14 + .../PutAzureAiStudioRequestExample2.yaml | 14 + 11 files changed, 1854 insertions(+) create mode 100644 specification/_json_spec/inference.put_azureaistudio.json create mode 100644 specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts create mode 100644 specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts create mode 100644 specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml create mode 100644 specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index ad798529c6..c26fa7693b 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -18103,6 +18103,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureaistudio_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure AI studio inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureaistudio", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureaistudio_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureaistudio:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioServiceSettings" + }, + "task_settings": { + "$ref": 
"#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureAiStudioRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{azureopenai_inference_id}": { "put": { "tags": [ @@ -78226,6 +78312,80 @@ "max_tokens" ] }, + "inference.put_azureaistudio:AzureAiStudioTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureaistudio:ServiceType": { + "type": "string", + "enum": [ + "azureaistudio" + ] + }, + "inference.put_azureaistudio:AzureAiStudioServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://ai.azure.com/" + }, + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "endpoint_type": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio" + }, + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "type": "string" + }, + "target": { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "type": "string" + }, + "provider": { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - 
available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "endpoint_type", + "target", + "provider" + ] + }, + "inference.put_azureaistudio:AzureAiStudioTaskSettings": { + "type": "object", + "properties": { + "do_sample": { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "type": "number" + }, + "max_new_tokens": { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "type": "number" + }, + "temperature": { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "type": "number" + }, + "user": { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_azureopenai:AzureOpenAITaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 57fe2ecf41..e74782363d 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -10069,6 +10069,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureaistudio_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure AI studio inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureaistudio", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureaistudio_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": 
"#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureaistudio:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureAiStudioRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{azureopenai_inference_id}": { "put": { "tags": [ @@ -49556,6 +49642,80 @@ "max_tokens" ] }, + "inference.put_azureaistudio:AzureAiStudioTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureaistudio:ServiceType": { + "type": "string", + "enum": [ + "azureaistudio" + ] + }, + "inference.put_azureaistudio:AzureAiStudioServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://ai.azure.com/" + }, + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "endpoint_type": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio" + }, + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of 
usage.", + "type": "string" + }, + "target": { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "type": "string" + }, + "provider": { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "endpoint_type", + "target", + "provider" + ] + }, + "inference.put_azureaistudio:AzureAiStudioTaskSettings": { + "type": "object", + "properties": { + "do_sample": { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "type": "number" + }, + "max_new_tokens": { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "type": "number" + }, + "temperature": { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "type": "number" + }, + "user": { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_azureopenai:AzureOpenAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index cc12bd4c02..68de1a7193 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4995,6 +4995,189 @@ "visibility": "public" } }, +<<<<<<< HEAD +======= + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-amazonbedrock", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html", + "name": "inference.put_amazonbedrock", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{amazonbedrock_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.16.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-anthropic", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html", + "name": "inference.put_anthropic", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_anthropic" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_anthropic" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{anthropic_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.14.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify 
the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-azureaistudio", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html", + "name": "inference.put_azureaistudio", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_azureaistudio" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_azureaistudio" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{azureaistudio_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.13.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-cohere", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/branch/infer-service-cohere.html", + "name": "inference.put_cohere", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_cohere" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_cohere" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{cohere_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, +>>>>>>> 0eab22cb8 (Add Azure AI Studio inference API (#4020)) "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", "docId": "inference-api-put-eis", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elastic.html", @@ -28042,9 +28225,29 @@ "kind": "properties", "properties": [ { +<<<<<<< HEAD "description": "The chunking configuration object.", "extDocId": "inference-chunking", "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", +======= +<<<<<<< HEAD +<<<<<<< HEAD + "description": "The chunking configuration object.", + "extDocId": 
"inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", +======= +<<<<<<< HEAD +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +======= + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", +<<<<<<< HEAD +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "chunking_settings", "required": false, "type": { @@ -28056,26 +28259,77 @@ } }, { +<<<<<<< HEAD "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", +======= +<<<<<<< HEAD +<<<<<<< HEAD + "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", +======= + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD +<<<<<<< HEAD + "namespace": "inference.put_alibabacloud" +======= + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "namespace": "inference.put_amazonbedrock" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) } } }, { +<<<<<<< HEAD "description": "Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.", +======= +<<<<<<< HEAD +<<<<<<< HEAD + "description": "Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.", +======= + "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "description": "Settings used to install the inference model. 
These settings are specific to the `amazonbedrock` service.", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AlibabaCloudServiceSettings", "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD +<<<<<<< HEAD + "name": "AlibabaCloudServiceSettings", + "namespace": "inference.put_alibabacloud" +======= + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) } } }, @@ -28086,13 +28340,33 @@ "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AlibabaCloudTaskSettings", "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD +<<<<<<< HEAD + "name": "AlibabaCloudTaskSettings", + "namespace": "inference.put_alibabacloud" +======= + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) } } } ] }, +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { "PutAlibabaCloudRequestExample1": { @@ -28114,6 +28388,27 @@ "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", "summary": "A text embedding task", "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" +<<<<<<< HEAD +======= +======= +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAmazonBedrockRequestExample1": { + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + }, + "PutAmazonBedrockRequestExample2": { + "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "summary": "A completion task", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" +<<<<<<< HEAD +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) } }, "inherits": { @@ -28125,7 +28420,19 @@ "kind": "request", "name": { "name": "Request", +<<<<<<< HEAD + "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" +======= + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "namespace": "inference.put_amazonbedrock" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) }, "path": [ { @@ -28135,14 +28442,41 @@ "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD + "name": "AlibabaCloudTaskType", + "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD +<<<<<<< HEAD "name": "AlibabaCloudTaskType", "namespace": "inference.put_alibabacloud" +======= + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) } } }, { "description": "The unique identifier of the inference endpoint.", +<<<<<<< HEAD + "name": "alibabacloud_inference_id", +======= +<<<<<<< HEAD +<<<<<<< HEAD "name": "alibabacloud_inference_id", +======= + "name": "amazonbedrock_inference_id", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "name": "amazonbedrock_inference_id", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "required": true, "type": { "kind": "instance_of", @@ -28154,7 +28488,19 @@ } ], "query": [], 
+<<<<<<< HEAD + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80" +======= +<<<<<<< HEAD +<<<<<<< HEAD "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80" +======= + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) }, { "body": { @@ -28170,9 +28516,27 @@ "kind": "response", "name": { "name": "Response", +<<<<<<< HEAD + "namespace": "inference.put_alibabacloud" + }, + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L24" +======= +<<<<<<< HEAD +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L24" +======= + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) }, { "attachedBehaviors": [ @@ -28184,6 +28548,7 @@ { "description": "The chunking configuration object.", "extDocId": "inference-chunking", +<<<<<<< HEAD "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", "name": "chunking_settings", "required": false, @@ -28315,6 +28680,17 @@ "description": "The chunking configuration object.", "extDocId": "inference-chunking", "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", +======= +<<<<<<< HEAD +<<<<<<< HEAD + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", +======= + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "chunking_settings", "required": false, "type": { @@ -28438,7 +28814,15 @@ { "description": "The chunking configuration object.", "extDocId": "inference-chunking", +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", +======= + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "name": "chunking_settings", "required": false, "type": { @@ -28450,26 +28834,43 @@ } }, { +<<<<<<< HEAD "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", +======= + "description": "The type of service supported for the specified task type. 
In this case, `azureaistudio`.", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", +<<<<<<< HEAD "namespace": "inference.put_azureopenai" +======= + "namespace": "inference.put_azureaistudio" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) } } }, { +<<<<<<< HEAD "description": "Settings used to install the inference model. These settings are specific to the `azureopenai` service.", +======= + "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AzureOpenAIServiceSettings", "namespace": "inference.put_azureopenai" +======= + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) } } }, @@ -28480,13 +28881,19 @@ "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AzureOpenAITaskSettings", "namespace": "inference.put_azureopenai" +======= + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) } } } ] }, +<<<<<<< HEAD "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { "PutAzureOpenAiRequestExample1": { @@ -28498,6 +28905,19 @@ "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", "summary": "A completion task", "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" +======= + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically 
deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAzureAiStudioRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) } }, "inherits": { @@ -28509,24 +28929,41 @@ "kind": "request", "name": { "name": "Request", +<<<<<<< HEAD "namespace": "inference.put_azureopenai" }, "path": [ { "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", +======= + "namespace": "inference.put_azureaistudio" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AzureOpenAITaskType", "namespace": "inference.put_azureopenai" +======= + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) } } }, { "description": "The unique identifier of the inference endpoint.", +<<<<<<< HEAD "name": "azureopenai_inference_id", +======= + "name": "azureaistudio_inference_id", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "required": true, "type": { "kind": "instance_of", @@ -28538,7 +28975,11 @@ } ], "query": [], +<<<<<<< HEAD "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" +======= + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) }, { "body": { @@ -28554,9 +28995,15 @@ "kind": "response", "name": { "name": "Response", +<<<<<<< HEAD "namespace": "inference.put_azureopenai" }, "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" +======= + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) }, { "attachedBehaviors": [ @@ -28568,7 +29015,17 @@ { "description": "The chunking configuration object.", "extDocId": 
"inference-chunking", +<<<<<<< HEAD "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", +<<<<<<< HEAD +======= +======= + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "chunking_settings", "required": false, "type": { @@ -28696,6 +29153,17 @@ "kind": "properties", "properties": [ { +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 0eab22cb8 (Add Azure AI Studio inference API (#4020)) +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "description": "The type of service supported for the specified task type. In this case, `elastic`.", "name": "service", "required": true, @@ -102485,6 +102953,11 @@ "kind": "enum", "members": [ { +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "completion" }, { @@ -102494,24 +102967,75 @@ "name": "space_embedding" }, { +<<<<<<< HEAD +======= +======= +<<<<<<< HEAD +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +======= + "name": "completion" + }, + { +<<<<<<< HEAD +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "text_embedding" } ], "name": { +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "AlibabaCloudTaskType", "namespace": "inference.put_alibabacloud" }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L82-L87" +<<<<<<< HEAD +======= +======= +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" +<<<<<<< HEAD +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) }, { "kind": "enum", "members": [ { +<<<<<<< HEAD "name": "alibabacloud-ai-search" +======= +<<<<<<< HEAD +<<<<<<< HEAD + "name": "alibabacloud-ai-search" +======= + "name": "amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "name": "amazonbedrock" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) } ], "name": { "name": "ServiceType", +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "namespace": "inference.put_alibabacloud" }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" @@ -102544,6 +103068,15 @@ "namespace": "inference.put_amazonbedrock" }, "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" +<<<<<<< 
HEAD +======= +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) }, { "kind": "enum", @@ -102578,27 +103111,51 @@ "name": "completion" }, { +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "text_embedding" } ], "name": { +<<<<<<< HEAD "name": "AzureOpenAITaskType", "namespace": "inference.put_azureopenai" }, "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" +======= + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) }, { "kind": "enum", "members": [ { +<<<<<<< HEAD "name": "azureopenai" +======= + "name": "azureaistudio" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) } ], "name": { "name": "ServiceType", +<<<<<<< HEAD "namespace": "inference.put_azureopenai" }, "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" +======= + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) }, { "kind": "enum", @@ -102607,6 +103164,14 @@ "name": "completion" }, { +<<<<<<< HEAD +======= +<<<<<<< HEAD +======= +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "rerank" }, { @@ -102715,6 +103280,17 @@ "kind": "enum", "members": [ { +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 0eab22cb8 (Add Azure AI Studio inference API (#4020)) +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "chat_completion" } ], @@ -123499,6 +124075,11 @@ { "kind": "interface", "name": { +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "AlibabaCloudServiceSettings", "namespace": "inference.put_alibabacloud" }, @@ -123506,6 +124087,7 @@ { "description": "A valid API key for the AlibabaCloud AI Search API.", "name": "api_key", +<<<<<<< HEAD "required": true, "type": { "kind": "instance_of", @@ -123627,6 +124209,13 @@ { "kind": "interface", "name": { +======= +======= +<<<<<<< HEAD +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +======= +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "AmazonBedrockServiceSettings", "namespace": "inference.put_amazonbedrock" }, @@ -123634,6 +124223,13 @@ { "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", "name": "access_key", +<<<<<<< HEAD +======= +<<<<<<< HEAD +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI 
Studio inference API (#4020))
        "required": true,
        "type": {
          "kind": "instance_of",
          "type": {
            "name": "string",
            "namespace": "_builtins"
          }
        }
      },
      {
+<<<<<<< HEAD
+=======
+<<<<<<< HEAD
+<<<<<<< HEAD
+        "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.",
+        "extDocId": "alibabacloud-api-keys",
+        "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key",
+        "name": "host",
+=======
+=======
+>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020))
+>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020))
        "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.",
        "extDocId": "amazonbedrock-models",
        "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html",
        "name": "model",
+<<<<<<< HEAD
+=======
+<<<<<<< HEAD
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
+=======
+>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020))
+>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020))
        "required": true,
        "type": {
          "kind": "instance_of",
          "type": {
            "name": "string",
            "namespace": "_builtins"
          }
        }
      },
      {
+<<<<<<< HEAD
+=======
+<<<<<<< HEAD
+<<<<<<< HEAD
+        "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.",
+=======
+=======
+>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020))
+>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020))
        "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only",
        "name": "provider",
        "required": false,
@@ -123685,6 +124309,13 @@
      },
      {
        "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.",
+<<<<<<< HEAD
+=======
+<<<<<<< HEAD
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
+=======
+>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020))
+>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020))
        "name": "rate_limit",
        "required": false,
@@ -123696,10 +124327,39 @@
        "type": {
          "kind": "instance_of",
          "type": {
            "name": "RateLimitSetting",
            "namespace": "inference._types"
          }
        }
      },
      {
+<<<<<<< HEAD
+=======
+<<<<<<< HEAD
+<<<<<<< HEAD
+        "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max ÷ qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n`ops-text-embedding-001`\n`ops-text-embedding-zh-001`\n`ops-text-embedding-en-001`\n`ops-text-embedding-002`",
+        "name": "service_id",
+        "required": true,
+        "type": {
+          "kind": "instance_of",
+          "type": {
+            "name": "string",
+            "namespace": "_builtins"
+          }
+        }
+      },
+      {
+        "description": "The name of the workspace used for the inference task.",
+        "name": "workspace",
+=======
+=======
+>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020))
+>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020))
        "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.",
        "extDocId": "amazonbedrock-secret-keys",
        "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html",
        "name": "secret_key",
+<<<<<<< HEAD
+=======
+<<<<<<< HEAD
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
+=======
+>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020))
+>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020))
@@ -123710,11 +124370,66 @@
        "required": true,
        "type": {
          "kind": "instance_of",
          "type": {
            "name": "string",
            "namespace": "_builtins"
          }
        }
      }
    ],
+<<<<<<< HEAD
    "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137"
+=======
+<<<<<<< HEAD
+<<<<<<< HEAD
+    "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138"
+=======
+    "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137"
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
+=======
+    "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137"
+>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020))
+>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020))
  },
  {
    "kind": "interface",
    "name": {
+<<<<<<< HEAD
+=======
+      "name": "RateLimitSetting",
+      "namespace": "inference._types"
+    },
+    "properties": [
+      {
+        "description": "The number of requests allowed per minute.",
+        "name": "requests_per_minute",
+        "required": false,
+        "type": {
+          "kind": "instance_of",
+          "type": {
+            "name": "integer",
+            "namespace": "_types"
+          }
+        }
+      }
+    ],
+    "specLocation": "inference/_types/Services.ts#L95-L100"
+  },
+  {
+    "kind": "interface",
+    "name": {
+<<<<<<< HEAD
+<<<<<<< HEAD
+      "name": "AlibabaCloudTaskSettings",
+      "namespace": "inference.put_alibabacloud"
+    },
+    "properties": [
+      {
+        "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* `ingest` for storing document embeddings in a vector database.\n* `search` for storing embeddings of search queries run against a vector database to find relevant documents.",
+        "name": "input_type",
+        "required": false,
+        "type": {
+          "kind": "instance_of",
+          "type": {
+            "name": "string",
+            "namespace": "_builtins"
+=======
+=======
+>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020))
+>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020))
      "name": "AmazonBedrockTaskSettings",
      "namespace": "inference.put_amazonbedrock"
    },
    "properties": [
      {
@@ -123729,16 +124444,48 @@
        "type": {
          "name": "integer",
          "namespace": "_types"
+<<<<<<< HEAD
+=======
+<<<<<<< HEAD
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
+=======
+>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020))
+>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020))
          }
        }
      },
      {
+<<<<<<< HEAD
+        "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 
0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "name": "temperature", +======= +<<<<<<< HEAD +<<<<<<< HEAD + "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", + "name": "return_token", +======= "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", "name": "temperature", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "name": "temperature", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "required": false, "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD + "name": "boolean", + "namespace": "_builtins" +======= +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "float", "namespace": "_types" } @@ -123765,11 +124512,30 @@ "type": { "name": "float", "namespace": "_types" +<<<<<<< HEAD +======= +<<<<<<< HEAD +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) } } } ], +<<<<<<< HEAD + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" +======= +<<<<<<< HEAD +<<<<<<< HEAD + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L140-L154" +======= + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) }, { "kind": "interface", @@ -123881,6 +124647,11 @@ { "kind": "interface", "name": { +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "AzureOpenAIServiceSettings", "namespace": "inference.put_azureopenai" }, @@ -123902,6 +124673,17 @@ { "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", "name": "api_version", +======= + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete 
the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureaistudio-api-keys", + "extDocUrl": "https://ai.azure.com/", + "name": "api_key", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "required": true, "type": { "kind": "instance_of", @@ -123912,10 +124694,17 @@ } }, { +<<<<<<< HEAD "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", "extDocId": "azureopenai", "extDocUrl": "https://oai.azure.com/", "name": "deployment_id", +======= + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "extDocId": "azureaistudio-endpoint-types", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", + "name": "endpoint_type", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "required": true, "type": { "kind": "instance_of", @@ -123926,11 +124715,17 @@ } }, { +<<<<<<< HEAD "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", "extDocId": "azureopenai-auth", "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", "name": "entra_id", "required": false, +======= + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "name": "target", + "required": true, +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "type": { "kind": "instance_of", "type": { @@ -123940,9 +124735,25 @@ } }, { +<<<<<<< HEAD "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", "extDocId": "azureopenai-quota-limits", "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", +======= + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "name": "provider", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "name": "rate_limit", "required": false, 
"type": { @@ -123952,6 +124763,7 @@ "namespace": "inference._types" } } +<<<<<<< HEAD }, { "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", @@ -123969,16 +124781,79 @@ } ], "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" +======= + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) }, { "kind": "interface", "name": { +<<<<<<< HEAD "name": "AzureOpenAITaskSettings", "namespace": "inference.put_azureopenai" }, "properties": [ { "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", +======= + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "name": "do_sample", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) "name": "user", "required": false, "type": { @@ -123990,11 +124865,23 @@ } } ], +<<<<<<< HEAD "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" +======= + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) }, { "kind": "interface", "name": { +<<<<<<< HEAD +======= +<<<<<<< HEAD +======= +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 7ba6e3f4b (Add Azure AI Studio inference API (#4020)) +>>>>>>> 2f96db287 (Add Azure AI Studio inference API (#4020)) "name": "CohereServiceSettings", "namespace": "inference.put_cohere" }, @@ -124126,6 +125013,17 @@ { "kind": "interface", "name": { +<<<<<<< HEAD +======= +<<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) +======= +>>>>>>> 0eab22cb8 (Add Azure AI Studio inference API (#4020)) 
       "name": "EisServiceSettings",
       "namespace": "inference.put_eis"
     },
diff --git a/output/schema/schema.json b/output/schema/schema.json
index ac6eee9aaa..a96110fa90 100644
--- a/output/schema/schema.json
+++ b/output/schema/schema.json
@@ -9486,6 +9486,51 @@
       }
     ]
   },
+  {
+    "availability": {
+      "serverless": {
+        "stability": "stable",
+        "visibility": "public"
+      },
+      "stack": {
+        "since": "8.14.0",
+        "stability": "stable",
+        "visibility": "public"
+      }
+    },
+    "description": "Create an Azure AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+    "docId": "inference-api-put-azureaistudio",
+    "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-ai-studio.html",
+    "name": "inference.put_azureaistudio",
+    "privileges": {
+      "cluster": [
+        "manage_inference"
+      ]
+    },
+    "request": {
+      "name": "Request",
+      "namespace": "inference.put_azureaistudio"
+    },
+    "requestBodyRequired": false,
+    "requestMediaType": [
+      "application/json"
+    ],
+    "response": {
+      "name": "Response",
+      "namespace": "inference.put_azureaistudio"
+    },
+    "responseMediaType": [
+      "application/json"
+    ],
+    "urls": [
+      {
+        "methods": [
+          "PUT"
+        ],
+        "path": "/_inference/{task_type}/{azureaistudio_inference_id}"
+      }
+    ]
+  },
   {
     "availability": {
       "serverless": {
@@ -151798,6 +151843,310 @@
     },
     "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90"
   },
+  {
+    "kind": "interface",
+    "name": {
+      "name": "AzureAiStudioServiceSettings",
+      "namespace": "inference.put_azureaistudio"
+    },
+    "properties": [
+      {
+        "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
+        "extDocId": "azureaistudio-api-keys",
+        "extDocUrl": "https://ai.azure.com/",
+        "name": "api_key",
+        "required": true,
+        "type": {
+          "kind": "instance_of",
+          "type": {
+            "name": "string",
+            "namespace": "_builtins"
+          }
+        }
+      },
+      {
+        "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.",
+        "extDocId": "azureaistudio-endpoint-types",
+        "extDocUrl":
"https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", + "name": "endpoint_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "name": "target", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "name": "provider", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" + }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "name": "do_sample", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + 
"required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `azureaistudio`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + } + } + } + ] + }, + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAzureAiStudioRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. 
Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_azureaistudio" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "azureaistudio_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureaistudio" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index fa4b944f52..b68f45b692 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13342,6 +13342,39 @@ export type InferencePutAnthropicResponse = InferenceInferenceEndpointInfo export type InferencePutAnthropicServiceType = 'anthropic' +export interface InferencePutAzureaistudioAzureAiStudioServiceSettings { + api_key: string + endpoint_type: string + target: string + provider: string + rate_limit?: InferenceRateLimitSetting +} + +export interface InferencePutAzureaistudioAzureAiStudioTaskSettings { + do_sample?: float + max_new_tokens?: integer + temperature?: float + top_p?: float + user?: string +} + +export type InferencePutAzureaistudioAzureAiStudioTaskType = 'completion' | 'text_embedding' + +export interface InferencePutAzureaistudioRequest extends RequestBase { + task_type: InferencePutAzureaistudioAzureAiStudioTaskType + azureaistudio_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAzureaistudioServiceType + service_settings: InferencePutAzureaistudioAzureAiStudioServiceSettings + task_settings?: InferencePutAzureaistudioAzureAiStudioTaskSettings + } +} + +export type InferencePutAzureaistudioResponse = 
InferenceInferenceEndpointInfo + +export type InferencePutAzureaistudioServiceType = 'azureaistudio' + export interface InferencePutAzureopenaiAzureOpenAIServiceSettings { api_key?: string api_version: string diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 7190d187e9..e99d60a8e7 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -32,6 +32,8 @@ azureopenai,https://oai.azure.com/ azureopenai-auth,https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication azureopenai-portal,https://portal.azure.com/#view/HubsExtension/BrowseAll azureopenai-quota-limits,https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits +azureaistudio-api-keys,https://ai.azure.com/ +azureaistudio-endpoint-types,https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio behavioral-analytics-collection-event,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-analytics-collection-event.html behavioral-analytics-event-reference,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/behavioral-analytics-event-reference.html byte-units,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/api-conventions.html#byte-units @@ -338,6 +340,7 @@ inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{bran inference-api-post-eis-chat-completion,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html inference-api-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-alibabacloud-ai-search.html +inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-ai-studio.html inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-openai.html inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-cohere.html inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elastic.html diff --git a/specification/_json_spec/inference.put_azureaistudio.json b/specification/_json_spec/inference.put_azureaistudio.json new file mode 100644 index 0000000000..00de83eca7 --- /dev/null +++ b/specification/_json_spec/inference.put_azureaistudio.json @@ -0,0 +1,35 @@ +{ + "inference.put_azureaistudio": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html", + "description": "Configure an Azure AI Studio inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{azureaistudio_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "azureaistudio_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts new file mode 100644 index 
0000000000..5e3602f381
--- /dev/null
+++ b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts
@@ -0,0 +1,164 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import {
+  InferenceChunkingSettings,
+  RateLimitSetting
+} from '@inference/_types/Services'
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { float, integer } from '@_types/Numeric'
+
+/**
+ * Create an Azure AI Studio inference endpoint.
+ *
+ * Create an inference endpoint to perform an inference task with the `azureaistudio` service.
+ *
+ * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
+ * After creating the endpoint, wait for the model deployment to complete before using it.
+ * To verify the deployment status, use the get trained model statistics API.
+ * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
+ * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
+ * @rest_spec_name inference.put_azureaistudio
+ * @availability stack since=8.14.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
+ * @doc_id inference-api-put-azureaistudio
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/{task_type}/{azureaistudio_inference_id}'
+      methods: ['PUT']
+    }
+  ]
+  path_parts: {
+    /**
+     * The type of the inference task that the model will perform.
+     */
+    task_type: AzureAiStudioTaskType
+    /**
+     * The unique identifier of the inference endpoint.
+     */
+    azureaistudio_inference_id: Id
+  }
+  body: {
+    /**
+     * The chunking configuration object.
+     * @ext_doc_id inference-chunking
+     */
+    chunking_settings?: InferenceChunkingSettings
+    /**
+     * The type of service supported for the specified task type. In this case, `azureaistudio`.
+     */
+    service: ServiceType
+    /**
+     * Settings used to install the inference model. These settings are specific to the `azureaistudio` service.
+     */
+    service_settings: AzureAiStudioServiceSettings
+    /**
+     * Settings to configure the inference task.
+     * These settings are specific to the task type you specified.
+     */
+    task_settings?: AzureAiStudioTaskSettings
+  }
+}
+
+export enum AzureAiStudioTaskType {
+  completion,
+  text_embedding
+}
+
+export enum ServiceType {
+  azureaistudio
+}
+
+export class AzureAiStudioServiceSettings {
+  /**
+   * A valid API key of your Azure AI Studio model deployment.
+   * This key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.
+ * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id azureaistudio-api-keys + */ + api_key: string + /** + * The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`. + * The `token` endpoint type is for "pay as you go" endpoints that are billed per token. + * The `realtime` endpoint type is for "real-time" endpoints that are billed per hour of usage. + * @ext_doc_id azureaistudio-endpoint-types + */ + endpoint_type: string + /** + * The target URL of your Azure AI Studio model deployment. + * This can be found on the overview page for your deployment in the management section of your Azure AI Studio account. + */ + target: string + /** + * The model provider for your deployment. + * Note that some providers may support only certain task types. + * Supported providers include: + * + * * `cohere` - available for `text_embedding` and `completion` task types + * * `databricks` - available for `completion` task type only + * * `meta` - available for `completion` task type only + * * `microsoft_phi` - available for `completion` task type only + * * `mistral` - available for `completion` task type only + * * `openai` - available for `text_embedding` and `completion` task types + */ + provider: string + /** + * This setting helps to minimize the number of rate limit errors returned from Azure AI Studio. + * By default, the `azureaistudio` service sets the number of requests allowed per minute to 240. + */ + rate_limit?: RateLimitSetting +} + +export class AzureAiStudioTaskSettings { + /** + * For a `completion` task, instruct the inference process to perform sampling. + * It has no effect unless `temperature` or `top_p` is specified. + */ + do_sample?: float + /** + * For a `completion` task, provide a hint for the maximum number of output tokens to be generated. + * @server_default 64 + */ + max_new_tokens?: integer + /** + * For a `completion` task, control the apparent creativity of generated completions with a sampling temperature. + * It must be a number in the range of 0.0 to 2.0. + * It should not be used if `top_p` is specified. + */ + temperature?: float + /** + * For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability. + * It is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0. + * It should not be used if `temperature` is specified. + */ + top_p?: float + /** + * For a `text_embedding` task, specify the user issuing the request. + * This information can be used for abuse detection. + */ + user?: string +} diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. 
licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml new file mode 100644 index 0000000000..0db68a9a3a --- /dev/null +++ b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml @@ -0,0 +1,14 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment. +# method_request: "PUT _inference/text_embedding/azure_ai_studio_embeddings" +# type: "request" +value: |- + { + "service": "azureaistudio", + "service_settings": { + "api_key": "Azure-AI-Studio-API-key", + "target": "Target-Uri", + "provider": "openai", + "endpoint_type": "token" + } + } diff --git a/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml new file mode 100644 index 0000000000..74d00dce8e --- /dev/null +++ b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml @@ -0,0 +1,14 @@ +summary: A completion task +description: Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task. +# method_request: "PUT _inference/completion/azure_ai_studio_completion" +# type: "request" +value: |- + { + "service": "azureaistudio", + "service_settings": { + "api_key": "Azure-AI-Studio-API-key", + "target": "Target-URI", + "provider": "databricks", + "endpoint_type": "realtime" + } + }
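
The two YAML examples above define only the request body. As a usage sketch (not a definitive implementation), the new endpoint can be exercised with any HTTP client; the TypeScript below assumes Node 18+ for the global fetch API, an ES_URL/ES_API_KEY environment for the cluster address and API-key auth, and reuses the placeholder Azure values from PutAzureAiStudioRequestExample1 — the inference id and all Azure values are illustrative, not prescribed by this patch.

// Minimal sketch: create an `azureaistudio` text_embedding endpoint over HTTP.
// Assumes Node 18+ (global fetch) and API-key auth; ES_URL, ES_API_KEY, the
// inference id, and the Azure placeholder values are all assumptions.
const ES_URL = process.env.ES_URL ?? 'https://localhost:9200'
const ES_API_KEY = process.env.ES_API_KEY ?? ''

async function createAzureAiStudioEmbeddings(): Promise<void> {
  // PUT /_inference/{task_type}/{azureaistudio_inference_id}
  const response = await fetch(
    `${ES_URL}/_inference/text_embedding/azure_ai_studio_embeddings`,
    {
      method: 'PUT',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `ApiKey ${ES_API_KEY}`
      },
      body: JSON.stringify({
        service: 'azureaistudio',
        service_settings: {
          api_key: 'Azure-AI-Studio-API-key', // placeholder, as in the example
          target: 'Target-Uri',               // placeholder deployment URL
          provider: 'openai',                 // supports text_embedding
          endpoint_type: 'token'              // "pay as you go", billed per token
        }
      })
    }
  )
  if (!response.ok) {
    throw new Error(`inference endpoint creation failed: ${response.status}`)
  }
  // On success the body is an InferenceEndpointInfo object.
  console.log(await response.json())
}

createAzureAiStudioEmbeddings().catch(console.error)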
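
The `provider` description in AzureAiStudioServiceSettings enumerates which providers support which task types. The following lookup table is a hypothetical client-side helper derived from that list (it is not part of the generated types) and makes the constraint easy to check before issuing the PUT:

// Which AzureAiStudioTaskType each documented provider supports, per the
// `provider` description above. The server remains the source of truth.
type AzureAiStudioTaskType = 'completion' | 'text_embedding'

const PROVIDER_TASK_TYPES: Record<string, AzureAiStudioTaskType[]> = {
  cohere: ['text_embedding', 'completion'],
  databricks: ['completion'],
  meta: ['completion'],
  microsoft_phi: ['completion'],
  mistral: ['completion'],
  openai: ['text_embedding', 'completion']
}

// Returns true when the provider is documented to support the task type.
function providerSupports(
  provider: string,
  taskType: AzureAiStudioTaskType
): boolean {
  return PROVIDER_TASK_TYPES[provider]?.includes(taskType) ?? false
}

console.log(providerSupports('databricks', 'text_embedding')) // false
console.log(providerSupports('openai', 'text_embedding'))     // true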
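
Similarly, AzureAiStudioTaskSettings documents that `temperature` and `top_p` are alternatives ranged 0.0 to 2.0 and that `do_sample` has no effect unless one of them is specified. The sketch below mirrors those notes as pre-flight validation; the interface is a local copy of InferencePutAzureaistudioAzureAiStudioTaskSettings from output/typescript/types.ts so the snippet stays self-contained, and the server-side checks remain authoritative.

// Local mirror of the generated task-settings type (floats become number).
interface AzureAiStudioTaskSettings {
  do_sample?: number
  max_new_tokens?: number // server default: 64
  temperature?: number
  top_p?: number
  user?: string
}

// Surfaces the documented constraints before the request is sent:
// temperature/top_p are 0.0-2.0 alternatives; do_sample needs one of them.
function validateCompletionTaskSettings(s: AzureAiStudioTaskSettings): string[] {
  const problems: string[] = []
  if (s.temperature !== undefined && s.top_p !== undefined) {
    problems.push('temperature and top_p should not be used together')
  }
  for (const [name, value] of [
    ['temperature', s.temperature],
    ['top_p', s.top_p]
  ] as const) {
    if (value !== undefined && (value < 0.0 || value > 2.0)) {
      problems.push(`${name} must be in the range 0.0 to 2.0`)
    }
  }
  if (s.do_sample !== undefined && s.temperature === undefined && s.top_p === undefined) {
    problems.push('do_sample has no effect unless temperature or top_p is specified')
  }
  return problems
}

// Example: flags the conflicting sampling parameters.
console.log(validateCompletionTaskSettings({ temperature: 0.7, top_p: 0.9 }))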