diff --git a/clients/ts-sdk/openapi.json b/clients/ts-sdk/openapi.json index 3610bc29c..4d7109154 100644 --- a/clients/ts-sdk/openapi.json +++ b/clients/ts-sdk/openapi.json @@ -3124,7 +3124,7 @@ "Dataset" ], "summary": "Create Dataset", - "description": "Auth'ed user must be an owner of the organization to create a dataset.", + "description": "Dataset will be created in the org specified via the TR-Organization header. Auth'ed user must be an owner of the organization to create a dataset.", "operationId": "create_dataset", "parameters": [ { @@ -3143,7 +3143,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CreateDatasetRequest" + "$ref": "#/components/schemas/CreateDatasetReqPayload" } } }, @@ -3203,7 +3203,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UpdateDatasetRequest" + "$ref": "#/components/schemas/UpdateDatasetReqPayload" } } }, @@ -3250,6 +3250,68 @@ ] } }, + "/api/dataset/batch_create_datasets": { + "post": { + "tags": [ + "Dataset" + ], + "summary": "Batch Create Datasets", + "description": "Datasets will be created in the org specified via the TR-Organization header. Auth'ed user must be an owner of the organization to create datasets. 
If a tracking_id is ignored due to it already existing on the org, the response will not contain a dataset with that tracking_id and it can be assumed that a dataset with the missing tracking_id already exists.", + "operationId": "batch_create_datasets", + "parameters": [ + { + "name": "TR-Organization", + "in": "header", + "description": "The organization id to use for the request", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "description": "JSON request payload to bulk create datasets", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateDatasetBatchReqPayload" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Datasets created successfully. Any dataset whose tracking_id already existed in the org is omitted from the response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Datasets" + } + } + } + }, + "400": { + "description": "Service error relating to creating the datasets", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponseBody" + } + } + } + } + }, + "security": [ + { + "ApiKey": [ + "owner" + ] + } + ] + } + }, "/api/dataset/clear/{dataset_id}": { "put": { "tags": [ "Dataset" @@ -7351,7 +7413,7 @@ }, "upsert_by_tracking_id": { "type": "boolean", - "description": "Upsert when a chunk with the same tracking_id exists. By default this is false, and the request will fail if a chunk with the same tracking_id exists. If this is true, the chunk will be updated if a chunk with the same tracking_id exists.", + "description": "Upsert when a chunk with the same tracking_id exists. By default this is false, and chunks will be ignored if another with the same tracking_id exists. 
If this is true, the chunk will be updated if a chunk with the same tracking_id exists.", "nullable": true }, "weight": { @@ -7961,6 +8023,31 @@ } ] }, + "CreateBatchDataset": { + "type": "object", + "required": [ + "dataset_name" + ], + "properties": { + "dataset_name": { + "type": "string", + "description": "Name of the dataset." + }, + "server_configuration": { + "allOf": [ + { + "$ref": "#/components/schemas/DatasetConfigurationDTO" + } + ], + "nullable": true + }, + "tracking_id": { + "type": "string", + "description": "Optional tracking ID for the dataset. Can be used to track the dataset in external systems. Must be unique within the organization. Strongly recommended to not use a valid uuid value as that will not work with the TR-Dataset header.", + "nullable": true + } + } + }, "CreateChunkGroupReqPayloadEnum": { "oneOf": [ { @@ -7991,7 +8078,27 @@ } ] }, - "CreateDatasetRequest": { + "CreateDatasetBatchReqPayload": { + "type": "object", + "required": [ + "datasets" + ], + "properties": { + "datasets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/CreateBatchDataset" + }, + "description": "List of datasets to create" + }, + "upsert": { + "type": "boolean", + "description": "Upsert when a dataset with one of the specified tracking_ids already exists. By default this is false and specified datasets with a tracking_id that already exists in the org will be ignored. If true, the existing dataset will be updated with the new dataset's details.", + "nullable": true + } + } + }, + "CreateDatasetReqPayload": { "type": "object", "required": [ "dataset_name" ], "properties": { @@ -8264,31 +8371,40 @@ "properties": { "created_at": { "type": "string", - "format": "date-time" + "format": "date-time", + "description": "Timestamp of the creation of the dataset" }, "deleted": { "type": "integer", - "format": "int32" + "format": "int32", + "description": "Flag to indicate if the dataset has been deleted. 
Deletes are handled async after the flag is set so as to avoid expensive search index compaction." }, "id": { "type": "string", - "format": "uuid" + "format": "uuid", + "description": "Unique identifier of the dataset, auto-generated uuid created by Trieve" }, "name": { - "type": "string" + "type": "string", + "description": "Name of the dataset" }, "organization_id": { "type": "string", - "format": "uuid" + "format": "uuid", + "description": "Unique identifier of the organization that owns the dataset" + }, + "server_configuration": { + "description": "Configuration of the dataset for RAG, embeddings, BM25, etc." }, - "server_configuration": {}, "tracking_id": { "type": "string", + "description": "Tracking ID of the dataset, can be any string, determined by the user. Tracking ID's are unique identifiers for datasets within an organization. They are designed to match the unique identifier of the dataset in the user's system.", "nullable": true }, "updated_at": { "type": "string", - "format": "date-time" + "format": "date-time", + "description": "Timestamp of the last update of the dataset" } }, "example": { @@ -8667,6 +8783,13 @@ "id": "e3e3e3e3-e3e3-e3e3-e3e3-e3e3e3e3e3e3" } }, + "Datasets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Dataset" + }, + "description": "Datasets" + }, "DateRange": { "type": "object", "description": "DateRange is a JSON object which can be used to filter chunks by a range of dates. This leverages the time_stamp field on chunks in your dataset. You can specify this if you want values in a certain range. You must provide ISO 8601 combined date and time without timezone.", @@ -9903,11 +10026,13 @@ "type": "array", "items": { "$ref": "#/components/schemas/TagsWithCount" - } + }, + "description": "List of tags with the number of chunks in the dataset with that tag." }, "total": { "type": "integer", - "format": "int64" + "format": "int64", + "description": "Total number of unique tags in the dataset." 
} } }, @@ -14495,10 +14620,12 @@ "properties": { "count": { "type": "integer", - "format": "int64" + "format": "int64", + "description": "Number of chunks in the dataset with that tag" }, "tag": { - "type": "string" + "type": "string", + "description": "Content of the tag" } } }, @@ -14909,7 +15036,7 @@ "weight": 0.5 } }, - "UpdateDatasetRequest": { + "UpdateDatasetReqPayload": { "type": "object", "properties": { "crawl_options": { diff --git a/clients/ts-sdk/package.json b/clients/ts-sdk/package.json index 531459b21..103f3decd 100644 --- a/clients/ts-sdk/package.json +++ b/clients/ts-sdk/package.json @@ -6,7 +6,7 @@ "files": [ "dist" ], - "version": "0.0.14", + "version": "0.0.16", "license": "MIT", "scripts": { "lint": "eslint 'src/**/*.ts'", diff --git a/clients/ts-sdk/src/functions/datasets/datasets.test.ts b/clients/ts-sdk/src/functions/datasets/datasets.test.ts new file mode 100644 index 000000000..f9e2d7d21 --- /dev/null +++ b/clients/ts-sdk/src/functions/datasets/datasets.test.ts @@ -0,0 +1,17 @@ +import { beforeAll, describe, expectTypeOf } from "vitest"; +import { TrieveSDK } from "../../sdk"; +import { EventReturn } from "../../types.gen"; +import { TRIEVE } from "../../__tests__/constants"; +import { test } from "../../__tests__/utils"; + +describe("Events Tests", async () => { + let trieve: TrieveSDK; + beforeAll(() => { + trieve = TRIEVE; + }); + + test("getEventsForDataset", async () => { + const data = await trieve.getEventsForDataset({}); + expectTypeOf(data).toEqualTypeOf(); + }); +}); diff --git a/clients/ts-sdk/src/functions/datasets/index.ts b/clients/ts-sdk/src/functions/datasets/index.ts new file mode 100644 index 000000000..818ef7687 --- /dev/null +++ b/clients/ts-sdk/src/functions/datasets/index.ts @@ -0,0 +1,298 @@ +/** + * This includes all the functions you can use to communicate with our datasets endpoint + * + * @module Dataset Methods + */ + +import { TrieveSDK } from "../../sdk"; +import { + CreateDatasetBatchReqPayload, + 
CreateDatasetReqPayload, + Dataset, + DatasetAndUsage, + DatasetUsageCount, + EventReturn, + File, + GetAllTagsReqPayload, + GetAllTagsResponse, + GetEventsData, + GroupData, + UpdateDatasetReqPayload, +} from "../../types.gen"; + +/** + * Function that provides the ability to create a dataset. This function is used to create a new dataset in the organization. + * + * Example: + * ```js + * const dataset = await trieve.createDataset({ + * dataset_name: "My Dataset", + * }); + * ``` + */ +export async function createDataset( + /** @hidden */ + this: TrieveSDK, + props: CreateDatasetReqPayload, + signal?: AbortSignal +): Promise { + if (!this.organizationId) { + throw new Error("Organization ID is required to create a dataset"); + } + + return this.trieve.fetch( + "/api/dataset", + "post", + { + data: props, + organizationId: this.organizationId, + }, + signal + ) as Promise; +} + +/** + * Function that provides the ability to update a dataset. This function is used to update an existing dataset in the organization by ID or Tracking ID. + * + * Example: + * ```js + * const dataset = await trieve.updateDataset({ + * tracking_id: "123456", + * dataset_name: "change to this name", + * }); + * ``` + */ +export async function updateDataset( + /** @hidden */ + this: TrieveSDK, + props: UpdateDatasetReqPayload, + signal?: AbortSignal +): Promise { + if (!this.organizationId) { + throw new Error("Organization ID is required to update a dataset"); + } + + return this.trieve.fetch( + "/api/dataset", + "put", + { + data: props, + organizationId: this.organizationId, + }, + signal + ) as Promise; +} + +/** + * Function that provides the ability to create datasets in batch. This function is used to create multiple datasets in the organization. 
+ * + * Example: + * ```js + * const datasets = await trieve.batchCreateDatasets({ + * datasets: [ + * { + * dataset_name: "My Dataset 1", + * }, + * ]}); + * ``` + */ +export async function batchCreateDatasets( + /** @hidden */ + this: TrieveSDK, + props: CreateDatasetBatchReqPayload, + signal?: AbortSignal +): Promise { + if (!this.organizationId) { + throw new Error("Organization ID is required to create a dataset"); + } + + return this.trieve.fetch( + "/api/dataset/batch_create_datasets", + "post", + { + data: props, + organizationId: this.organizationId, + }, + signal + ) as Promise; +} + +/** + * Function that provides the ability to remove all data from a dataset. This function is used to clear all data from a dataset. + * + * Example: + * ```js + * await trieve.clearDataset("1111-2222-3333-4444"); + */ +export async function clearDataset( + /** @hidden */ + this: TrieveSDK, + datasetId: string, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/clear/{dataset_id}", + "put", + { + datasetId, + }, + signal + ) as Promise; +} + +export async function getDatasetEvents( + /** @hidden */ + this: TrieveSDK, + props: GetEventsData, + datasetId: string, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/events", + "post", + { + datasetId, + data: props, + }, + signal + ) as Promise; +} + +export async function getDatasetFiles( + /** @hidden */ + this: TrieveSDK, + datasetId: string, + page: number, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/files/{dataset_id}/{page}", + "get", + { + datasetId, + page, + }, + signal + ) as Promise; +} + +export async function getAllDatasetTags( + /** @hidden */ + this: TrieveSDK, + props: GetAllTagsReqPayload, + datasetId: string, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/get_all_tags", + "post", + { + data: props, + datasetId, + }, + signal + ) as Promise; +} + +export async function 
getGroupsForDataset( + /** @hidden */ + this: TrieveSDK, + datasetId: string, + page: number, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/groups/{dataset_id}/{page}", + "get", + { + datasetId, + page, + }, + signal + ) as Promise; +} + +export async function getDatasetsFromOrganization( + /** @hidden */ + this: TrieveSDK, + organizationId: string, + limit?: number, + offset?: number, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/organization/{organization_id}", + "get", + { + organizationId, + limit, + offset, + }, + signal + ) as Promise; +} + +export async function getDatasetByTrackingId( + /** @hidden */ + this: TrieveSDK, + trackingId: string, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/tracking_id/{tracking_id}", + "get", + { + datasetId: trackingId, + trackingId, + }, + signal + ) as Promise; +} + +export async function getDatasetUsageById( + /** @hidden */ + this: TrieveSDK, + datasetId: string, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/usage/{dataset_id}", + "get", + { + datasetId, + }, + signal + ) as Promise; +} + +export async function getDatasetById( + /** @hidden */ + this: TrieveSDK, + datasetId: string, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/{dataset_id}", + "get", + { + datasetId, + }, + signal + ) as Promise; +} + +export async function deleteDataset( + /** @hidden */ + this: TrieveSDK, + datasetId: string, + signal?: AbortSignal +): Promise { + return this.trieve.fetch( + "/api/dataset/{dataset_id}", + "delete", + { + datasetId, + }, + signal + ) as Promise; +} diff --git a/clients/ts-sdk/src/sdk.ts b/clients/ts-sdk/src/sdk.ts index c6057e9ea..ca5c34756 100644 --- a/clients/ts-sdk/src/sdk.ts +++ b/clients/ts-sdk/src/sdk.ts @@ -4,16 +4,19 @@ import { TrieveFetchClient } from "./fetch-client"; export class TrieveSDK { trieve: TrieveFetchClient; datasetId: string; 
+ organizationId?: string; constructor({ apiKey, baseUrl = "https://api.trieve.ai", debug = false, datasetId, + organizationId, }: { apiKey: string; baseUrl?: string; debug?: boolean; datasetId: string; + organizationId?: string; }) { this.trieve = new TrieveFetchClient({ apiKey: apiKey, @@ -21,6 +24,7 @@ export class TrieveSDK { debug: debug, }); this.datasetId = datasetId; + this.organizationId = organizationId; } } diff --git a/clients/ts-sdk/src/types.gen.ts b/clients/ts-sdk/src/types.gen.ts index 18f238d57..4182a87a1 100644 --- a/clients/ts-sdk/src/types.gen.ts +++ b/clients/ts-sdk/src/types.gen.ts @@ -375,7 +375,7 @@ export type ChunkReqPayload = { */ tracking_id?: (string) | null; /** - * Upsert when a chunk with the same tracking_id exists. By default this is false, and the request will fail if a chunk with the same tracking_id exists. If this is true, the chunk will be updated if a chunk with the same tracking_id exists. + * Upsert when a chunk with the same tracking_id exists. By default this is false, and chunks will be ignored if another with the same tracking_id exists. If this is true, the chunk will be updated if a chunk with the same tracking_id exists. */ upsert_by_tracking_id?: (boolean) | null; /** @@ -560,13 +560,36 @@ export type CreateBatchChunkGroupReqPayload = Array; +export type CreateBatchDataset = { + /** + * Name of the dataset. + */ + dataset_name: string; + server_configuration?: ((DatasetConfigurationDTO) | null); + /** + * Optional tracking ID for the dataset. Can be used to track the dataset in external systems. Must be unique within the organization. Strongly recommended to not use a valid uuid value as that will not work with the TR-Dataset header. 
+ */ + tracking_id?: (string) | null; +}; + export type CreateChunkGroupReqPayloadEnum = CreateSingleChunkGroupReqPayload | CreateBatchChunkGroupReqPayload; export type CreateChunkGroupResponseEnum = ChunkGroup | ChunkGroups; export type CreateChunkReqPayloadEnum = ChunkReqPayload | CreateBatchChunkReqPayload; -export type CreateDatasetRequest = { +export type CreateDatasetBatchReqPayload = { + /** + * List of datasets to create + */ + datasets: Array; + /** + * Upsert when a dataset with one of the specified tracking_ids already exists. By default this is false and specified datasets with a tracking_id that already exists in the org will not be ignored. If true, the existing dataset will be updated with the new dataset's details. + */ + upsert?: (boolean) | null; +}; + +export type CreateDatasetReqPayload = { crawl_options?: ((CrawlOptions) | null); /** * Name of the dataset. @@ -675,13 +698,37 @@ export type CreateTopicReqPayload = { }; export type Dataset = { + /** + * Timestamp of the creation of the dataset + */ created_at: string; + /** + * Flag to indicate if the dataset has been deleted. Deletes are handled async after the flag is set so as to avoid expensive search index compaction. + */ deleted: number; + /** + * Unique identifier of the dataset, auto-generated uuid created by Trieve + */ id: string; + /** + * Name of the dataset + */ name: string; + /** + * Unique identifier of the organization that owns the dataset + */ organization_id: string; + /** + * Configuration of the dataset for RAG, embeddings, BM25, etc. + */ server_configuration: unknown; + /** + * Tracking ID of the dataset, can be any string, determined by the user. Tracking ID's are unique identifiers for datasets within an organization. They are designed to match the unique identifier of the dataset in the user's system. 
+ */ tracking_id?: (string) | null; + /** + * Timestamp of the last update of the dataset + */ updated_at: string; }; @@ -826,6 +873,11 @@ export type DatasetUsageCount = { id: string; }; +/** + * Datasets + */ +export type Datasets = Array; + /** * DateRange is a JSON object which can be used to filter chunks by a range of dates. This leverages the time_stamp field on chunks in your dataset. You can specify this if you want values in a certain range. You must provide ISO 8601 combined date and time without timezone. */ @@ -1324,7 +1376,13 @@ export type GetAllTagsReqPayload = { }; export type GetAllTagsResponse = { + /** + * List of tags with the number of chunks in the dataset with that tag. + */ tags: Array; + /** + * Total number of unique tags in the dataset. + */ total: number; }; @@ -2613,7 +2671,13 @@ export type SuggestedQueriesResponse = { }; export type TagsWithCount = { + /** + * Number of chunks in the dataset with that tag + */ count: number; + /** + * Content of the tag + */ tag: string; }; @@ -2805,7 +2869,7 @@ export type UpdateChunkReqPayload = { weight?: (number) | null; }; -export type UpdateDatasetRequest = { +export type UpdateDatasetReqPayload = { crawl_options?: ((CrawlOptions) | null); /** * The id of the dataset you want to update. 
@@ -3630,7 +3694,7 @@ export type CreateDatasetData = { /** * JSON request payload to create a new dataset */ - requestBody: CreateDatasetRequest; + requestBody: CreateDatasetReqPayload; /** * The organization id to use for the request */ @@ -3643,7 +3707,7 @@ export type UpdateDatasetData = { /** * JSON request payload to update a dataset */ - requestBody: UpdateDatasetRequest; + requestBody: UpdateDatasetReqPayload; /** * The organization id to use for the request */ @@ -3652,6 +3716,19 @@ export type UpdateDatasetData = { export type UpdateDatasetResponse = (Dataset); +export type BatchCreateDatasetsData = { + /** + * JSON request payload to bulk create datasets + */ + requestBody: CreateDatasetBatchReqPayload; + /** + * The organization id to use for the request + */ + trOrganization: string; +}; + +export type BatchCreateDatasetsResponse = (Datasets); + export type ClearDatasetData = { /** * The id of the dataset you want to clear. @@ -4986,6 +5063,21 @@ export type $OpenApiTs = { }; }; }; + '/api/dataset/batch_create_datasets': { + post: { + req: BatchCreateDatasetsData; + res: { + /** + * Page of tags requested with all tags and the number of chunks in the dataset with that tag plus the total number of unique tags for the whole datset + */ + 200: Datasets; + /** + * Service error relating to finding items by tag + */ + 400: ErrorResponseBody; + }; + }; + }; '/api/dataset/clear/{dataset_id}': { put: { req: ClearDatasetData;