From ae3c26b50e5aa8d9a7f1477b499a52a343b1f8b7 Mon Sep 17 00:00:00 2001 From: hejp001 <62975933+hejp001@users.noreply.github.com> Date: Wed, 21 Aug 2024 03:43:13 +0800 Subject: [PATCH 01/44] Update common.js (#2148) Translated several entries. --- frontend/src/locales/zh/common.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/frontend/src/locales/zh/common.js b/frontend/src/locales/zh/common.js index 9392ab11cb..59a84b96f9 100644 --- a/frontend/src/locales/zh/common.js +++ b/frontend/src/locales/zh/common.js @@ -152,7 +152,7 @@ const TRANSLATIONS = { title: "聊天模式", chat: { title: "聊天", - "desc-start": "将提供法学硕士的一般知识", + "desc-start": "将提供 LLM 的一般知识", and: "和", "desc-end": "找到的文档上下文的答案。", }, @@ -182,7 +182,7 @@ const TRANSLATIONS = { "desc-end": "模式时,当未找到上下文时,您可能希望返回自定义拒绝响应。", }, temperature: { - title: "LLM Temperature", + title: "LLM 温度", "desc-start": "此设置控制您的 LLM 回答的“创意”程度", "desc-end": "数字越高越有创意。对于某些模型,如果设置得太高,可能会导致响应不一致。", @@ -277,11 +277,11 @@ const TRANSLATIONS = { export: "导出", table: { id: "Id", - by: "Sent By", - workspace: "Workspace", - prompt: "Prompt", - response: "Response", - at: "Sent At", + by: "发送者", + workspace: "工作区", + prompt: "提示", + response: "响应", + at: "发送时间", }, }, From 17abbe97e501937b36e3094bd860598adcd5e421 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Tue, 20 Aug 2024 12:48:08 -0700 Subject: [PATCH 02/44] remove `#swagger.path` from API docs --- server/endpoints/api/admin/index.js | 5 ----- server/endpoints/api/workspace/index.js | 6 ------ server/endpoints/api/workspaceThread/index.js | 1 - server/swagger/openapi.json | 2 +- 4 files changed, 1 insertion(+), 13 deletions(-) diff --git a/server/endpoints/api/admin/index.js b/server/endpoints/api/admin/index.js index d60a0f26a6..e1eb05450a 100644 --- a/server/endpoints/api/admin/index.js +++ b/server/endpoints/api/admin/index.js @@ -143,7 +143,6 @@ function apiAdminEndpoints(app) { app.post("/v1/admin/users/:id", [validApiKey], async (request, response) => { /* #swagger.tags = ['Admin'] - #swagger.path = '/v1/admin/users/{id}' #swagger.parameters['id'] = { in: 'path', description: 'id of the user in the database.', @@ -221,7 +220,6 @@ function apiAdminEndpoints(app) { /* #swagger.tags = ['Admin'] #swagger.description = 'Delete existing user by id. Methods are disabled until multi user mode is enabled via the UI.' - #swagger.path = '/v1/admin/users/{id}' #swagger.parameters['id'] = { in: 'path', description: 'id of the user in the database.', @@ -382,7 +380,6 @@ function apiAdminEndpoints(app) { /* #swagger.tags = ['Admin'] #swagger.description = 'Deactivates (soft-delete) invite by id. Methods are disabled until multi user mode is enabled via the UI.' - #swagger.path = '/v1/admin/invite/{id}' #swagger.parameters['id'] = { in: 'path', description: 'id of the invite in the database.', @@ -432,7 +429,6 @@ function apiAdminEndpoints(app) { async (request, response) => { /* #swagger.tags = ['Admin'] - #swagger.path = '/v1/admin/workspaces/{workspaceId}/users' #swagger.parameters['workspaceId'] = { in: 'path', description: 'id of the workspace.', @@ -487,7 +483,6 @@ function apiAdminEndpoints(app) { async (request, response) => { /* #swagger.tags = ['Admin'] - #swagger.path = '/v1/admin/workspaces/{workspaceId}/update-users' #swagger.parameters['workspaceId'] = { in: 'path', description: 'id of the workspace in the database.', diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index 3d4e90fb4d..c9a6cb51e5 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -142,7 +142,6 @@ function apiWorkspaceEndpoints(app) { /* #swagger.tags = ['Workspaces'] #swagger.description = 'Get a workspace by its unique slug.' - #swagger.path = '/v1/workspace/{slug}' #swagger.parameters['slug'] = { in: 'path', description: 'Unique slug of workspace to find', @@ -209,7 +208,6 @@ function apiWorkspaceEndpoints(app) { /* #swagger.tags = ['Workspaces'] #swagger.description = 'Deletes a workspace by its slug.' - #swagger.path = '/v1/workspace/{slug}' #swagger.parameters['slug'] = { in: 'path', description: 'Unique slug of workspace to delete', @@ -261,7 +259,6 @@ function apiWorkspaceEndpoints(app) { /* #swagger.tags = ['Workspaces'] #swagger.description = 'Update workspace settings by its unique slug.' - #swagger.path = '/v1/workspace/{slug}/update' #swagger.parameters['slug'] = { in: 'path', description: 'Unique slug of workspace to find', @@ -341,7 +338,6 @@ function apiWorkspaceEndpoints(app) { /* #swagger.tags = ['Workspaces'] #swagger.description = 'Get a workspaces chats regardless of user by its unique slug.' - #swagger.path = '/v1/workspace/{slug}/chats' #swagger.parameters['slug'] = { in: 'path', description: 'Unique slug of workspace to find', @@ -402,7 +398,6 @@ function apiWorkspaceEndpoints(app) { /* #swagger.tags = ['Workspaces'] #swagger.description = 'Add or remove documents from a workspace by its unique slug.' - #swagger.path = '/v1/workspace/{slug}/update-embeddings' #swagger.parameters['slug'] = { in: 'path', description: 'Unique slug of workspace to find', @@ -481,7 +476,6 @@ function apiWorkspaceEndpoints(app) { /* #swagger.tags = ['Workspaces'] #swagger.description = 'Add or remove pin from a document in a workspace by its unique slug.' - #swagger.path = '/workspace/{slug}/update-pin' #swagger.parameters['slug'] = { in: 'path', description: 'Unique slug of workspace to find', diff --git a/server/endpoints/api/workspaceThread/index.js b/server/endpoints/api/workspaceThread/index.js index e2c6af1c76..de30e24d0b 100644 --- a/server/endpoints/api/workspaceThread/index.js +++ b/server/endpoints/api/workspaceThread/index.js @@ -110,7 +110,6 @@ function apiWorkspaceThreadEndpoints(app) { /* #swagger.tags = ['Workspace Threads'] #swagger.description = 'Update thread name by its unique slug.' - #swagger.path = '/v1/workspace/{slug}/thread/{threadSlug}/update' #swagger.parameters['slug'] = { in: 'path', description: 'Unique slug of workspace', diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index e14ed576ff..ef41449229 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -1846,7 +1846,7 @@ } } }, - "/workspace/{slug}/update-pin": { + "/v1/workspace/{slug}/update-pin": { "post": { "tags": [ "Workspaces" From 1f96b837b3883f2958d0a4845b15e835c7e53319 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Wed, 21 Aug 2024 14:47:06 -0700 Subject: [PATCH 03/44] Refactor api endpoint chat handler to its own function (#2157) remove legacy `chatWithWorkspace` and cleanup `index.js` --- .vscode/settings.json | 1 + server/endpoints/api/workspace/index.js | 37 +- server/endpoints/api/workspaceThread/index.js | 14 +- server/utils/chats/apiChatHandler.js | 481 ++++++++++++++++++ server/utils/chats/index.js | 212 -------- 5 files changed, 514 insertions(+), 231 deletions(-) create mode 100644 server/utils/chats/apiChatHandler.js diff --git a/.vscode/settings.json b/.vscode/settings.json index 3fcc79cd5a..549fd15742 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -43,6 +43,7 @@ "searxng", "Serper", "Serply", + "streamable", "textgenwebui", "togetherai", "Unembed", diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index c9a6cb51e5..1fe9ad8dc3 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -4,19 +4,16 @@ const { Telemetry } = require("../../../models/telemetry"); const { DocumentVectors } = require("../../../models/vectors"); const { Workspace } = require("../../../models/workspace"); const { WorkspaceChats } = require("../../../models/workspaceChats"); -const { chatWithWorkspace } = require("../../../utils/chats"); const { getVectorDbClass } = require("../../../utils/helpers"); const { multiUserMode, reqBody } = require("../../../utils/http"); const { validApiKey } = require("../../../utils/middleware/validApiKey"); -const { - streamChatWithWorkspace, - VALID_CHAT_MODE, -} = require("../../../utils/chats/stream"); +const { VALID_CHAT_MODE } = require("../../../utils/chats/stream"); const { EventLogs } = require("../../../models/eventLogs"); const { convertToChatHistory, writeResponseChunk, } = require("../../../utils/helpers/chat/responses"); +const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler"); function apiWorkspaceEndpoints(app) { if (!app) return; @@ -584,7 +581,7 @@ function apiWorkspaceEndpoints(app) { try { const { slug } = request.params; const { message, mode = "query" } = reqBody(request); - const workspace = await Workspace.get({ slug }); + const workspace = await Workspace.get({ slug: String(slug) }); if (!workspace) { response.status(400).json({ @@ -612,9 +609,17 @@ function apiWorkspaceEndpoints(app) { return; } - const result = await chatWithWorkspace(workspace, message, mode); + const result = await ApiChatHandler.chatSync({ + workspace, + message, + mode, + user: null, + thread: null, + }); + await Telemetry.sendTelemetry("sent_chat", { - LLMSelection: process.env.LLM_PROVIDER || "openai", + LLMSelection: + workspace.chatProvider ?? process.env.LLM_PROVIDER ?? "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", @@ -623,7 +628,7 @@ function apiWorkspaceEndpoints(app) { workspaceName: workspace?.name, chatModel: workspace?.chatModel || "System Default", }); - response.status(200).json({ ...result }); + return response.status(200).json({ ...result }); } catch (e) { console.error(e.message, e); response.status(500).json({ @@ -702,7 +707,7 @@ function apiWorkspaceEndpoints(app) { try { const { slug } = request.params; const { message, mode = "query" } = reqBody(request); - const workspace = await Workspace.get({ slug }); + const workspace = await Workspace.get({ slug: String(slug) }); if (!workspace) { response.status(400).json({ @@ -736,9 +741,17 @@ function apiWorkspaceEndpoints(app) { response.setHeader("Connection", "keep-alive"); response.flushHeaders(); - await streamChatWithWorkspace(response, workspace, message, mode); + await ApiChatHandler.streamChat({ + response, + workspace, + message, + mode, + user: null, + thread: null, + }); await Telemetry.sendTelemetry("sent_chat", { - LLMSelection: process.env.LLM_PROVIDER || "openai", + LLMSelection: + workspace.chatProvider ?? process.env.LLM_PROVIDER ?? "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", diff --git a/server/endpoints/api/workspaceThread/index.js b/server/endpoints/api/workspaceThread/index.js index de30e24d0b..cdc4d598cf 100644 --- a/server/endpoints/api/workspaceThread/index.js +++ b/server/endpoints/api/workspaceThread/index.js @@ -3,7 +3,6 @@ const { WorkspaceThread } = require("../../../models/workspaceThread"); const { Workspace } = require("../../../models/workspace"); const { validApiKey } = require("../../../utils/middleware/validApiKey"); const { reqBody, multiUserMode } = require("../../../utils/http"); -const { chatWithWorkspace } = require("../../../utils/chats"); const { streamChatWithWorkspace, VALID_CHAT_MODE, @@ -16,6 +15,7 @@ const { } = require("../../../utils/helpers/chat/responses"); const { WorkspaceChats } = require("../../../models/workspaceChats"); const { User } = require("../../../models/user"); +const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler"); function apiWorkspaceThreadEndpoints(app) { if (!app) return; @@ -405,13 +405,13 @@ function apiWorkspaceThreadEndpoints(app) { } const user = userId ? await User.get({ id: Number(userId) }) : null; - const result = await chatWithWorkspace( + const result = await ApiChatHandler.chatSync({ workspace, message, mode, user, - thread - ); + thread, + }); await Telemetry.sendTelemetry("sent_chat", { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", @@ -556,14 +556,14 @@ function apiWorkspaceThreadEndpoints(app) { response.setHeader("Connection", "keep-alive"); response.flushHeaders(); - await streamChatWithWorkspace( + await ApiChatHandler.streamChat({ response, workspace, message, mode, user, - thread - ); + thread, + }); await Telemetry.sendTelemetry("sent_chat", { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", diff --git a/server/utils/chats/apiChatHandler.js b/server/utils/chats/apiChatHandler.js new file mode 100644 index 0000000000..a52e2da14f --- /dev/null +++ b/server/utils/chats/apiChatHandler.js @@ -0,0 +1,481 @@ +const { v4: uuidv4 } = require("uuid"); +const { DocumentManager } = require("../DocumentManager"); +const { WorkspaceChats } = require("../../models/workspaceChats"); +const { getVectorDbClass, getLLMProvider } = require("../helpers"); +const { writeResponseChunk } = require("../helpers/chat/responses"); +const { chatPrompt, sourceIdentifier, recentChatHistory } = require("./index"); + +/** + * @typedef ResponseObject + * @property {string} id - uuid of response + * @property {string} type - Type of response + * @property {string|null} textResponse - full text response + * @property {object[]} sources + * @property {boolean} close + * @property {string|null} error + */ + +/** + * Handle synchronous chats with your workspace via the developer API endpoint + * @param {{ + * workspace: import("@prisma/client").workspaces, + * message:string, + * mode: "chat"|"query", + * user: import("@prisma/client").users|null, + * thread: import("@prisma/client").workspace_threads|null, + * }} parameters + * @returns {Promise} + */ +async function chatSync({ + workspace, + message = null, + mode = "chat", + user = null, + thread = null, +}) { + const uuid = uuidv4(); + const chatMode = mode ?? "chat"; + const LLMConnector = getLLMProvider({ + provider: workspace?.chatProvider, + model: workspace?.chatModel, + }); + const VectorDb = getVectorDbClass(); + const messageLimit = workspace?.openAiHistory || 20; + const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug); + const embeddingsCount = await VectorDb.namespaceCount(workspace.slug); + + // User is trying to query-mode chat a workspace that has no data in it - so + // we should exit early as no information can be found under these conditions. + if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") { + const textResponse = + workspace?.queryRefusalResponse ?? + "There is no relevant information in this workspace to answer your query."; + + await WorkspaceChats.new({ + workspaceId: workspace.id, + prompt: String(message), + response: { + text: textResponse, + sources: [], + type: chatMode, + }, + include: false, + }); + + return { + id: uuid, + type: "textResponse", + sources: [], + close: true, + error: null, + textResponse, + }; + } + + // If we are here we know that we are in a workspace that is: + // 1. Chatting in "chat" mode and may or may _not_ have embeddings + // 2. Chatting in "query" mode and has at least 1 embedding + let contextTexts = []; + let sources = []; + let pinnedDocIdentifiers = []; + const { rawHistory, chatHistory } = await recentChatHistory({ + user, + workspace, + thread, + messageLimit, + chatMode, + }); + + await new DocumentManager({ + workspace, + maxTokens: LLMConnector.promptWindowLimit(), + }) + .pinnedDocs() + .then((pinnedDocs) => { + pinnedDocs.forEach((doc) => { + const { pageContent, ...metadata } = doc; + pinnedDocIdentifiers.push(sourceIdentifier(doc)); + contextTexts.push(doc.pageContent); + sources.push({ + text: + pageContent.slice(0, 1_000) + + "...continued on in source document...", + ...metadata, + }); + }); + }); + + const vectorSearchResults = + embeddingsCount !== 0 + ? await VectorDb.performSimilaritySearch({ + namespace: workspace.slug, + input: message, + LLMConnector, + similarityThreshold: workspace?.similarityThreshold, + topN: workspace?.topN, + filterIdentifiers: pinnedDocIdentifiers, + }) + : { + contextTexts: [], + sources: [], + message: null, + }; + + // Failed similarity search if it was run at all and failed. + if (!!vectorSearchResults.message) { + return { + id: uuid, + type: "abort", + textResponse: null, + sources: [], + close: true, + error: vectorSearchResults.message, + }; + } + + const { fillSourceWindow } = require("../helpers/chat"); + const filledSources = fillSourceWindow({ + nDocs: workspace?.topN || 4, + searchResults: vectorSearchResults.sources, + history: rawHistory, + filterIdentifiers: pinnedDocIdentifiers, + }); + + // Why does contextTexts get all the info, but sources only get current search? + // This is to give the ability of the LLM to "comprehend" a contextual response without + // populating the Citations under a response with documents the user "thinks" are irrelevant + // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses. + // If a past citation was used to answer the question - that is visible in the history so it logically makes sense + // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt. + // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keep answers highly accurate. + contextTexts = [...contextTexts, ...filledSources.contextTexts]; + sources = [...sources, ...vectorSearchResults.sources]; + + // If in query mode and no context chunks are found from search, backfill, or pins - do not + // let the LLM try to hallucinate a response or use general knowledge and exit early + if (chatMode === "query" && contextTexts.length === 0) { + const textResponse = + workspace?.queryRefusalResponse ?? + "There is no relevant information in this workspace to answer your query."; + + await WorkspaceChats.new({ + workspaceId: workspace.id, + prompt: message, + response: { + text: textResponse, + sources: [], + type: chatMode, + }, + threadId: thread?.id || null, + include: false, + user, + }); + + return { + id: uuid, + type: "textResponse", + sources: [], + close: true, + error: null, + textResponse, + }; + } + + // Compress & Assemble message to ensure prompt passes token limit with room for response + // and build system messages based on inputs and history. + const messages = await LLMConnector.compressMessages( + { + systemPrompt: chatPrompt(workspace), + userPrompt: message, + contextTexts, + chatHistory, + }, + rawHistory + ); + + // Send the text completion. + const textResponse = await LLMConnector.getChatCompletion(messages, { + temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp, + }); + + if (!textResponse) { + return { + id: uuid, + type: "abort", + textResponse: null, + sources: [], + close: true, + error: "No text completion could be completed with this input.", + }; + } + + const { chat } = await WorkspaceChats.new({ + workspaceId: workspace.id, + prompt: message, + response: { text: textResponse, sources, type: chatMode }, + threadId: thread?.id || null, + user, + }); + + return { + id: uuid, + type: "textResponse", + close: true, + error: null, + chatId: chat.id, + textResponse, + sources, + }; +} + +/** + * Handle streamable HTTP chunks for chats with your workspace via the developer API endpoint + * @param {{ + * response: import("express").Response, + * workspace: import("@prisma/client").workspaces, + * message:string, + * mode: "chat"|"query", + * user: import("@prisma/client").users|null, + * thread: import("@prisma/client").workspace_threads|null, + * }} parameters + * @returns {Promise} + */ +async function streamChat({ + response, + workspace, + message = null, + mode = "chat", + user = null, + thread = null, +}) { + const uuid = uuidv4(); + const chatMode = mode ?? "chat"; + const LLMConnector = getLLMProvider({ + provider: workspace?.chatProvider, + model: workspace?.chatModel, + }); + + const VectorDb = getVectorDbClass(); + const messageLimit = workspace?.openAiHistory || 20; + const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug); + const embeddingsCount = await VectorDb.namespaceCount(workspace.slug); + + // User is trying to query-mode chat a workspace that has no data in it - so + // we should exit early as no information can be found under these conditions. + if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") { + const textResponse = + workspace?.queryRefusalResponse ?? + "There is no relevant information in this workspace to answer your query."; + writeResponseChunk(response, { + id: uuid, + type: "textResponse", + textResponse, + sources: [], + attachments: [], + close: true, + error: null, + }); + await WorkspaceChats.new({ + workspaceId: workspace.id, + prompt: message, + response: { + text: textResponse, + sources: [], + type: chatMode, + attachments: [], + }, + threadId: thread?.id || null, + include: false, + user, + }); + return; + } + + // If we are here we know that we are in a workspace that is: + // 1. Chatting in "chat" mode and may or may _not_ have embeddings + // 2. Chatting in "query" mode and has at least 1 embedding + let completeText; + let contextTexts = []; + let sources = []; + let pinnedDocIdentifiers = []; + const { rawHistory, chatHistory } = await recentChatHistory({ + user, + workspace, + thread, + messageLimit, + }); + + // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search + // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window. + // However we limit the maximum of appended context to 80% of its overall size, mostly because if it expands beyond this + // it will undergo prompt compression anyway to make it work. If there is so much pinned that the context here is bigger than + // what the model can support - it would get compressed anyway and that really is not the point of pinning. It is really best + // suited for high-context models. + await new DocumentManager({ + workspace, + maxTokens: LLMConnector.promptWindowLimit(), + }) + .pinnedDocs() + .then((pinnedDocs) => { + pinnedDocs.forEach((doc) => { + const { pageContent, ...metadata } = doc; + pinnedDocIdentifiers.push(sourceIdentifier(doc)); + contextTexts.push(doc.pageContent); + sources.push({ + text: + pageContent.slice(0, 1_000) + + "...continued on in source document...", + ...metadata, + }); + }); + }); + + const vectorSearchResults = + embeddingsCount !== 0 + ? await VectorDb.performSimilaritySearch({ + namespace: workspace.slug, + input: message, + LLMConnector, + similarityThreshold: workspace?.similarityThreshold, + topN: workspace?.topN, + filterIdentifiers: pinnedDocIdentifiers, + }) + : { + contextTexts: [], + sources: [], + message: null, + }; + + // Failed similarity search if it was run at all and failed. + if (!!vectorSearchResults.message) { + writeResponseChunk(response, { + id: uuid, + type: "abort", + textResponse: null, + sources: [], + close: true, + error: vectorSearchResults.message, + }); + return; + } + + const { fillSourceWindow } = require("../helpers/chat"); + const filledSources = fillSourceWindow({ + nDocs: workspace?.topN || 4, + searchResults: vectorSearchResults.sources, + history: rawHistory, + filterIdentifiers: pinnedDocIdentifiers, + }); + + // Why does contextTexts get all the info, but sources only get current search? + // This is to give the ability of the LLM to "comprehend" a contextual response without + // populating the Citations under a response with documents the user "thinks" are irrelevant + // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses. + // If a past citation was used to answer the question - that is visible in the history so it logically makes sense + // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt. + // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keep answers highly accurate. + contextTexts = [...contextTexts, ...filledSources.contextTexts]; + sources = [...sources, ...vectorSearchResults.sources]; + + // If in query mode and no context chunks are found from search, backfill, or pins - do not + // let the LLM try to hallucinate a response or use general knowledge and exit early + if (chatMode === "query" && contextTexts.length === 0) { + const textResponse = + workspace?.queryRefusalResponse ?? + "There is no relevant information in this workspace to answer your query."; + writeResponseChunk(response, { + id: uuid, + type: "textResponse", + textResponse, + sources: [], + close: true, + error: null, + }); + + await WorkspaceChats.new({ + workspaceId: workspace.id, + prompt: message, + response: { + text: textResponse, + sources: [], + type: chatMode, + attachments: [], + }, + threadId: thread?.id || null, + include: false, + user, + }); + return; + } + + // Compress & Assemble message to ensure prompt passes token limit with room for response + // and build system messages based on inputs and history. + const messages = await LLMConnector.compressMessages( + { + systemPrompt: chatPrompt(workspace), + userPrompt: message, + contextTexts, + chatHistory, + }, + rawHistory + ); + + // If streaming is not explicitly enabled for connector + // we do regular waiting of a response and send a single chunk. + if (LLMConnector.streamingEnabled() !== true) { + console.log( + `\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.` + ); + completeText = await LLMConnector.getChatCompletion(messages, { + temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp, + }); + writeResponseChunk(response, { + uuid, + sources, + type: "textResponseChunk", + textResponse: completeText, + close: true, + error: false, + }); + } else { + const stream = await LLMConnector.streamGetChatCompletion(messages, { + temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp, + }); + completeText = await LLMConnector.handleStream(response, stream, { + uuid, + sources, + }); + } + + if (completeText?.length > 0) { + const { chat } = await WorkspaceChats.new({ + workspaceId: workspace.id, + prompt: message, + response: { text: completeText, sources, type: chatMode }, + threadId: thread?.id || null, + user, + }); + + writeResponseChunk(response, { + uuid, + type: "finalizeResponseStream", + close: true, + error: false, + chatId: chat.id, + }); + return; + } + + writeResponseChunk(response, { + uuid, + type: "finalizeResponseStream", + close: true, + error: false, + }); + return; +} + +module.exports.ApiChatHandler = { + chatSync, + streamChat, +}; diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js index dd0f6076fa..3ec358728e 100644 --- a/server/utils/chats/index.js +++ b/server/utils/chats/index.js @@ -1,9 +1,7 @@ const { v4: uuidv4 } = require("uuid"); const { WorkspaceChats } = require("../../models/workspaceChats"); const { resetMemory } = require("./commands/reset"); -const { getVectorDbClass, getLLMProvider } = require("../helpers"); const { convertToPromptHistory } = require("../helpers/chat/responses"); -const { DocumentManager } = require("../DocumentManager"); const { SlashCommandPresets } = require("../../models/slashCommandsPresets"); const VALID_COMMANDS = { @@ -34,216 +32,6 @@ async function grepCommand(message, user = null) { return updatedMessage; } -async function chatWithWorkspace( - workspace, - message, - chatMode = "chat", - user = null, - thread = null -) { - const uuid = uuidv4(); - const updatedMessage = await grepCommand(message, user); - - if (Object.keys(VALID_COMMANDS).includes(updatedMessage)) { - return await VALID_COMMANDS[updatedMessage](workspace, message, uuid, user); - } - - const LLMConnector = getLLMProvider({ - provider: workspace?.chatProvider, - model: workspace?.chatModel, - }); - const VectorDb = getVectorDbClass(); - - const messageLimit = workspace?.openAiHistory || 20; - const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug); - const embeddingsCount = await VectorDb.namespaceCount(workspace.slug); - - // User is trying to query-mode chat a workspace that has no data in it - so - // we should exit early as no information can be found under these conditions. - if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") { - const textResponse = - workspace?.queryRefusalResponse ?? - "There is no relevant information in this workspace to answer your query."; - - await WorkspaceChats.new({ - workspaceId: workspace.id, - prompt: message, - response: { - text: textResponse, - sources: [], - type: chatMode, - }, - threadId: thread?.id || null, - include: false, - user, - }); - - return { - id: uuid, - type: "textResponse", - sources: [], - close: true, - error: null, - textResponse, - }; - } - - // If we are here we know that we are in a workspace that is: - // 1. Chatting in "chat" mode and may or may _not_ have embeddings - // 2. Chatting in "query" mode and has at least 1 embedding - let contextTexts = []; - let sources = []; - let pinnedDocIdentifiers = []; - const { rawHistory, chatHistory } = await recentChatHistory({ - user, - workspace, - thread, - messageLimit, - chatMode, - }); - - // See stream.js comment for more information on this implementation. - await new DocumentManager({ - workspace, - maxTokens: LLMConnector.promptWindowLimit(), - }) - .pinnedDocs() - .then((pinnedDocs) => { - pinnedDocs.forEach((doc) => { - const { pageContent, ...metadata } = doc; - pinnedDocIdentifiers.push(sourceIdentifier(doc)); - contextTexts.push(doc.pageContent); - sources.push({ - text: - pageContent.slice(0, 1_000) + - "...continued on in source document...", - ...metadata, - }); - }); - }); - - const vectorSearchResults = - embeddingsCount !== 0 - ? await VectorDb.performSimilaritySearch({ - namespace: workspace.slug, - input: message, - LLMConnector, - similarityThreshold: workspace?.similarityThreshold, - topN: workspace?.topN, - filterIdentifiers: pinnedDocIdentifiers, - }) - : { - contextTexts: [], - sources: [], - message: null, - }; - - // Failed similarity search if it was run at all and failed. - if (!!vectorSearchResults.message) { - return { - id: uuid, - type: "abort", - textResponse: null, - sources: [], - close: true, - error: vectorSearchResults.message, - }; - } - - const { fillSourceWindow } = require("../helpers/chat"); - const filledSources = fillSourceWindow({ - nDocs: workspace?.topN || 4, - searchResults: vectorSearchResults.sources, - history: rawHistory, - filterIdentifiers: pinnedDocIdentifiers, - }); - - // Why does contextTexts get all the info, but sources only get current search? - // This is to give the ability of the LLM to "comprehend" a contextual response without - // populating the Citations under a response with documents the user "thinks" are irrelevant - // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses. - // If a past citation was used to answer the question - that is visible in the history so it logically makes sense - // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt. - // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keep answers highly accurate. - contextTexts = [...contextTexts, ...filledSources.contextTexts]; - sources = [...sources, ...vectorSearchResults.sources]; - - // If in query mode and no context chunks are found from search, backfill, or pins - do not - // let the LLM try to hallucinate a response or use general knowledge and exit early - if (chatMode === "query" && contextTexts.length === 0) { - const textResponse = - workspace?.queryRefusalResponse ?? - "There is no relevant information in this workspace to answer your query."; - - await WorkspaceChats.new({ - workspaceId: workspace.id, - prompt: message, - response: { - text: textResponse, - sources: [], - type: chatMode, - }, - threadId: thread?.id || null, - include: false, - user, - }); - - return { - id: uuid, - type: "textResponse", - sources: [], - close: true, - error: null, - textResponse, - }; - } - - // Compress & Assemble message to ensure prompt passes token limit with room for response - // and build system messages based on inputs and history. - const messages = await LLMConnector.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: updatedMessage, - contextTexts, - chatHistory, - }, - rawHistory - ); - - // Send the text completion. - const textResponse = await LLMConnector.getChatCompletion(messages, { - temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp, - }); - - if (!textResponse) { - return { - id: uuid, - type: "abort", - textResponse: null, - sources: [], - close: true, - error: "No text completion could be completed with this input.", - }; - } - - const { chat } = await WorkspaceChats.new({ - workspaceId: workspace.id, - prompt: message, - response: { text: textResponse, sources, type: chatMode }, - threadId: thread?.id || null, - user, - }); - return { - id: uuid, - type: "textResponse", - close: true, - error: null, - chatId: chat.id, - textResponse, - sources, - }; -} - async function recentChatHistory({ user = null, workspace, From 2d2e49bc00c5bc5380f32299ecc340063f60377c Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Wed, 21 Aug 2024 14:58:56 -0700 Subject: [PATCH 04/44] fix missing export --- server/utils/chats/index.js | 1 - 1 file changed, 1 deletion(-) diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js index 3ec358728e..17fbd1569d 100644 --- a/server/utils/chats/index.js +++ b/server/utils/chats/index.js @@ -73,7 +73,6 @@ function sourceIdentifier(sourceDocument) { module.exports = { sourceIdentifier, recentChatHistory, - chatWithWorkspace, chatPrompt, grepCommand, VALID_COMMANDS, From fdc3add53cf12f4a4215b3c251e811d24de2196e Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Wed, 21 Aug 2024 15:25:47 -0700 Subject: [PATCH 05/44] Api session id support (#2158) * Refactor api endpoint chat handler to its own function remove legacy `chatWithWorkspace` and cleanup `index.js` * Add `sessionId` in dev API to partition chats logically statelessly --- server/endpoints/api/workspace/index.js | 12 ++++++---- server/endpoints/api/workspaceThread/index.js | 1 + server/endpoints/workspaceThreads.js | 1 + server/endpoints/workspaces.js | 1 + server/models/workspaceChats.js | 6 ++++- .../20240821215625_init/migration.sql | 2 ++ server/prisma/schema.prisma | 23 ++++++++++--------- server/swagger/openapi.json | 6 +++-- server/utils/agents/index.js | 1 + server/utils/chats/apiChatHandler.js | 13 ++++++++++- server/utils/chats/index.js | 2 ++ server/utils/helpers/chat/convertTo.js | 6 ++++- 12 files changed, 54 insertions(+), 20 deletions(-) create mode 100644 server/prisma/migrations/20240821215625_init/migration.sql diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index 1fe9ad8dc3..694baea982 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -550,7 +550,8 @@ function apiWorkspaceEndpoints(app) { "application/json": { example: { message: "What is AnythingLLM?", - mode: "query | chat" + mode: "query | chat", + sessionId: "identifier-to-partition-chats-by-external-id" } } } @@ -580,7 +581,7 @@ function apiWorkspaceEndpoints(app) { */ try { const { slug } = request.params; - const { message, mode = "query" } = reqBody(request); + const { message, mode = "query", sessionId = null } = reqBody(request); const workspace = await Workspace.get({ slug: String(slug) }); if (!workspace) { @@ -615,6 +616,7 @@ function apiWorkspaceEndpoints(app) { mode, user: null, thread: null, + sessionId: !!sessionId ? String(sessionId) : null, }); await Telemetry.sendTelemetry("sent_chat", { @@ -658,7 +660,8 @@ function apiWorkspaceEndpoints(app) { "application/json": { example: { message: "What is AnythingLLM?", - mode: "query | chat" + mode: "query | chat", + sessionId: "identifier-to-partition-chats-by-external-id" } } } @@ -706,7 +709,7 @@ function apiWorkspaceEndpoints(app) { */ try { const { slug } = request.params; - const { message, mode = "query" } = reqBody(request); + const { message, mode = "query", sessionId = null } = reqBody(request); const workspace = await Workspace.get({ slug: String(slug) }); if (!workspace) { @@ -748,6 +751,7 @@ function apiWorkspaceEndpoints(app) { mode, user: null, thread: null, + sessionId: !!sessionId ? String(sessionId) : null, }); await Telemetry.sendTelemetry("sent_chat", { LLMSelection: diff --git a/server/endpoints/api/workspaceThread/index.js b/server/endpoints/api/workspaceThread/index.js index cdc4d598cf..f8552d73c1 100644 --- a/server/endpoints/api/workspaceThread/index.js +++ b/server/endpoints/api/workspaceThread/index.js @@ -299,6 +299,7 @@ function apiWorkspaceThreadEndpoints(app) { { workspaceId: workspace.id, thread_id: thread.id, + api_session_id: null, // Do not include API session chats. include: true, }, null, diff --git a/server/endpoints/workspaceThreads.js b/server/endpoints/workspaceThreads.js index 4e071992b6..4265039631 100644 --- a/server/endpoints/workspaceThreads.js +++ b/server/endpoints/workspaceThreads.js @@ -138,6 +138,7 @@ function workspaceThreadEndpoints(app) { workspaceId: workspace.id, user_id: user?.id || null, thread_id: thread.id, + api_session_id: null, // Do not include API session chats. include: true, }, null, diff --git a/server/endpoints/workspaces.js b/server/endpoints/workspaces.js index 43b0936791..6a6df19347 100644 --- a/server/endpoints/workspaces.js +++ b/server/endpoints/workspaces.js @@ -793,6 +793,7 @@ function workspaceEndpoints(app) { user_id: user?.id, include: true, // only duplicate visible chats thread_id: threadId, + api_session_id: null, // Do not include API session chats. id: { lte: Number(chatId) }, }, null, diff --git a/server/models/workspaceChats.js b/server/models/workspaceChats.js index 52d96c400e..ef474c4ef7 100644 --- a/server/models/workspaceChats.js +++ b/server/models/workspaceChats.js @@ -8,6 +8,7 @@ const WorkspaceChats = { user = null, threadId = null, include = true, + apiSessionId = null, }) { try { const chat = await prisma.workspace_chats.create({ @@ -17,6 +18,7 @@ const WorkspaceChats = { response: JSON.stringify(response), user_id: user?.id || null, thread_id: threadId, + api_session_id: apiSessionId, include, }, }); @@ -40,6 +42,7 @@ const WorkspaceChats = { workspaceId, user_id: userId, thread_id: null, // this function is now only used for the default thread on workspaces and users + api_session_id: null, // do not include api-session chats in the frontend for anyone. include: true, }, ...(limit !== null ? { take: limit } : {}), @@ -63,6 +66,7 @@ const WorkspaceChats = { where: { workspaceId, thread_id: null, // this function is now only used for the default thread on workspaces + api_session_id: null, // do not include api-session chats in the frontend for anyone. include: true, }, ...(limit !== null ? { take: limit } : {}), @@ -196,7 +200,7 @@ const WorkspaceChats = { const user = res.user_id ? await User.get({ id: res.user_id }) : null; res.user = user ? { username: user.username } - : { username: "unknown user" }; + : { username: res.api_session_id !== null ? "API" : "unknown user" }; } return results; diff --git a/server/prisma/migrations/20240821215625_init/migration.sql b/server/prisma/migrations/20240821215625_init/migration.sql new file mode 100644 index 0000000000..35bce1b30e --- /dev/null +++ b/server/prisma/migrations/20240821215625_init/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "workspace_chats" ADD COLUMN "api_session_id" TEXT; diff --git a/server/prisma/schema.prisma b/server/prisma/schema.prisma index f385e66f45..b45e29119d 100644 --- a/server/prisma/schema.prisma +++ b/server/prisma/schema.prisma @@ -170,17 +170,18 @@ model workspace_suggested_messages { } model workspace_chats { - id Int @id @default(autoincrement()) - workspaceId Int - prompt String - response String - include Boolean @default(true) - user_id Int? - thread_id Int? // No relation to prevent whole table migration - createdAt DateTime @default(now()) - lastUpdatedAt DateTime @default(now()) - feedbackScore Boolean? - users users? @relation(fields: [user_id], references: [id], onDelete: Cascade, onUpdate: Cascade) + id Int @id @default(autoincrement()) + workspaceId Int + prompt String + response String + include Boolean @default(true) + user_id Int? + thread_id Int? // No relation to prevent whole table migration + api_session_id String? // String identifier for only the dev API to parition chats in any mode. + createdAt DateTime @default(now()) + lastUpdatedAt DateTime @default(now()) + feedbackScore Boolean? + users users? @relation(fields: [user_id], references: [id], onDelete: Cascade, onUpdate: Cascade) } model workspace_agent_invocations { diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index ef41449229..078e38a884 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -1972,7 +1972,8 @@ "application/json": { "example": { "message": "What is AnythingLLM?", - "mode": "query | chat" + "mode": "query | chat", + "sessionId": "identifier-to-partition-chats-by-external-id" } } } @@ -2064,7 +2065,8 @@ "application/json": { "example": { "message": "What is AnythingLLM?", - "mode": "query | chat" + "mode": "query | chat", + "sessionId": "identifier-to-partition-chats-by-external-id" } } } diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js index f6c9b3e856..b0654eae19 100644 --- a/server/utils/agents/index.js +++ b/server/utils/agents/index.js @@ -42,6 +42,7 @@ class AgentHandler { workspaceId: this.invocation.workspace_id, user_id: this.invocation.user_id || null, thread_id: this.invocation.thread_id || null, + api_session_id: null, include: true, }, limit, diff --git a/server/utils/chats/apiChatHandler.js b/server/utils/chats/apiChatHandler.js index a52e2da14f..bce341bacc 100644 --- a/server/utils/chats/apiChatHandler.js +++ b/server/utils/chats/apiChatHandler.js @@ -23,6 +23,7 @@ const { chatPrompt, sourceIdentifier, recentChatHistory } = require("./index"); * mode: "chat"|"query", * user: import("@prisma/client").users|null, * thread: import("@prisma/client").workspace_threads|null, + * sessionId: string|null, * }} parameters * @returns {Promise} */ @@ -32,6 +33,7 @@ async function chatSync({ mode = "chat", user = null, thread = null, + sessionId = null, }) { const uuid = uuidv4(); const chatMode = mode ?? "chat"; @@ -60,6 +62,7 @@ async function chatSync({ type: chatMode, }, include: false, + apiSessionId: sessionId, }); return { @@ -83,7 +86,7 @@ async function chatSync({ workspace, thread, messageLimit, - chatMode, + apiSessionId: sessionId, }); await new DocumentManager({ @@ -168,6 +171,7 @@ async function chatSync({ }, threadId: thread?.id || null, include: false, + apiSessionId: sessionId, user, }); @@ -214,6 +218,7 @@ async function chatSync({ prompt: message, response: { text: textResponse, sources, type: chatMode }, threadId: thread?.id || null, + apiSessionId: sessionId, user, }); @@ -237,6 +242,7 @@ async function chatSync({ * mode: "chat"|"query", * user: import("@prisma/client").users|null, * thread: import("@prisma/client").workspace_threads|null, + * sessionId: string|null, * }} parameters * @returns {Promise} */ @@ -247,6 +253,7 @@ async function streamChat({ mode = "chat", user = null, thread = null, + sessionId = null, }) { const uuid = uuidv4(); const chatMode = mode ?? "chat"; @@ -285,6 +292,7 @@ async function streamChat({ attachments: [], }, threadId: thread?.id || null, + apiSessionId: sessionId, include: false, user, }); @@ -303,6 +311,7 @@ async function streamChat({ workspace, thread, messageLimit, + apiSessionId: sessionId, }); // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search @@ -402,6 +411,7 @@ async function streamChat({ attachments: [], }, threadId: thread?.id || null, + apiSessionId: sessionId, include: false, user, }); @@ -453,6 +463,7 @@ async function streamChat({ prompt: message, response: { text: completeText, sources, type: chatMode }, threadId: thread?.id || null, + apiSessionId: sessionId, user, }); diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js index 17fbd1569d..387b70ce70 100644 --- a/server/utils/chats/index.js +++ b/server/utils/chats/index.js @@ -37,6 +37,7 @@ async function recentChatHistory({ workspace, thread = null, messageLimit = 20, + apiSessionId = null, }) { const rawHistory = ( await WorkspaceChats.where( @@ -44,6 +45,7 @@ async function recentChatHistory({ workspaceId: workspace.id, user_id: user?.id || null, thread_id: thread?.id || null, + api_session_id: apiSessionId || null, include: true, }, messageLimit, diff --git a/server/utils/helpers/chat/convertTo.js b/server/utils/helpers/chat/convertTo.js index 962cdc4b6e..a1c0a1bcbd 100644 --- a/server/utils/helpers/chat/convertTo.js +++ b/server/utils/helpers/chat/convertTo.js @@ -50,7 +50,11 @@ async function prepareWorkspaceChatsForExport(format = "jsonl") { const responseJson = JSON.parse(chat.response); return { id: chat.id, - username: chat.user ? chat.user.username : "unknown user", + username: chat.user + ? chat.user.username + : chat.api_session_id !== null + ? "API" + : "unknown user", workspace: chat.workspace ? chat.workspace.name : "unknown workspace", prompt: chat.prompt, response: responseJson.text, From 2de9e492ec16ada194512e6aeac77969d69fa950 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Thu, 22 Aug 2024 13:12:09 -0700 Subject: [PATCH 06/44] Enabled use of `@agent` (and skills) via dev API calls (#2161) * Use `@agent` via dev API * Move EphemeralEventListener to same file as agent --- .../agents/aibitat/plugins/http-socket.js | 87 +++++ server/utils/agents/defaults.js | 58 +-- server/utils/agents/ephemeral.js | 351 ++++++++++++++++++ server/utils/agents/index.js | 24 +- server/utils/chats/apiChatHandler.js | 110 ++++++ 5 files changed, 595 insertions(+), 35 deletions(-) create mode 100644 server/utils/agents/aibitat/plugins/http-socket.js create mode 100644 server/utils/agents/ephemeral.js diff --git a/server/utils/agents/aibitat/plugins/http-socket.js b/server/utils/agents/aibitat/plugins/http-socket.js new file mode 100644 index 0000000000..30c68f6949 --- /dev/null +++ b/server/utils/agents/aibitat/plugins/http-socket.js @@ -0,0 +1,87 @@ +const chalk = require("chalk"); +const { RetryError } = require("../error"); +const { Telemetry } = require("../../../../models/telemetry"); + +/** + * HTTP Interface plugin for Aibitat to emulate a websocket interface in the agent + * framework so we dont have to modify the interface for passing messages and responses + * in REST or WSS. + */ +const httpSocket = { + name: "httpSocket", + startupConfig: { + params: { + handler: { + required: true, + }, + muteUserReply: { + required: false, + default: true, + }, + introspection: { + required: false, + default: true, + }, + }, + }, + plugin: function ({ + handler, + muteUserReply = true, // Do not post messages to "USER" back to frontend. + introspection = false, // when enabled will attach socket to Aibitat object with .introspect method which reports status updates to frontend. + }) { + return { + name: this.name, + setup(aibitat) { + aibitat.onError(async (error) => { + if (!!error?.message) { + console.error(chalk.red(` error: ${error.message}`), error); + aibitat.introspect( + `Error encountered while running: ${error.message}` + ); + } + + if (error instanceof RetryError) { + console.error(chalk.red(` retrying in 60 seconds...`)); + setTimeout(() => { + aibitat.retry(); + }, 60_000); + return; + } + }); + + aibitat.introspect = (messageText) => { + if (!introspection) return; // Dump thoughts when not wanted. + handler.send( + JSON.stringify({ type: "statusResponse", content: messageText }) + ); + }; + + // expose function for sockets across aibitat + // type param must be set or else msg will not be shown or handled in UI. + aibitat.socket = { + send: (type = "__unhandled", content = "") => { + handler.send(JSON.stringify({ type, content })); + }, + }; + + // We can only receive one message response with HTTP + // so we end on first response. + aibitat.onMessage((message) => { + if (message.from !== "USER") + Telemetry.sendTelemetry("agent_chat_sent"); + if (message.from === "USER" && muteUserReply) return; + handler.send(JSON.stringify(message)); + handler.close(); + }); + + aibitat.onTerminate(() => { + handler.close(); + }); + }, + }; + }, +}; + +module.exports = { + httpSocket, +}; diff --git a/server/utils/agents/defaults.js b/server/utils/agents/defaults.js index 796a7bbcb1..a6d30ca15b 100644 --- a/server/utils/agents/defaults.js +++ b/server/utils/agents/defaults.js @@ -22,36 +22,48 @@ const WORKSPACE_AGENT = { AgentPlugins.webScraping.name, // Collector web-scraping ]; - const _setting = ( - await SystemSettings.get({ label: "default_agent_skills" }) - )?.value; - - safeJsonParse(_setting, []).forEach((skillName) => { - if (!AgentPlugins.hasOwnProperty(skillName)) return; - - // This is a plugin module with many sub-children plugins who - // need to be named via `${parent}#${child}` naming convention - if (Array.isArray(AgentPlugins[skillName].plugin)) { - for (const subPlugin of AgentPlugins[skillName].plugin) { - defaultFunctions.push( - `${AgentPlugins[skillName].name}#${subPlugin.name}` - ); - } - return; - } - - // This is normal single-stage plugin - defaultFunctions.push(AgentPlugins[skillName].name); - }); - return { role: Provider.systemPrompt(provider), - functions: defaultFunctions, + functions: [ + ...defaultFunctions, + ...(await agentSkillsFromSystemSettings()), + ], }; }, }; +/** + * Fetches and preloads the names/identifiers for plugins that will be dynamically + * loaded later + * @returns {Promise} + */ +async function agentSkillsFromSystemSettings() { + const systemFunctions = []; + const _setting = (await SystemSettings.get({ label: "default_agent_skills" })) + ?.value; + + safeJsonParse(_setting, []).forEach((skillName) => { + if (!AgentPlugins.hasOwnProperty(skillName)) return; + + // This is a plugin module with many sub-children plugins who + // need to be named via `${parent}#${child}` naming convention + if (Array.isArray(AgentPlugins[skillName].plugin)) { + for (const subPlugin of AgentPlugins[skillName].plugin) { + systemFunctions.push( + `${AgentPlugins[skillName].name}#${subPlugin.name}` + ); + } + return; + } + + // This is normal single-stage plugin + systemFunctions.push(AgentPlugins[skillName].name); + }); + return systemFunctions; +} + module.exports = { USER_AGENT, WORKSPACE_AGENT, + agentSkillsFromSystemSettings, }; diff --git a/server/utils/agents/ephemeral.js b/server/utils/agents/ephemeral.js new file mode 100644 index 0000000000..831fa5032a --- /dev/null +++ b/server/utils/agents/ephemeral.js @@ -0,0 +1,351 @@ +const AIbitat = require("./aibitat"); +const AgentPlugins = require("./aibitat/plugins"); +const { httpSocket } = require("./aibitat/plugins/http-socket.js"); +const { WorkspaceChats } = require("../../models/workspaceChats"); +const { safeJsonParse } = require("../http"); +const { + USER_AGENT, + WORKSPACE_AGENT, + agentSkillsFromSystemSettings, +} = require("./defaults"); +const { AgentHandler } = require("."); +const { + WorkspaceAgentInvocation, +} = require("../../models/workspaceAgentInvocation"); + +/** + * This is an instance and functional Agent handler, but it does not utilize + * sessions or websocket's and is instead a singular one-off agent run that does + * not persist between invocations + */ +class EphemeralAgentHandler extends AgentHandler { + #invocationUUID = null; + #workspace = null; + #userId = null; + #threadId = null; + #sessionId = null; + #prompt = null; + #funcsToLoad = []; + + aibitat = null; + channel = null; + provider = null; + model = null; + + constructor({ + uuid, + workspace, + prompt, + userId = null, + threadId = null, + sessionId = null, + }) { + super({ uuid }); + this.#invocationUUID = uuid; + this.#workspace = workspace; + this.#prompt = prompt; + + this.#userId = userId; + this.#threadId = threadId; + this.#sessionId = sessionId; + } + + log(text, ...args) { + console.log(`\x1b[36m[EphemeralAgentHandler]\x1b[0m ${text}`, ...args); + } + + closeAlert() { + this.log(`End ${this.#invocationUUID}::${this.provider}:${this.model}`); + } + + async #chatHistory(limit = 10) { + try { + const rawHistory = ( + await WorkspaceChats.where( + { + workspaceId: this.#workspace.id, + user_id: this.#userId || null, + thread_id: this.#threadId || null, + api_session_id: this.#sessionId, + include: true, + }, + limit, + { id: "desc" } + ) + ).reverse(); + + const agentHistory = []; + rawHistory.forEach((chatLog) => { + agentHistory.push( + { + from: USER_AGENT.name, + to: WORKSPACE_AGENT.name, + content: chatLog.prompt, + state: "success", + }, + { + from: WORKSPACE_AGENT.name, + to: USER_AGENT.name, + content: safeJsonParse(chatLog.response)?.text || "", + state: "success", + } + ); + }); + return agentHistory; + } catch (e) { + this.log("Error loading chat history", e.message); + return []; + } + } + + /** + * Finds or assumes the model preference value to use for API calls. + * If multi-model loading is supported, we use their agent model selection of the workspace + * If not supported, we attempt to fallback to the system provider value for the LLM preference + * and if that fails - we assume a reasonable base model to exist. + * @returns {string} the model preference value to use in API calls + */ + #fetchModel() { + if (!Object.keys(this.noProviderModelDefault).includes(this.provider)) + return this.#workspace.agentModel || this.providerDefault(); + + // Provider has no reliable default (cant load many models) - so we need to look at system + // for the model param. + const sysModelKey = this.noProviderModelDefault[this.provider]; + if (!!sysModelKey) + return process.env[sysModelKey] ?? this.providerDefault(); + + // If all else fails - look at the provider default list + return this.providerDefault(); + } + + #providerSetupAndCheck() { + this.provider = this.#workspace.agentProvider; + this.model = this.#fetchModel(); + this.log(`Start ${this.#invocationUUID}::${this.provider}:${this.model}`); + this.checkSetup(); + } + + #attachPlugins(args) { + for (const name of this.#funcsToLoad) { + // Load child plugin + if (name.includes("#")) { + const [parent, childPluginName] = name.split("#"); + if (!AgentPlugins.hasOwnProperty(parent)) { + this.log( + `${parent} is not a valid plugin. Skipping inclusion to agent cluster.` + ); + continue; + } + + const childPlugin = AgentPlugins[parent].plugin.find( + (child) => child.name === childPluginName + ); + if (!childPlugin) { + this.log( + `${parent} does not have child plugin named ${childPluginName}. Skipping inclusion to agent cluster.` + ); + continue; + } + + const callOpts = this.parseCallOptions( + args, + childPlugin?.startupConfig?.params, + name + ); + this.aibitat.use(childPlugin.plugin(callOpts)); + this.log( + `Attached ${parent}:${childPluginName} plugin to Agent cluster` + ); + continue; + } + + // Load single-stage plugin. + if (!AgentPlugins.hasOwnProperty(name)) { + this.log( + `${name} is not a valid plugin. Skipping inclusion to agent cluster.` + ); + continue; + } + + const callOpts = this.parseCallOptions( + args, + AgentPlugins[name].startupConfig.params + ); + const AIbitatPlugin = AgentPlugins[name]; + this.aibitat.use(AIbitatPlugin.plugin(callOpts)); + this.log(`Attached ${name} plugin to Agent cluster`); + } + } + + async #loadAgents() { + // Default User agent and workspace agent + this.log(`Attaching user and default agent to Agent cluster.`); + this.aibitat.agent(USER_AGENT.name, await USER_AGENT.getDefinition()); + this.aibitat.agent( + WORKSPACE_AGENT.name, + await WORKSPACE_AGENT.getDefinition(this.provider) + ); + + this.#funcsToLoad = [ + AgentPlugins.docSummarizer.name, + AgentPlugins.webScraping.name, + ...(await agentSkillsFromSystemSettings()), + ]; + } + + async init() { + this.#providerSetupAndCheck(); + return this; + } + + async createAIbitat( + args = { + handler, + } + ) { + this.aibitat = new AIbitat({ + provider: this.provider ?? "openai", + model: this.model ?? "gpt-4o", + chats: await this.#chatHistory(20), + handlerProps: { + log: this.log, + }, + }); + + // Attach HTTP response object if defined for chunk streaming. + this.log(`Attached ${httpSocket.name} plugin to Agent cluster`); + this.aibitat.use( + httpSocket.plugin({ + handler: args.handler, + muteUserReply: true, + introspection: true, + }) + ); + + // Load required agents (Default + custom) + await this.#loadAgents(); + + // Attach all required plugins for functions to operate. + this.#attachPlugins(args); + } + + startAgentCluster() { + return this.aibitat.start({ + from: USER_AGENT.name, + to: this.channel ?? WORKSPACE_AGENT.name, + content: this.#prompt, + }); + } + + /** + * Determine if the message provided is an agent invocation. + * @param {{message:string}} parameters + * @returns {boolean} + */ + static isAgentInvocation({ message }) { + const agentHandles = WorkspaceAgentInvocation.parseAgents(message); + if (agentHandles.length > 0) return true; + return false; + } +} + +const EventEmitter = require("node:events"); +const { writeResponseChunk } = require("../helpers/chat/responses"); + +/** + * This is a special EventEmitter specifically used in the Aibitat agent handler + * that enables us to use HTTP to relay all .introspect and .send events back to an + * http handler instead of websockets, like we do on the frontend. This interface is meant to + * mock a websocket interface for the methods used and bind them to an HTTP method so that the developer + * API can invoke agent calls. + */ +class EphemeralEventListener extends EventEmitter { + messages = []; + constructor() { + super(); + } + + send(jsonData) { + const data = JSON.parse(jsonData); + this.messages.push(data); + this.emit("chunk", data); + } + + close() { + this.emit("closed"); + } + + /** + * Compacts all messages in class and returns them in a condensed format. + * @returns {{thoughts: string[], textResponse: string}} + */ + packMessages() { + const thoughts = []; + let textResponse = null; + for (let msg of this.messages) { + if (msg.type !== "statusResponse") { + textResponse = msg.content; + } else { + thoughts.push(msg.content); + } + } + return { thoughts, textResponse }; + } + + /** + * Waits on the HTTP plugin to emit the 'closed' event from the agentHandler + * so that we can compact and return all the messages in the current queue. + * @returns {Promise<{thoughts: string[], textResponse: string}>} + */ + async waitForClose() { + return new Promise((resolve) => { + this.once("closed", () => resolve(this.packMessages())); + }); + } + + /** + * Streams the events with `writeResponseChunk` over HTTP chunked encoding + * and returns on the close event emission. + * ---------- + * DevNote: Agents do not stream so in here we are simply + * emitting the thoughts and text response as soon as we get them. + * @param {import("express").Response} response + * @param {string} uuid - Unique identifier that is the same across chunks. + * @returns {Promise<{thoughts: string[], textResponse: string}>} + */ + async streamAgentEvents(response, uuid) { + const onChunkHandler = (data) => { + if (data.type === "statusResponse") { + return writeResponseChunk(response, { + id: uuid, + type: "agentThought", + thought: data.content, + sources: [], + attachments: [], + close: false, + error: null, + }); + } + + return writeResponseChunk(response, { + id: uuid, + type: "textResponse", + textResponse: data.content, + sources: [], + attachments: [], + close: true, + error: null, + }); + }; + this.on("chunk", onChunkHandler); + + // Wait for close and after remove chunk listener + return this.waitForClose().then((closedResponse) => { + this.removeListener("chunk", onChunkHandler); + return closedResponse; + }); + } +} + +module.exports = { EphemeralAgentHandler, EphemeralEventListener }; diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js index b0654eae19..86563d1850 100644 --- a/server/utils/agents/index.js +++ b/server/utils/agents/index.js @@ -10,7 +10,7 @@ const { USER_AGENT, WORKSPACE_AGENT } = require("./defaults"); class AgentHandler { #invocationUUID; #funcsToLoad = []; - #noProviderModelDefault = { + noProviderModelDefault = { azure: "OPEN_MODEL_PREF", lmstudio: "LMSTUDIO_MODEL_PREF", textgenwebui: null, // does not even use `model` in API req @@ -74,7 +74,7 @@ class AgentHandler { } } - #checkSetup() { + checkSetup() { switch (this.provider) { case "openai": if (!process.env.OPEN_AI_KEY) @@ -163,7 +163,7 @@ class AgentHandler { } } - #providerDefault() { + providerDefault() { switch (this.provider) { case "openai": return "gpt-4o"; @@ -210,24 +210,24 @@ class AgentHandler { * @returns {string} the model preference value to use in API calls */ #fetchModel() { - if (!Object.keys(this.#noProviderModelDefault).includes(this.provider)) - return this.invocation.workspace.agentModel || this.#providerDefault(); + if (!Object.keys(this.noProviderModelDefault).includes(this.provider)) + return this.invocation.workspace.agentModel || this.providerDefault(); // Provider has no reliable default (cant load many models) - so we need to look at system // for the model param. - const sysModelKey = this.#noProviderModelDefault[this.provider]; + const sysModelKey = this.noProviderModelDefault[this.provider]; if (!!sysModelKey) - return process.env[sysModelKey] ?? this.#providerDefault(); + return process.env[sysModelKey] ?? this.providerDefault(); // If all else fails - look at the provider default list - return this.#providerDefault(); + return this.providerDefault(); } #providerSetupAndCheck() { this.provider = this.invocation.workspace.agentProvider; this.model = this.#fetchModel(); this.log(`Start ${this.#invocationUUID}::${this.provider}:${this.model}`); - this.#checkSetup(); + this.checkSetup(); } async #validInvocation() { @@ -239,7 +239,7 @@ class AgentHandler { this.invocation = invocation ?? null; } - #parseCallOptions(args, config = {}, pluginName) { + parseCallOptions(args, config = {}, pluginName) { const callOpts = {}; for (const [param, definition] of Object.entries(config)) { if ( @@ -280,7 +280,7 @@ class AgentHandler { continue; } - const callOpts = this.#parseCallOptions( + const callOpts = this.parseCallOptions( args, childPlugin?.startupConfig?.params, name @@ -300,7 +300,7 @@ class AgentHandler { continue; } - const callOpts = this.#parseCallOptions( + const callOpts = this.parseCallOptions( args, AgentPlugins[name].startupConfig.params ); diff --git a/server/utils/chats/apiChatHandler.js b/server/utils/chats/apiChatHandler.js index bce341bacc..3fa475ca1a 100644 --- a/server/utils/chats/apiChatHandler.js +++ b/server/utils/chats/apiChatHandler.js @@ -4,6 +4,11 @@ const { WorkspaceChats } = require("../../models/workspaceChats"); const { getVectorDbClass, getLLMProvider } = require("../helpers"); const { writeResponseChunk } = require("../helpers/chat/responses"); const { chatPrompt, sourceIdentifier, recentChatHistory } = require("./index"); +const { + EphemeralAgentHandler, + EphemeralEventListener, +} = require("../agents/ephemeral"); +const { Telemetry } = require("../../models/telemetry"); /** * @typedef ResponseObject @@ -37,6 +42,59 @@ async function chatSync({ }) { const uuid = uuidv4(); const chatMode = mode ?? "chat"; + + if (EphemeralAgentHandler.isAgentInvocation({ message })) { + await Telemetry.sendTelemetry("agent_chat_started"); + + // Initialize the EphemeralAgentHandler to handle non-continuous + // conversations with agents since this is over REST. + const agentHandler = new EphemeralAgentHandler({ + uuid, + workspace, + prompt: message, + userId: user?.id || null, + threadId: thread?.id || null, + sessionId, + }); + + // Establish event listener that emulates websocket calls + // in Aibitat so that we can keep the same interface in Aibitat + // but use HTTP. + const eventListener = new EphemeralEventListener(); + await agentHandler.init(); + await agentHandler.createAIbitat({ handler: eventListener }); + agentHandler.startAgentCluster(); + + // The cluster has started and now we wait for close event since + // this is a synchronous call for an agent, so we return everything at once. + // After this, we conclude the call as we normally do. + return await eventListener + .waitForClose() + .then(async ({ thoughts, textResponse }) => { + await WorkspaceChats.new({ + workspaceId: workspace.id, + prompt: String(message), + response: { + text: textResponse, + sources: [], + type: chatMode, + thoughts, + }, + include: false, + apiSessionId: sessionId, + }); + return { + id: uuid, + type: "textResponse", + sources: [], + close: true, + error: null, + textResponse, + thoughts, + }; + }); + } + const LLMConnector = getLLMProvider({ provider: workspace?.chatProvider, model: workspace?.chatModel, @@ -257,6 +315,58 @@ async function streamChat({ }) { const uuid = uuidv4(); const chatMode = mode ?? "chat"; + + if (EphemeralAgentHandler.isAgentInvocation({ message })) { + await Telemetry.sendTelemetry("agent_chat_started"); + + // Initialize the EphemeralAgentHandler to handle non-continuous + // conversations with agents since this is over REST. + const agentHandler = new EphemeralAgentHandler({ + uuid, + workspace, + prompt: message, + userId: user?.id || null, + threadId: thread?.id || null, + sessionId, + }); + + // Establish event listener that emulates websocket calls + // in Aibitat so that we can keep the same interface in Aibitat + // but use HTTP. + const eventListener = new EphemeralEventListener(); + await agentHandler.init(); + await agentHandler.createAIbitat({ handler: eventListener }); + agentHandler.startAgentCluster(); + + // The cluster has started and now we wait for close event since + // and stream back any results we get from agents as they come in. + return eventListener + .streamAgentEvents(response, uuid) + .then(async ({ thoughts, textResponse }) => { + console.log({ thoughts, textResponse }); + await WorkspaceChats.new({ + workspaceId: workspace.id, + prompt: String(message), + response: { + text: textResponse, + sources: [], + type: chatMode, + thoughts, + }, + include: false, + apiSessionId: sessionId, + }); + writeResponseChunk(response, { + uuid, + type: "finalizeResponseStream", + textResponse, + thoughts, + close: true, + error: false, + }); + }); + } + const LLMConnector = getLLMProvider({ provider: workspace?.chatProvider, model: workspace?.chatModel, From 3a3399af94e0b0c83812deb91ef48fb6a7f49f04 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Thu, 22 Aug 2024 15:31:36 -0700 Subject: [PATCH 07/44] Move `embed` to submodule (#2163) * Move `embed` to submodule * update README --- .gitmodules | 4 + README.md | 2 +- embed | 1 + embed/.gitignore | 25 - embed/README.md | 112 - embed/index.html | 17 - embed/jsconfig.json | 10 - embed/package.json | 51 - embed/postcss.config.js | 10 - embed/scripts/updateHljs.mjs | 35 - embed/src/App.jsx | 71 - embed/src/assets/anything-llm-dark.png | Bin 8413 -> 0 bytes embed/src/assets/anything-llm-icon.svg | 5 - .../HistoricalMessage/Actions/index.jsx | 43 - .../ChatHistory/HistoricalMessage/index.jsx | 97 - .../ChatHistory/PromptReply/index.jsx | 111 - .../ChatContainer/ChatHistory/index.jsx | 163 - .../ChatContainer/PromptInput/index.jsx | 102 - .../ChatWindow/ChatContainer/index.jsx | 145 - .../components/ChatWindow/Header/index.jsx | 155 - embed/src/components/ChatWindow/index.jsx | 99 - embed/src/components/Head.jsx | 131 - embed/src/components/OpenButton/index.jsx | 35 - embed/src/components/ResetChat/index.jsx | 20 - embed/src/components/SessionId/index.jsx | 12 - embed/src/components/Sponsor/index.jsx | 17 - embed/src/hooks/chat/useChatHistory.js | 27 - embed/src/hooks/useOpen.js | 16 - embed/src/hooks/useScriptAttributes.js | 104 - embed/src/hooks/useSessionId.js | 29 - embed/src/index.css | 32 - embed/src/main.jsx | 31 - embed/src/models/chatService.js | 109 - embed/src/utils/chat/hljs.js | 88 - embed/src/utils/chat/index.js | 96 - embed/src/utils/chat/markdown.js | 49 - embed/src/utils/constants.js | 15 - embed/src/utils/date.js | 9 - embed/tailwind.config.js | 103 - embed/vite.config.js | 68 - embed/yarn.lock | 3430 ----------------- 41 files changed, 6 insertions(+), 5673 deletions(-) create mode 100644 .gitmodules create mode 160000 embed delete mode 100644 embed/.gitignore delete mode 100644 embed/README.md delete mode 100644 embed/index.html delete mode 100644 embed/jsconfig.json delete mode 100644 embed/package.json delete mode 100644 embed/postcss.config.js delete mode 100644 embed/scripts/updateHljs.mjs delete mode 100644 embed/src/App.jsx delete mode 100644 embed/src/assets/anything-llm-dark.png delete mode 100644 embed/src/assets/anything-llm-icon.svg delete mode 100644 embed/src/components/ChatWindow/ChatContainer/ChatHistory/HistoricalMessage/Actions/index.jsx delete mode 100644 embed/src/components/ChatWindow/ChatContainer/ChatHistory/HistoricalMessage/index.jsx delete mode 100644 embed/src/components/ChatWindow/ChatContainer/ChatHistory/PromptReply/index.jsx delete mode 100644 embed/src/components/ChatWindow/ChatContainer/ChatHistory/index.jsx delete mode 100644 embed/src/components/ChatWindow/ChatContainer/PromptInput/index.jsx delete mode 100644 embed/src/components/ChatWindow/ChatContainer/index.jsx delete mode 100644 embed/src/components/ChatWindow/Header/index.jsx delete mode 100644 embed/src/components/ChatWindow/index.jsx delete mode 100644 embed/src/components/Head.jsx delete mode 100644 embed/src/components/OpenButton/index.jsx delete mode 100644 embed/src/components/ResetChat/index.jsx delete mode 100644 embed/src/components/SessionId/index.jsx delete mode 100644 embed/src/components/Sponsor/index.jsx delete mode 100644 embed/src/hooks/chat/useChatHistory.js delete mode 100644 embed/src/hooks/useOpen.js delete mode 100644 embed/src/hooks/useScriptAttributes.js delete mode 100644 embed/src/hooks/useSessionId.js delete mode 100644 embed/src/index.css delete mode 100644 embed/src/main.jsx delete mode 100644 embed/src/models/chatService.js delete mode 100644 embed/src/utils/chat/hljs.js delete mode 100644 embed/src/utils/chat/index.js delete mode 100644 embed/src/utils/chat/markdown.js delete mode 100644 embed/src/utils/constants.js delete mode 100644 embed/src/utils/date.js delete mode 100644 embed/tailwind.config.js delete mode 100644 embed/vite.config.js delete mode 100644 embed/yarn.lock diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..dfb4bfcaa7 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "embed"] + branch = main + path = embed + url = git@github.com:Mintplex-Labs/anythingllm-embed.git diff --git a/README.md b/README.md index 178fef08e1..e5acb7a03a 100644 --- a/README.md +++ b/README.md @@ -137,7 +137,7 @@ This monorepo consists of three main sections: - `server`: A NodeJS express server to handle all the interactions and do all the vectorDB management and LLM interactions. - `collector`: NodeJS express server that process and parses documents from the UI. - `docker`: Docker instructions and build process + information for building from source. -- `embed`: Code specifically for generation of the [embed widget](./embed/README.md). +- `embed`: Submodule specifically for generation & creation of the [web embed widget](https://github.com/Mintplex-Labs/anythingllm-embed). ## 🛳 Self Hosting diff --git a/embed b/embed new file mode 160000 index 0000000000..22a0848d58 --- /dev/null +++ b/embed @@ -0,0 +1 @@ +Subproject commit 22a0848d58e3a758d85d93d9204a72a65854ea94 diff --git a/embed/.gitignore b/embed/.gitignore deleted file mode 100644 index 4d3751d9a5..0000000000 --- a/embed/.gitignore +++ /dev/null @@ -1,25 +0,0 @@ -# Logs -logs -*.log -npm-debug.log* -yarn-debug.log* -yarn-error.log* -pnpm-debug.log* -lerna-debug.log* - -node_modules -dist -dist-ssr -*.local - -# Editor directories and files -.vscode/* -!.vscode/extensions.json -!yarn.lock -.idea -.DS_Store -*.suo -*.ntvs* -*.njsproj -*.sln -*.sw? diff --git a/embed/README.md b/embed/README.md deleted file mode 100644 index af6621ba1b..0000000000 --- a/embed/README.md +++ /dev/null @@ -1,112 +0,0 @@ -# AnythingLLM Embedded Chat Widget - -> [!WARNING] -> The use of the AnythingLLM embed is currently in beta. Please request a feature or -> report a bug via a Github Issue if you have any issues. - -> [!WARNING] -> The core AnythingLLM team publishes a pre-built version of the script that is bundled -> with the main application. You can find it at the frontend URL `/embed/anythingllm-chat-widget.min.js`. -> You should only be working in this repo if you are wanting to build your own custom embed. - -This folder of AnythingLLM contains the source code for how the embedded version of AnythingLLM works to provide a public facing interface of your workspace. - -The AnythingLLM Embedded chat widget allows you to expose a workspace and its embedded knowledge base as a chat bubble via a ` -``` - -### `