Skip to content

Commit

Permalink
Purge cached docs and remove docs from all workspaces on vectorDB/emb…
Browse files Browse the repository at this point in the history
…edder changes (#2819)

* wip remove all docs clear vector db on embedder/vector db change

* purge all cached docs and remove docs from workspaces on vectordb/embedder change

* lint

* remove unneeded console log

* remove reset vector stores endpoint and move to server side updateENV with postUpdate check

* reset embed module

* remove unused import

* simplify deletion process
rescoped document deletion to be more general for speed, everything needs to be reset anyway
fixed issue where unembedded docs not in any workspaces, but cached, were not removed

* add back missing readme file
update warning text modals

---------

Co-authored-by: timothycarambat <[email protected]>
  • Loading branch information
shatfield4 and timothycarambat authored Dec 16, 2024
1 parent d145602 commit ae51061
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 46 deletions.
74 changes: 44 additions & 30 deletions frontend/src/components/ChangeWarning/index.jsx
Original file line number Diff line number Diff line change
@@ -1,46 +1,60 @@
import { Warning } from "@phosphor-icons/react";
import { Warning, X } from "@phosphor-icons/react";

export default function ChangeWarningModal({
warningText = "",
onClose,
onConfirm,
}) {
return (
<div className="relative w-full max-w-2xl max-h-full">
<div className="relative bg-main-gradient rounded-lg shadow">
<div className="flex items-start justify-between p-4 border-b rounded-t border-gray-500/50">
<div className="flex items-center gap-2">
<Warning
className="text-yellow-300 text-lg w-6 h-6"
weight="fill"
/>
<h3 className="text-xl font-semibold text-yellow-300">Warning</h3>
</div>
<div className="w-full max-w-2xl bg-theme-bg-secondary rounded-lg shadow border-2 border-theme-modal-border overflow-hidden z-9999">
<div className="relative p-6 border-b rounded-t border-theme-modal-border">
<div className="w-full flex gap-x-2 items-center">
<Warning className="text-red-500 w-6 h-6" weight="fill" />
<h3 className="text-xl font-semibold text-red-500 overflow-hidden overflow-ellipsis whitespace-nowrap">
WARNING - This action is irreversible
</h3>
</div>
<div className="w-[550px] p-6 text-white">
<p>
{warningText}
<button
onClick={onClose}
type="button"
className="absolute top-4 right-4 transition-all duration-300 bg-transparent rounded-lg text-sm p-1 inline-flex items-center hover:bg-theme-modal-border hover:border-theme-modal-border hover:border-opacity-50 border-transparent border"
>
<X size={24} weight="bold" className="text-white" />
</button>
</div>
<div
className="h-full w-full overflow-y-auto"
style={{ maxHeight: "calc(100vh - 200px)" }}
>
<div className="py-7 px-9 space-y-2 flex-col">
<p className="text-white">
{warningText.split("\\n").map((line, index) => (
<span key={index}>
{line}
<br />
</span>
))}
<br />
<br />
Are you sure you want to proceed?
</p>
</div>

<div className="flex w-full justify-between items-center p-6 space-x-2 border-t rounded-b border-gray-500/50">
<button
onClick={onClose}
type="button"
className="px-4 py-2 rounded-lg text-white hover:bg-red-500 transition-all duration-300"
>
Cancel
</button>
<button
onClick={onConfirm}
className="transition-all duration-300 border border-slate-200 px-4 py-2 rounded-lg text-white text-sm items-center flex gap-x-2 hover:bg-slate-200 hover:text-slate-800 focus:ring-gray-800"
>
Confirm
</button>
</div>
</div>
<div className="flex w-full justify-end items-center p-6 space-x-2 border-t border-theme-modal-border rounded-b">
<button
onClick={onClose}
type="button"
className="transition-all duration-300 bg-transparent text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Cancel
</button>
<button
onClick={onConfirm}
type="submit"
className="transition-all duration-300 bg-red-500 light:text-white text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Confirm
</button>
</div>
</div>
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ export default function GeneralEmbeddingPreference() {
)}
<ModalWrapper isOpen={isOpen}>
<ChangeWarningModal
warningText="Switching the embedding model will break previously embedded documents from working during chat. They will need to un-embed from every workspace and fully removed and re-uploaded so they can be embed by the new embedding model."
warningText="Switching the embedding model will reset all previously embedded documents in all workspaces.\n\nConfirming will clear all embeddings from your vector database and remove all documents from your workspaces. Your uploaded documents will not be deleted, they will be available for re-embedding."
onClose={closeModal}
onConfirm={handleSaveSettings}
/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ export default function GeneralVectorDatabase() {
)}
<ModalWrapper isOpen={isOpen}>
<ChangeWarningModal
warningText="Switching the vector database will ignore previously embedded documents and future similarity search results. They will need to be re-added to each workspace."
warningText="Switching the vector database will reset all previously embedded documents in all workspaces.\n\nConfirming will clear all embeddings from your vector database and remove all documents from your workspaces. Your uploaded documents will not be deleted, they will be available for re-embedding."
onClose={closeModal}
onConfirm={handleSaveSettings}
/>
Expand Down
34 changes: 22 additions & 12 deletions server/models/vectors.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,19 @@ const DocumentVectors = {
}
},

where: async function (clause = {}, limit) {
try {
const results = await prisma.document_vectors.findMany({
where: clause,
take: limit || undefined,
});
return results;
} catch (error) {
console.error("Where query failed", error);
return [];
}
},

deleteForWorkspace: async function (workspaceId) {
const documents = await Document.forWorkspace(workspaceId);
const docIds = [...new Set(documents.map((doc) => doc.docId))];
Expand All @@ -40,27 +53,24 @@ const DocumentVectors = {
}
},

where: async function (clause = {}, limit) {
deleteIds: async function (ids = []) {
try {
const results = await prisma.document_vectors.findMany({
where: clause,
take: limit || undefined,
await prisma.document_vectors.deleteMany({
where: { id: { in: ids } },
});
return results;
return true;
} catch (error) {
console.error("Where query failed", error);
return [];
console.error("Delete IDs failed", error);
return false;
}
},

deleteIds: async function (ids = []) {
delete: async function (clause = {}) {
try {
await prisma.document_vectors.deleteMany({
where: { id: { in: ids } },
});
await prisma.document_vectors.deleteMany({ where: clause });
return true;
} catch (error) {
console.error("Delete IDs failed", error);
console.error("Delete failed", error);
return false;
}
},
Expand Down
11 changes: 11 additions & 0 deletions server/utils/files/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,16 @@ async function getWatchedDocumentFilenames(filenames = []) {
}, {});
}

/**
 * Purges the entire vector-cache folder and recreates it empty.
 *
 * Removal uses `force`, so a folder that does not exist yet is not an error.
 * Creation uses `recursive`, so missing parent directories are created too
 * instead of failing with ENOENT.
 * @returns {void}
 */
function purgeEntireVectorCache() {
  fs.rmSync(vectorCachePath, { recursive: true, force: true });
  fs.mkdirSync(vectorCachePath, { recursive: true });
}

module.exports = {
findDocumentInDocuments,
cachedVectorInformation,
Expand All @@ -293,4 +303,5 @@ module.exports = {
isWithin,
documentsPath,
hasVectorCachedFiles,
purgeEntireVectorCache,
};
5 changes: 3 additions & 2 deletions server/utils/helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,11 @@

/**
* Gets the systems current vector database provider.
* @param {('pinecone' | 'chroma' | 'lancedb' | 'weaviate' | 'qdrant' | 'milvus' | 'zilliz' | 'astra') | null} getExactly - If provided, this will return an explicit provider.
* @returns { BaseVectorDatabaseProvider}
*/
function getVectorDbClass() {
const vectorSelection = process.env.VECTOR_DB || "lancedb";
function getVectorDbClass(getExactly = null) {
const vectorSelection = getExactly ?? process.env.VECTOR_DB ?? "lancedb";
switch (vectorSelection) {
case "pinecone":
const { Pinecone } = require("../vectorDbProviders/pinecone");
Expand Down
23 changes: 23 additions & 0 deletions server/utils/helpers/updateENV.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
const { resetAllVectorStores } = require("../vectorStore/resetAllVectorStores");

const KEY_MAPPING = {
LLMProvider: {
envKey: "LLM_PROVIDER",
Expand Down Expand Up @@ -248,6 +250,7 @@ const KEY_MAPPING = {
EmbeddingEngine: {
envKey: "EMBEDDING_ENGINE",
checks: [supportedEmbeddingModel],
postUpdate: [handleVectorStoreReset],
},
EmbeddingBasePath: {
envKey: "EMBEDDING_BASE_PATH",
Expand All @@ -256,6 +259,7 @@ const KEY_MAPPING = {
EmbeddingModelPref: {
envKey: "EMBEDDING_MODEL_PREF",
checks: [isNotEmpty],
postUpdate: [handleVectorStoreReset],
},
EmbeddingModelMaxChunkLength: {
envKey: "EMBEDDING_MODEL_MAX_CHUNK_LENGTH",
Expand All @@ -276,6 +280,7 @@ const KEY_MAPPING = {
VectorDB: {
envKey: "VECTOR_DB",
checks: [isNotEmpty, supportedVectorDB],
postUpdate: [handleVectorStoreReset],
},

// Chroma Options
Expand Down Expand Up @@ -878,6 +883,24 @@ function noRestrictedChars(input = "") {
: null;
}

/**
 * Post-update hook that resets vector stores when a vector-related setting changes.
 *
 * - `VectorDB`: resets the namespaces of the previous provider (`prevValue`).
 * - `EmbeddingEngine` / `EmbeddingModelPref`: resets the namespaces of the
 *   provider currently set in `process.env.VECTOR_DB`.
 *
 * @param {string} key - Settings key that was updated.
 * @param {string} prevValue - Value before the update.
 * @param {string} nextValue - Value after the update.
 * @returns {Promise<boolean|undefined>} `undefined` when the value did not change,
 * the result of `resetAllVectorStores` when a reset ran, `false` for any other key.
 */
async function handleVectorStoreReset(key, prevValue, nextValue) {
  const unchanged = prevValue === nextValue;
  if (unchanged) return;

  switch (key) {
    case "VectorDB": {
      console.log(
        `Vector configuration changed from ${prevValue} to ${nextValue} - resetting ${prevValue} namespaces`
      );
      // The previous provider is the one holding the now-stale namespaces.
      return await resetAllVectorStores({ vectorDbKey: prevValue });
    }
    case "EmbeddingEngine":
    case "EmbeddingModelPref": {
      console.log(
        `${key} changed from ${prevValue} to ${nextValue} - resetting ${process.env.VECTOR_DB} namespaces`
      );
      return await resetAllVectorStores({
        vectorDbKey: process.env.VECTOR_DB,
      });
    }
    default:
      return false;
  }
}

// This will force update .env variables which for any which reason were not able to be parsed or
// read from an ENV file as this seems to be a complicating step for many so allowing people to write
// to the process will at least alleviate that issue. It does not perform comprehensive validity checks or sanity checks
Expand Down
48 changes: 48 additions & 0 deletions server/utils/vectorStore/resetAllVectorStores.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
const { Workspace } = require("../../models/workspace");
const { Document } = require("../../models/documents");
const { DocumentVectors } = require("../../models/vectors");
const { EventLogs } = require("../../models/eventLogs");
const { purgeEntireVectorCache } = require("../files");
const { getVectorDbClass } = require("../helpers");

/**
 * Resets all vector data and the content associated with it, in this order:
 * 1. Purges the entire vector-cache folder.
 * 2. Deletes all document vectors from the database.
 * 3. Deletes all documents from the database.
 * 4. Logs a `workspace_vectors_reset` event.
 * 5. Deletes the vector db namespace of every workspace, one at a time.
 *
 * A failure to delete a single namespace is logged and does not stop the
 * remaining workspaces from being processed.
 * @param {Object} params
 * @param {string} params.vectorDbKey - The _previous_ vector database provider name that we will be resetting.
 * @returns {Promise<boolean>} - True if successful, false if any step outside the per-namespace deletion threw.
 */
async function resetAllVectorStores({ vectorDbKey }) {
  try {
    // Workspace slugs double as the namespace names removed further down.
    const allWorkspaces = await Workspace.where();

    purgeEntireVectorCache();
    await DocumentVectors.delete();
    await Document.delete();
    await EventLogs.logEvent("workspace_vectors_reset", {
      reason: "System vector configuration changed",
    });

    console.log(
      "Resetting anythingllm managed vector namespaces for",
      vectorDbKey
    );
    const provider = getVectorDbClass(vectorDbKey);

    // Best-effort removal: report the error message and move on.
    const dropNamespaceFor = async (workspace) => {
      try {
        await provider["delete-namespace"]({ namespace: workspace.slug });
      } catch (err) {
        console.error(err.message);
      }
    };

    for (const workspace of allWorkspaces) {
      await dropNamespaceFor(workspace);
    }

    return true;
  } catch (failure) {
    console.error("Failed to reset vector stores:", failure);
    return false;
  }
}

module.exports = { resetAllVectorStores };

0 comments on commit ae51061

Please sign in to comment.