-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathai_core.py
21 lines (18 loc) · 796 Bytes
/
ai_core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_nomic import NomicEmbeddings
def load_documents(urls):
    """Load every URL and return the results as one flat list.

    Args:
        urls: An iterable of URL strings. A single URL given as a bare
            string is also accepted and treated as a one-element list.

    Returns:
        A flat list containing every item yielded by
        ``WebBaseLoader(url).load()`` for each URL, in input order.
    """
    # A bare string would otherwise be iterated character by character,
    # creating one bogus loader per character.
    if isinstance(urls, str):
        urls = [urls]
    # One loader per URL; flatten while iterating instead of building an
    # intermediate list of lists.
    return [doc for url in urls for doc in WebBaseLoader(url).load()]
def split_documents(docs, *, chunk_size=250, chunk_overlap=0):
    """Split documents into smaller chunks with a recursive text splitter.

    Args:
        docs: The documents to split; passed unchanged to the splitter's
            ``split_documents`` method.
        chunk_size: Chunk size handed to
            ``RecursiveCharacterTextSplitter.from_tiktoken_encoder``.
            Defaults to 250, the value that was previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks, handed to the
            same constructor. Defaults to 0 (previously hard-coded).

    Returns:
        Whatever the splitter's ``split_documents`` returns for ``docs``
        (presumably a list of chunked documents -- confirm against the
        langchain version in use).
    """
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)
def create_vectorstore(doc_splits):
    """Build a Chroma vector store from already-split documents.

    Args:
        doc_splits: The document chunks to index; forwarded as the
            ``documents`` argument of ``Chroma.from_documents``.

    Returns:
        The object returned by ``Chroma.from_documents`` for the
        ``"rag-chroma"`` collection.
    """
    # Embedding backend: the Nomic model configured for local inference.
    embedder = NomicEmbeddings(
        model="nomic-embed-text-v1.5",
        inference_mode="local",
    )
    store = Chroma.from_documents(
        documents=doc_splits,
        collection_name="rag-chroma",
        embedding=embedder,
    )
    return store