
Commit 8511f1c

Initial commit from Create Llama
0 parents  commit 8511f1c

Some content is hidden: large commits have some content hidden by default, so only part of the changed files is shown below.

80 files changed: 21,920 lines added, 0 removed

.devcontainer/devcontainer.json

+46 lines

{
  "image": "mcr.microsoft.com/vscode/devcontainers/typescript-node:dev-20-bullseye",
  "features": {
    "ghcr.io/devcontainers-contrib/features/turborepo-npm:1": {},
    "ghcr.io/devcontainers-contrib/features/typescript:2": {},
    "ghcr.io/devcontainers/features/python:1": {
      "version": "3.11",
      "toolsToInstall": [
        "flake8",
        "black",
        "mypy",
        "poetry"
      ]
    }
  },
  "customizations": {
    "codespaces": {
      "openFiles": [
        "README.md"
      ]
    },
    "vscode": {
      "extensions": [
        "ms-vscode.typescript-language-features",
        "esbenp.prettier-vscode",
        "ms-python.python",
        "ms-python.black-formatter",
        "ms-python.vscode-flake8",
        "ms-python.vscode-pylance"
      ],
      "settings": {
        "python.formatting.provider": "black",
        "python.languageServer": "Pylance",
        "python.analysis.typeCheckingMode": "basic"
      }
    }
  },
  "containerEnv": {
    "POETRY_VIRTUALENVS_CREATE": "false"
  },
  "forwardPorts": [
    3000,
    8000
  ],
  "postCreateCommand": "npm install"
}

.env

+57 lines

# The Llama Cloud API key.
# LLAMA_CLOUD_API_KEY=

# The provider for the AI models to use.
MODEL_PROVIDER=openai

# The name of the LLM model to use.
MODEL=gpt-4o-mini

# The name of the embedding model to use.
EMBEDDING_MODEL=text-embedding-3-large

# The dimension of the embedding model to use.
EMBEDDING_DIM=1024

# The questions to help users get started (multi-line).
# CONVERSATION_STARTERS=

# The OpenAI API key to use.
# OPENAI_API_KEY=

# Temperature for sampling from the model.
# LLM_TEMPERATURE=

# Maximum number of tokens to generate.
# LLM_MAX_TOKENS=

# The number of similar embeddings to return when retrieving documents.
# TOP_K=

# The directory to store the local storage cache.
STORAGE_CACHE_DIR=.cache

# FILESERVER_URL_PREFIX is the URL prefix of the server storing the images generated by the interpreter.
FILESERVER_URL_PREFIX=http://localhost:3000/api/files

# Customize the prompt that generates the next question suggestions based on the conversation history.
# Disable this prompt to disable the next question suggestions feature.
NEXT_QUESTION_PROMPT="You're a helpful assistant! Your task is to suggest the next question that the user might ask.
Here is the conversation history:
---------------------
{conversation}
---------------------
Given the conversation history, please give me 3 questions that the user might ask next!
Your answer should be wrapped in three backticks and follow this format:
```
<question 1>
<question 2>
<question 3>
```"

# The system prompt for the AI model.
SYSTEM_PROMPT="You are a DuckDuckGo search agent.
You can use the duckduckgo search tool to get information from the web to answer user questions.
For better results, you can specify the region parameter to get results from a specific region, but it's optional.
"

.eslintrc.json

+7 lines

{
  "extends": ["next/core-web-vitals", "prettier"],
  "rules": {
    "max-params": ["error", 4],
    "prefer-const": "error"
  }
}

.gitignore

+37 lines

# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# local env files
.env*.local

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts

output/

Dockerfile

+16 lines

FROM node:20-alpine AS build

WORKDIR /app

# Install dependencies
COPY package.json package-lock.* ./
RUN npm install

# Build the application
COPY . .
RUN npm run build

# ====================================
FROM build AS release

CMD ["npm", "run", "start"]

README.md

+71 lines

This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Next.js](https://nextjs.org/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).

## Getting Started

First, install the dependencies:

```
npm install
```

Second, generate the embeddings of the documents in the `./data` directory (if this folder exists - otherwise, skip this step):

```
npm run generate
```

Third, run the development server:

```
npm run dev
```

Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.

You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.

This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font.

## Using Docker

1. Build an image for the Next.js app:

```
docker build -t <your_app_image_name> .
```

2. Generate embeddings:

Parse the data and generate the vector embeddings if the `./data` folder exists - otherwise, skip this step:

```
docker run \
  --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/data:/app/data \
  -v $(pwd)/cache:/app/cache \
  <your_app_image_name> \
  npm run generate
```

The `.env` and `config` mounts let the container use the environment variables and configuration from your file system; the `cache` mount stores the vector database on your file system.

3. Start the app:

```
docker run \
  --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/cache:/app/cache \
  -p 3000:3000 \
  <your_app_image_name>
```

The mounts serve the same purpose as in the generate step.

## Learn More

To learn more about LlamaIndex, take a look at the following resources:

- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
- [LlamaIndexTS Documentation](https://ts.llamaindex.ai) - learn about LlamaIndex (TypeScript features).

You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!

+24 lines

import { LLamaCloudFileService } from "llamaindex";
import { NextResponse } from "next/server";

/**
 * This API is to get the config from the backend envs and expose it to the frontend
 */
export async function GET() {
  if (!process.env.LLAMA_CLOUD_API_KEY) {
    return NextResponse.json(
      {
        error: "env variable LLAMA_CLOUD_API_KEY is required to use LlamaCloud",
      },
      { status: 500 },
    );
  }
  const config = {
    projects: await LLamaCloudFileService.getAllProjectsWithPipelines(),
    pipeline: {
      pipeline: process.env.LLAMA_CLOUD_INDEX_NAME,
      project: process.env.LLAMA_CLOUD_PROJECT_NAME,
    },
  };
  return NextResponse.json(config, { status: 200 });
}

app/api/chat/config/route.ts

+11 lines

import { NextResponse } from "next/server";

/**
 * This API is to get the config from the backend envs and expose it to the frontend
 */
export async function GET() {
  const config = {
    starterQuestions: process.env.CONVERSATION_STARTERS?.trim().split("\n"),
  };
  return NextResponse.json(config, { status: 200 });
}
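
As an illustration of how the frontend might consume this endpoint, here is a minimal sketch of a client-side hook. Only the `/api/chat/config` path follows from the route's location in the Next.js app router; the hook name and usage are hypothetical and not part of this commit.

```
"use client";

import { useEffect, useState } from "react";

// Hypothetical hook (not part of this commit): fetches /api/chat/config and
// exposes the starter questions configured via CONVERSATION_STARTERS.
export function useStarterQuestions() {
  const [questions, setQuestions] = useState<string[]>([]);

  useEffect(() => {
    fetch("/api/chat/config")
      .then((res) => res.json())
      .then((config: { starterQuestions?: string[] }) => {
        setQuestions(config.starterQuestions ?? []);
      })
      .catch(() => setQuestions([])); // fall back to no starter questions on failure
  }, []);

  return questions;
}
```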

app/api/chat/engine/chat.ts

+51 lines

import {
  BaseChatEngine,
  BaseToolWithCall,
  LLMAgent,
  QueryEngineTool,
} from "llamaindex";
import fs from "node:fs/promises";
import path from "node:path";
import { getDataSource } from "./index";
import { generateFilters } from "./queryFilter";
import { createTools } from "./tools";

export async function createChatEngine(documentIds?: string[], params?: any) {
  const tools: BaseToolWithCall[] = [];

  // Add a query engine tool if we have a data source
  // Delete this code if you don't have a data source
  const index = await getDataSource(params);
  if (index) {
    tools.push(
      new QueryEngineTool({
        queryEngine: index.asQueryEngine({
          preFilters: generateFilters(documentIds || []),
        }),
        metadata: {
          name: "data_query_engine",
          description: `A query engine for documents from your data source.`,
        },
      }),
    );
  }

  const configFile = path.join("config", "tools.json");
  let toolConfig: any;
  try {
    // add tools from config file if it exists
    toolConfig = JSON.parse(await fs.readFile(configFile, "utf8"));
  } catch (e) {
    console.info(`Could not read ${configFile} file. Using no tools.`);
  }
  if (toolConfig) {
    tools.push(...(await createTools(toolConfig)));
  }

  const agent = new LLMAgent({
    tools,
    systemPrompt: process.env.SYSTEM_PROMPT,
  }) as unknown as BaseChatEngine;

  return agent;
}
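
For context, here is a minimal sketch of how a chat route handler might invoke `createChatEngine`. The actual chat route is not among the files shown above, so the request shape and the non-streaming response handling are assumptions.

```
import { NextRequest, NextResponse } from "next/server";

import { createChatEngine } from "./engine/chat";

// Hypothetical, non-streaming handler; assumes the request body carries a plain `message` string.
export async function POST(request: NextRequest) {
  const { message } = await request.json();

  // No document filter and no extra params in this sketch.
  const chatEngine = await createChatEngine();

  // The exact shape of the returned response object (e.g. `message.content`)
  // depends on the installed llamaindex version.
  const response = await chatEngine.chat({ message });
  return NextResponse.json({ result: response.message.content });
}
```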

app/api/chat/engine/generate.ts

+43 lines

import { VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";

import * as dotenv from "dotenv";

import { getDocuments } from "./loader";
import { initSettings } from "./settings";

// Load environment variables from local .env file
dotenv.config();

async function getRuntime(func: any) {
  const start = Date.now();
  await func();
  const end = Date.now();
  return end - start;
}

async function generateDatasource() {
  console.log(`Generating storage context...`);
  // Split documents, create embeddings and store them in the storage context
  const persistDir = process.env.STORAGE_CACHE_DIR;
  if (!persistDir) {
    throw new Error("STORAGE_CACHE_DIR environment variable is required!");
  }
  const ms = await getRuntime(async () => {
    const storageContext = await storageContextFromDefaults({
      persistDir,
    });
    const documents = await getDocuments();

    await VectorStoreIndex.fromDocuments(documents, {
      storageContext,
    });
  });
  console.log(`Storage context successfully generated in ${ms / 1000}s.`);
}

(async () => {
  initSettings();
  await generateDatasource();
  console.log("Finished generating storage.");
})();

app/api/chat/engine/index.ts

+22 lines

import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";

export async function getDataSource(params?: any) {
  const persistDir = process.env.STORAGE_CACHE_DIR;
  if (!persistDir) {
    throw new Error("STORAGE_CACHE_DIR environment variable is required!");
  }
  const storageContext = await storageContextFromDefaults({
    persistDir,
  });

  const numberOfDocs = Object.keys(
    (storageContext.docStore as SimpleDocumentStore).toDict(),
  ).length;
  if (numberOfDocs === 0) {
    return null;
  }
  return await VectorStoreIndex.init({
    storageContext,
  });
}
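
As a usage note, here is a minimal sketch of querying the index returned by `getDataSource` outside the chat engine; the standalone function and the warning text are illustrative, not part of this commit.

```
import { getDataSource } from "./index";

// Hypothetical standalone usage: load the persisted index and run a one-off query.
async function queryDataSource(question: string) {
  const index = await getDataSource();
  if (!index) {
    console.warn("No documents in the storage cache. Run `npm run generate` first.");
    return null;
  }
  const queryEngine = index.asQueryEngine();
  // The exact field carrying the generated answer (e.g. `message.content`)
  // depends on the installed llamaindex version.
  return await queryEngine.query({ query: question });
}
```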
