
Commit 8511f1c

Initial commit from Create Llama
0 parents  commit 8511f1c

Some content is hidden: large commits have some content hidden by default, so only part of the changed files is shown below.

80 files changed: 21,920 lines added, 0 removed

.devcontainer/devcontainer.json

+46 lines

{
  "image": "mcr.microsoft.com/vscode/devcontainers/typescript-node:dev-20-bullseye",
  "features": {
    "ghcr.io/devcontainers-contrib/features/turborepo-npm:1": {},
    "ghcr.io/devcontainers-contrib/features/typescript:2": {},
    "ghcr.io/devcontainers/features/python:1": {
      "version": "3.11",
      "toolsToInstall": [
        "flake8",
        "black",
        "mypy",
        "poetry"
      ]
    }
  },
  "customizations": {
    "codespaces": {
      "openFiles": [
        "README.md"
      ]
    },
    "vscode": {
      "extensions": [
        "ms-vscode.typescript-language-features",
        "esbenp.prettier-vscode",
        "ms-python.python",
        "ms-python.black-formatter",
        "ms-python.vscode-flake8",
        "ms-python.vscode-pylance"
      ],
      "settings": {
        "python.formatting.provider": "black",
        "python.languageServer": "Pylance",
        "python.analysis.typeCheckingMode": "basic"
      }
    }
  },
  "containerEnv": {
    "POETRY_VIRTUALENVS_CREATE": "false"
  },
  "forwardPorts": [
    3000,
    8000
  ],
  "postCreateCommand": "npm install"
}

.env

+57 lines

# The Llama Cloud API key.
# LLAMA_CLOUD_API_KEY=

# The provider for the AI models to use.
MODEL_PROVIDER=openai

# The name of the LLM model to use.
MODEL=gpt-4o-mini

# The name of the embedding model to use.
EMBEDDING_MODEL=text-embedding-3-large

# The dimension of the embedding model to use.
EMBEDDING_DIM=1024

# The questions to help users get started (multi-line).
# CONVERSATION_STARTERS=

# The OpenAI API key to use.
# OPENAI_API_KEY=

# Temperature for sampling from the model.
# LLM_TEMPERATURE=

# Maximum number of tokens to generate.
# LLM_MAX_TOKENS=

# The number of similar embeddings to return when retrieving documents.
# TOP_K=

# The directory to store the local storage cache.
STORAGE_CACHE_DIR=.cache

# FILESERVER_URL_PREFIX is the URL prefix of the server storing the images generated by the interpreter.
FILESERVER_URL_PREFIX=http://localhost:3000/api/files

# Customize the prompt that generates the next question suggestions based on the conversation history.
# Disable this prompt to disable the next question suggestions feature.
NEXT_QUESTION_PROMPT="You're a helpful assistant! Your task is to suggest the next question that the user might ask.
Here is the conversation history:
---------------------
{conversation}
---------------------
Given the conversation history, please give me 3 questions that the user might ask next!
Your answer should be wrapped in three backticks and follow this format:
```
<question 1>
<question 2>
<question 3>
```"

# The system prompt for the AI model.
SYSTEM_PROMPT="You are a DuckDuckGo search agent.
You can use the duckduckgo search tool to get information from the web to answer user questions.
For better results, you can specify the region parameter to get results from a specific region, but it's optional.
"

.eslintrc.json

+7 lines

{
  "extends": ["next/core-web-vitals", "prettier"],
  "rules": {
    "max-params": ["error", 4],
    "prefer-const": "error"
  }
}

.gitignore

+37 lines

# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# local env files
.env*.local

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts

output/

Dockerfile

+16 lines

FROM node:20-alpine AS build

WORKDIR /app

# Install dependencies
COPY package.json package-lock.* ./
RUN npm install

# Build the application
COPY . .
RUN npm run build

# ====================================
FROM build AS release

CMD ["npm", "run", "start"]

README.md

+71 lines

This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Next.js](https://nextjs.org/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).

## Getting Started

First, install the dependencies:

```
npm install
```

Second, generate the embeddings of the documents in the `./data` directory (if this folder exists - otherwise, skip this step):

```
npm run generate
```

Third, run the development server:

```
npm run dev
```

Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.

You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.

This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font.

## Using Docker

1. Build an image for the Next.js app:

```
docker build -t <your_app_image_name> .
```

2. Generate embeddings:

Parse the data and generate the vector embeddings if the `./data` folder exists - otherwise, skip this step:

```
docker run \
  --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/data:/app/data \
  -v $(pwd)/cache:/app/cache \
  <your_app_image_name> \
  npm run generate
```

The `.env` and `config` mounts let the container use the environment variables and configuration from your file system; the `cache` mount stores the vector database on your file system.

3. Start the app:

```
docker run \
  --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/cache:/app/cache \
  -p 3000:3000 \
  <your_app_image_name>
```

The mounts serve the same purpose as in the generate step.

## Learn More

To learn more about LlamaIndex, take a look at the following resources:

- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
- [LlamaIndexTS Documentation](https://ts.llamaindex.ai) - learn about LlamaIndex (TypeScript features).

You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!

+24 lines

import { LLamaCloudFileService } from "llamaindex";
import { NextResponse } from "next/server";

/**
 * This API is to get the config from the backend envs and expose it to the frontend
 */
export async function GET() {
  if (!process.env.LLAMA_CLOUD_API_KEY) {
    return NextResponse.json(
      {
        error: "env variable LLAMA_CLOUD_API_KEY is required to use LlamaCloud",
      },
      { status: 500 },
    );
  }
  const config = {
    projects: await LLamaCloudFileService.getAllProjectsWithPipelines(),
    pipeline: {
      pipeline: process.env.LLAMA_CLOUD_INDEX_NAME,
      project: process.env.LLAMA_CLOUD_PROJECT_NAME,
    },
  };
  return NextResponse.json(config, { status: 200 });
}

app/api/chat/config/route.ts

+11 lines

import { NextResponse } from "next/server";

/**
 * This API is to get the config from the backend envs and expose it to the frontend
 */
export async function GET() {
  const config = {
    starterQuestions: process.env.CONVERSATION_STARTERS?.trim().split("\n"),
  };
  return NextResponse.json(config, { status: 200 });
}
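
As an illustration of how the frontend might consume this endpoint, here is a minimal sketch of a client-side hook. Only the `/api/chat/config` path follows from the route's location in the Next.js app router; the hook name and usage are hypothetical and not part of this commit.

```
"use client";

import { useEffect, useState } from "react";

// Hypothetical hook (not part of this commit): fetches /api/chat/config and
// exposes the starter questions configured via CONVERSATION_STARTERS.
export function useStarterQuestions() {
  const [questions, setQuestions] = useState<string[]>([]);

  useEffect(() => {
    fetch("/api/chat/config")
      .then((res) => res.json())
      .then((config: { starterQuestions?: string[] }) => {
        setQuestions(config.starterQuestions ?? []);
      })
      .catch(() => setQuestions([])); // fall back to no starter questions on failure
  }, []);

  return questions;
}
```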

app/api/chat/engine/chat.ts

+51 lines

import {
  BaseChatEngine,
  BaseToolWithCall,
  LLMAgent,
  QueryEngineTool,
} from "llamaindex";
import fs from "node:fs/promises";
import path from "node:path";
import { getDataSource } from "./index";
import { generateFilters } from "./queryFilter";
import { createTools } from "./tools";

export async function createChatEngine(documentIds?: string[], params?: any) {
  const tools: BaseToolWithCall[] = [];

  // Add a query engine tool if we have a data source
  // Delete this code if you don't have a data source
  const index = await getDataSource(params);
  if (index) {
    tools.push(
      new QueryEngineTool({
        queryEngine: index.asQueryEngine({
          preFilters: generateFilters(documentIds || []),
        }),
        metadata: {
          name: "data_query_engine",
          description: `A query engine for documents from your data source.`,
        },
      }),
    );
  }

  const configFile = path.join("config", "tools.json");
  let toolConfig: any;
  try {
    // add tools from config file if it exists
    toolConfig = JSON.parse(await fs.readFile(configFile, "utf8"));
  } catch (e) {
    console.info(`Could not read ${configFile} file. Using no tools.`);
  }
  if (toolConfig) {
    tools.push(...(await createTools(toolConfig)));
  }

  const agent = new LLMAgent({
    tools,
    systemPrompt: process.env.SYSTEM_PROMPT,
  }) as unknown as BaseChatEngine;

  return agent;
}
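
For context, here is a minimal sketch of how a chat route handler might invoke `createChatEngine`. The actual chat route is not among the files shown above, so the request shape and the non-streaming response handling are assumptions.

```
import { NextRequest, NextResponse } from "next/server";

import { createChatEngine } from "./engine/chat";

// Hypothetical, non-streaming handler; assumes the request body carries a plain `message` string.
export async function POST(request: NextRequest) {
  const { message } = await request.json();

  // No document filter and no extra params in this sketch.
  const chatEngine = await createChatEngine();

  // The exact shape of the returned response object (e.g. `message.content`)
  // depends on the installed llamaindex version.
  const response = await chatEngine.chat({ message });
  return NextResponse.json({ result: response.message.content });
}
```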

app/api/chat/engine/generate.ts

+43 lines

import { VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";

import * as dotenv from "dotenv";

import { getDocuments } from "./loader";
import { initSettings } from "./settings";

// Load environment variables from local .env file
dotenv.config();

async function getRuntime(func: any) {
  const start = Date.now();
  await func();
  const end = Date.now();
  return end - start;
}

async function generateDatasource() {
  console.log(`Generating storage context...`);
  // Split documents, create embeddings and store them in the storage context
  const persistDir = process.env.STORAGE_CACHE_DIR;
  if (!persistDir) {
    throw new Error("STORAGE_CACHE_DIR environment variable is required!");
  }
  const ms = await getRuntime(async () => {
    const storageContext = await storageContextFromDefaults({
      persistDir,
    });
    const documents = await getDocuments();

    await VectorStoreIndex.fromDocuments(documents, {
      storageContext,
    });
  });
  console.log(`Storage context successfully generated in ${ms / 1000}s.`);
}

(async () => {
  initSettings();
  await generateDatasource();
  console.log("Finished generating storage.");
})();

app/api/chat/engine/index.ts

+22 lines

import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";

export async function getDataSource(params?: any) {
  const persistDir = process.env.STORAGE_CACHE_DIR;
  if (!persistDir) {
    throw new Error("STORAGE_CACHE_DIR environment variable is required!");
  }
  const storageContext = await storageContextFromDefaults({
    persistDir,
  });

  const numberOfDocs = Object.keys(
    (storageContext.docStore as SimpleDocumentStore).toDict(),
  ).length;
  if (numberOfDocs === 0) {
    return null;
  }
  return await VectorStoreIndex.init({
    storageContext,
  });
}
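
As a usage note, here is a minimal sketch of querying the index returned by `getDataSource` outside the chat engine; the standalone function and the warning text are illustrative, not part of this commit.

```
import { getDataSource } from "./index";

// Hypothetical standalone usage: load the persisted index and run a one-off query.
async function queryDataSource(question: string) {
  const index = await getDataSource();
  if (!index) {
    console.warn("No documents in the storage cache. Run `npm run generate` first.");
    return null;
  }
  const queryEngine = index.asQueryEngine();
  // The exact field carrying the generated answer (e.g. `message.content`)
  // depends on the installed llamaindex version.
  return await queryEngine.query({ query: question });
}
```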
