Skip to content

Commit

Permalink
Dev (#1662)
Browse files Browse the repository at this point in the history
* docs: add docs and schema for "OS" provider (#1536)

* ignore .env

* ✨ use and cache imports for autocomplete (#1456)

* ✨ use and cache imports for autocomplete

* fix tsc

* add voyage rerank-1

* import Handlebars

* feat: update onboarding w/ embeddings model (#1570)

* chore(gui): remove unused pages

* feat: add embeddings step

* feat: update styles

* feat: copy button updates

* fix: correct pull command for embed model

* fix: remove commented code

* fix: remove commented code

* feat: simplify copy btn props

* chore: rename onboarding selection event

* feat: add provider config

* fix: undo msg name

* remove dead code

* fix: invalid mode check

* fix: remove testing logic

* fix: fullscreen gui retains context when hidden, fixed fullscreen focusing (#1582)

* small UI tweaks

* media query

* feat: add best experience onboarding

* small fixes

* feat: add free trial card to onboarding (#1600)

* feat: add free trial card to onboarding

* add import

* chore: add telemetry for full screen toggle (#1618)

* rerank-lite-1

* basic tests for VS Code extension

* chore: onboarding metrics (#1626)

* fix: pageview tracking

* feat: add onboarding telemetry

* create single `onboardingStatus` type

* improved var naming

* remove console logs

* fix double adding of context providers

* fix cross-platform build validation

* Update troubleshooting.md (#1637)

* add back skip onboarding button

* fix free trial embeddings error

* Nate/indexing fixes (#1642)

* fix pausing of indexing

* don't send empty array to openai embeddings

* catch embeddings errors without stopping entire indexing process

* update version

* changelog

* Update troubleshooting.md (#1646)

* chore: reduce vscode extension bundle size (#1647)

* feat: make disabled state a tooltip (#1653)

* add content-type header to ollama /api/show req

* support legacy OpenAI formatted servers

* Tests for indexing + follow all .gitignore syntax (#1661)

* cleaner indexing progress updates messages

* chunking tests

* first round of testing for walkDir in .ts

* few more tests

* swap fs with ide

* clean up dead code

* replace traverseDirectory

* fix listFolders

* smoother indexing updates for chunking

* ide pathSep

* absolute paths test

* fix path sep error with abs paths on windows

* clean up tests

* feat: Client Certificate Options Support (#1658)

* feat: support client certificate authentication options

* docs: support client certificate authentication options

* chore: update package.json

* docs: move clientCertificate to its own example

* update config_schema.json with client cert options

* Add support for the HuggingFace Text Embeddings Inference server (#1657)

Co-authored-by: Rob Leidle <[email protected]>

* update package.json version

---------

Co-authored-by: Patrick Erichsen <[email protected]>
Co-authored-by: Jonah Wagner <[email protected]>
Co-authored-by: 华丽 <[email protected]>
Co-authored-by: Ten <[email protected]>
Co-authored-by: Rob Leidle <[email protected]>
Co-authored-by: Rob Leidle <[email protected]>
  • Loading branch information
7 people authored Jul 4, 2024
1 parent d69b830 commit 882a79d
Show file tree
Hide file tree
Showing 48 changed files with 1,094 additions and 244 deletions.
3 changes: 2 additions & 1 deletion .prompts/test.prompt → .prompts/jest.prompt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ Write unit tests for the above selected code, following each of these instructio
- The tests should be complete and sophisticated
- Give the tests just as chat output, don't edit any file
- Don't explain how to set up `jest`
- Write a single code block, making sure to label with the language being used (e.g. "```typescript")
- Write a single code block, making sure to label with the language being used (e.g. "```typescript")
- Do not under any circumstances mock any functions or modules
16 changes: 11 additions & 5 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,21 @@
"CONTINUE_GLOBAL_DIR": "${workspaceFolder}/binary/.continue"
}
},

{
"name": "Debug Jest Tests",
"type": "node",
"request": "launch",
"name": "Jest All",
"program": "${workspaceFolder}/core/node_modules/.bin/jest",
"args": ["--runInBand"],
"runtimeArgs": [
"--inspect-brk",
"${workspaceRoot}/core/node_modules/.bin/jest",
"${fileBasenameNoExtension}",
"--runInBand",
"--config",
"${workspaceRoot}/core/jest.config.js"
],
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"disableOptimisticBPs": true
"internalConsoleOptions": "neverOpen"
},
{
"type": "chrome",
Expand Down
3 changes: 2 additions & 1 deletion core/config/promptFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import Handlebars from "handlebars";
import path from "path";
import * as YAML from "yaml";
import type { IDE, SlashCommand } from "..";
import { walkDir } from "../indexing/walkDir";
import { stripImages } from "../llm/countTokens.js";
import { renderTemplatedString } from "../llm/llms/index.js";
import { getBasename } from "../util/index.js";
Expand All @@ -18,7 +19,7 @@ export async function getPromptFiles(
return [];
}

const paths = await ide.listWorkspaceContents(dir, false);
const paths = await walkDir(dir, ide, { ignoreFiles: [] });
const results = paths.map(async (path) => {
const content = await ide.readFile(path);
return { path, content };
Expand Down
2 changes: 1 addition & 1 deletion core/config/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,6 @@ declare global {
stackDepth: number,
): Promise<string[]>;
getAvailableThreads(): Promise<Thread[]>;
listWorkspaceContents(directory?: string, useGitIgnore?: boolean): Promise<string[]>;
listFolders(): Promise<string[]>;
getWorkspaceDirs(): Promise<string[]>;
getWorkspaceConfigs(): Promise<ContinueRcJson[]>;
Expand Down Expand Up @@ -639,6 +638,7 @@ declare global {
}
export type EmbeddingsProviderName =
| "huggingface-tei"
| "transformers.js"
| "ollama"
| "openai"
Expand Down
13 changes: 9 additions & 4 deletions core/context/providers/FileContextProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ import {
ContextSubmenuItem,
LoadSubmenuItemsArgs,
} from "../../index.js";
import { getBasename, groupByLastNPathParts, getUniqueFilePath } from "../../util/index.js";
import { walkDir } from "../../indexing/walkDir.js";
import {
getBasename,
getUniqueFilePath,
groupByLastNPathParts,
} from "../../util/index.js";
import { BaseContextProvider } from "../index.js";

const MAX_SUBMENU_ITEMS = 10_000;
Expand Down Expand Up @@ -40,12 +45,12 @@ class FileContextProvider extends BaseContextProvider {
const workspaceDirs = await args.ide.getWorkspaceDirs();
const results = await Promise.all(
workspaceDirs.map((dir) => {
return args.ide.listWorkspaceContents(dir);
return walkDir(dir, args.ide);
}),
);
const files = results.flat().slice(-MAX_SUBMENU_ITEMS);
const files = results.flat().slice(-MAX_SUBMENU_ITEMS);
const fileGroups = groupByLastNPathParts(files, 2);

return files.map((file) => {
return {
id: file,
Expand Down
3 changes: 2 additions & 1 deletion core/context/providers/FileTreeContextProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
ContextProviderDescription,
ContextProviderExtras,
} from "../../index.js";
import { walkDir } from "../../indexing/walkDir.js";
import { splitPath } from "../../util/index.js";
import { BaseContextProvider } from "../index.js";

Expand Down Expand Up @@ -43,7 +44,7 @@ class FileTreeContextProvider extends BaseContextProvider {
const trees = [];

for (const workspaceDir of workspaceDirs) {
const contents = await extras.ide.listWorkspaceContents(workspaceDir);
const contents = await walkDir(workspaceDir, extras.ide);

const subDirTree: Directory = {
name: splitPath(workspaceDir).pop() ?? "",
Expand Down
13 changes: 9 additions & 4 deletions core/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -434,10 +434,6 @@ export interface IDE {
stackDepth: number,
): Promise<string[]>;
getAvailableThreads(): Promise<Thread[]>;
listWorkspaceContents(
directory?: string,
useGitIgnore?: boolean,
): Promise<string[]>;
listFolders(): Promise<string[]>;
getWorkspaceDirs(): Promise<string[]>;
getWorkspaceConfigs(): Promise<ContinueRcJson[]>;
Expand Down Expand Up @@ -482,6 +478,7 @@ export interface IDE {

// Callbacks
onDidChangeActiveTextEditor(callback: (filepath: string) => void): void;
pathSep(): Promise<string>;
}

// Slash Commands
Expand Down Expand Up @@ -667,6 +664,13 @@ export interface RequestOptions {
headers?: { [key: string]: string };
extraBodyProperties?: { [key: string]: any };
noProxy?: string[];
clientCertificate?: ClientCertificateOptions;
}

export interface ClientCertificateOptions {
cert: string;
key: string;
passphrase?: string;
}

export interface StepWithParams {
Expand Down Expand Up @@ -722,6 +726,7 @@ export interface ModelDescription {
}

export type EmbeddingsProviderName =
| "huggingface-tei"
| "transformers.js"
| "ollama"
| "openai"
Expand Down
6 changes: 3 additions & 3 deletions core/indexing/LanceDbIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ export class LanceDbIndex implements CodebaseIndex {
accumulatedProgress += 1 / results.addTag.length / 3;
yield {
progress: accumulatedProgress,
desc: `Indexing ${path}`,
desc: `Indexing ${getBasename(path)}`,
status: "indexing",
};
}
Expand All @@ -337,7 +337,7 @@ export class LanceDbIndex implements CodebaseIndex {
accumulatedProgress += 1 / toDel.length / 3;
yield {
progress: accumulatedProgress,
desc: `Stashing ${path}`,
desc: `Stashing ${getBasename(path)}`,
status: "indexing",
};
}
Expand All @@ -354,7 +354,7 @@ export class LanceDbIndex implements CodebaseIndex {
accumulatedProgress += 1 / results.del.length / 3;
yield {
progress: accumulatedProgress,
desc: `Removing ${path}`,
desc: `Removing ${getBasename(path)}`,
status: "indexing",
};
}
Expand Down
25 changes: 24 additions & 1 deletion core/indexing/chunk/ChunkCodebaseIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ export class ChunkCodebaseIndex implements CodebaseIndex {
}
}

const progressReservedForTagging = 0.3;
let accumulatedProgress = 0;

// Compute chunks for new files
const contents = await Promise.all(
results.compute.map(({ path }) => this.readFile(path)),
Expand All @@ -111,8 +114,10 @@ export class ChunkCodebaseIndex implements CodebaseIndex {
handleChunk(chunk);
}

accumulatedProgress =
(i / results.compute.length) * (1 - progressReservedForTagging);
yield {
progress: i / results.compute.length,
progress: accumulatedProgress,
desc: `Chunking ${getBasename(item.path)}`,
status: "indexing",
};
Expand All @@ -134,6 +139,12 @@ export class ChunkCodebaseIndex implements CodebaseIndex {
}

markComplete([item], IndexResultType.AddTag);
accumulatedProgress += 1 / results.addTag.length / 4;
yield {
progress: accumulatedProgress,
desc: `Chunking ${getBasename(item.path)}`,
status: "indexing",
};
}

// Remove tag
Expand All @@ -150,6 +161,12 @@ export class ChunkCodebaseIndex implements CodebaseIndex {
[tagString, item.cacheKey, item.path],
);
markComplete([item], IndexResultType.RemoveTag);
accumulatedProgress += 1 / results.removeTag.length / 4;
yield {
progress: accumulatedProgress,
desc: `Removing ${getBasename(item.path)}`,
status: "indexing",
};
}

// Delete
Expand All @@ -164,6 +181,12 @@ export class ChunkCodebaseIndex implements CodebaseIndex {
]);

markComplete([item], IndexResultType.Delete);
accumulatedProgress += 1 / results.del.length / 4;
yield {
progress: accumulatedProgress,
desc: `Removing ${getBasename(item.path)}`,
status: "indexing",
};
}
}
}
4 changes: 4 additions & 0 deletions core/indexing/chunk/basic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ export function* basicChunker(
contents: string,
maxChunkSize: number,
): Generator<ChunkWithoutID> {
if (contents.trim().length === 0) {
return;
}

let chunkContent = "";
let chunkTokens = 0;
let startLine = 0;
Expand Down
5 changes: 4 additions & 1 deletion core/indexing/chunk/code.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ function collapseChildren(
}
code = code.slice(node.startIndex);
let removedChild = false;
while (countTokens(code) > maxChunkSize && collapsedChildren.length > 0) {
while (
countTokens(code.trim()) > maxChunkSize &&
collapsedChildren.length > 0
) {
removedChild = true;
// Remove children starting at the end - TODO: Add multiple chunks so no children are missing
const childCode = collapsedChildren.pop()!;
Expand Down
103 changes: 103 additions & 0 deletions core/indexing/embeddings/HuggingFaceTEIEmbeddingsProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import fetch, { Response } from "node-fetch";
import { EmbedOptions, FetchFunction } from "../..";
import { withExponentialBackoff } from "../../util/withExponentialBackoff";
import BaseEmbeddingsProvider from "./BaseEmbeddingsProvider";

/**
 * Embeddings provider that talks to a HuggingFace Text Embeddings Inference
 * (TEI) server.
 *
 * Chunks are POSTed to `embed` (resolved against `options.apiBase`) in batches
 * of at most `maxBatchSize`. On construction an `info` request is started in
 * the background; if it succeeds, the model id and client batch size reported
 * by the server replace the defaults.
 */
class HuggingFaceTEIEmbeddingsProvider extends BaseEmbeddingsProvider {
  /** Upper bound on the number of inputs sent in one `embed` request. */
  private maxBatchSize = 32;

  static defaultOptions: Partial<EmbedOptions> | undefined = {
    apiBase: "http://localhost:8080",
    model: "tei",
  };

  /**
   * @param options Embedding options; `apiBase` is normalised to end in "/".
   * @param fetch Fetch implementation handed to the base provider.
   */
  constructor(options: EmbedOptions, fetch: FetchFunction) {
    super(options, fetch);

    // Fall back to the static default instead of concatenating onto
    // `undefined`, which would have produced the base "undefined/".
    const apiBase =
      this.options.apiBase ??
      HuggingFaceTEIEmbeddingsProvider.defaultOptions?.apiBase;
    if (apiBase !== undefined) {
      // without this extra slash the last portion of the path will be dropped
      // from the URL when using the node.js URL constructor
      this.options.apiBase = apiBase.endsWith("/") ? apiBase : `${apiBase}/`;
    }

    // Fire-and-forget: the constructor cannot await, so the defaults stay in
    // effect until (and unless) the server answers.
    void this.doInfoRequest()
      .then((response) => {
        if (typeof response.model_id === "string" && response.model_id) {
          this.options.model = response.model_id;
        }
        // Only accept a usable batch size; a missing or non-positive value
        // would make `embed` send empty batches.
        const batchSize = response.max_client_batch_size;
        if (Number.isInteger(batchSize) && batchSize > 0) {
          this.maxBatchSize = batchSize;
        }
      })
      .catch((e: unknown) => {
        // Without a handler a down/unreachable server surfaces as an
        // unhandled promise rejection; keep the defaults instead.
        const reason = e instanceof Error ? e.message : String(e);
        console.warn(
          `Failed to fetch TEI server info, using default settings: ${reason}`,
        );
      });
  }

  /**
   * Embeds every chunk, splitting the input into batches that are requested
   * concurrently.
   *
   * @param chunks Texts to embed.
   * @returns One embedding per chunk, in input order.
   */
  async embed(chunks: string[]): Promise<number[][]> {
    const batchSize = this.maxBatchSize;
    const requests: Promise<number[][]>[] = [];
    for (let start = 0; start < chunks.length; start += batchSize) {
      requests.push(this.doEmbedRequest(chunks.slice(start, start + batchSize)));
    }
    const results = await Promise.all(requests);
    return results.flat();
  }

  /**
   * Sends one batch to the `embed` endpoint.
   *
   * @param batch Texts to embed in a single request.
   * @returns The response body, cast to one embedding per input.
   * @throws TEIEmbedError when the server replies with a structured error body.
   * @throws Error with the raw response text for any other non-OK reply.
   */
  async doEmbedRequest(batch: string[]): Promise<number[][]> {
    const resp = await withExponentialBackoff<Response>(() =>
      this.fetch(new URL("embed", this.options.apiBase), {
        method: "POST",
        body: JSON.stringify({
          inputs: batch,
        }),
        headers: {
          "Content-Type": "application/json",
        },
      }),
    );
    if (!resp.ok) {
      const text = await resp.text();

      // The error body is not guaranteed to be JSON (e.g. a proxy error page);
      // a parse failure must not replace the server's actual message.
      let parsed: unknown;
      try {
        parsed = JSON.parse(text);
      } catch {
        parsed = undefined;
      }

      if (typeof parsed === "object" && parsed !== null) {
        const embedError = parsed as TEIEmbedErrorResponse;
        if (embedError.error_type && embedError.error) {
          throw new TEIEmbedError(embedError);
        }
      }
      throw new Error(
        text || `TEI embed request failed with status ${resp.status}`,
      );
    }
    return (await resp.json()) as number[][];
  }

  /**
   * Fetches the `info` endpoint.
   *
   * @returns The response body, cast to `TEIInfoResponse`.
   * @throws Error with the raw response text when the reply is not OK.
   */
  async doInfoRequest(): Promise<TEIInfoResponse> {
    const resp = await withExponentialBackoff<Response>(() =>
      this.fetch(new URL("info", this.options.apiBase), {
        method: "GET",
      }),
    );
    if (!resp.ok) {
      throw new Error(await resp.text());
    }
    return (await resp.json()) as TEIInfoResponse;
  }
}

/**
 * Error thrown for a structured error reply from the TEI `embed` endpoint.
 *
 * The message is the JSON-serialised error payload; the individual fields are
 * also exposed so callers do not have to re-parse the message.
 */
class TEIEmbedError extends Error {
  /** Value of `error_type` from the server payload. */
  readonly errorType: string;
  /** Value of `error` from the server payload. */
  readonly errorDetail: string;

  /**
   * @param teiResponse Parsed error body returned by the server.
   */
  constructor(teiResponse: TEIEmbedErrorResponse) {
    super(JSON.stringify(teiResponse));
    // Without this the error reports itself as a plain "Error" in logs.
    this.name = "TEIEmbedError";
    this.errorType = teiResponse.error_type;
    this.errorDetail = teiResponse.error;
  }
}

/** Shape of the JSON error body parsed from a non-OK `embed` response. */
type TEIEmbedErrorResponse = {
  error: string;
  error_type: string;
};

/**
 * Body returned by the `info` endpoint, as cast in `doInfoRequest`.
 *
 * Only `model_id` and `max_client_batch_size` are read by the provider in this
 * file; the remaining fields are declared for completeness.
 * NOTE(review): field list presumably mirrors the TEI server's info payload —
 * confirm against the server version in use.
 */
interface TEIInfoResponse {
  // Model identity
  model_id: string;
  model_sha: string;
  model_dtype: string;
  model_type: {
    embedding: {
      pooling: string;
    };
  };

  // Server limits
  max_concurrent_requests: number;
  max_input_length: number;
  max_batch_tokens: number;
  max_batch_requests: number;
  max_client_batch_size: number;
  auto_truncate: boolean;
  tokenization_workers: number;

  // Build information
  version: string;
  sha: string;
  docker_label: string;
}

export default HuggingFaceTEIEmbeddingsProvider;
3 changes: 3 additions & 0 deletions core/indexing/embeddings/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { EmbeddingsProviderName } from "../../index.js";
import BaseEmbeddingsProvider from "./BaseEmbeddingsProvider.js";
import CohereEmbeddingsProvider from "./CohereEmbeddingsProvider.js";
import FreeTrialEmbeddingsProvider from "./FreeTrialEmbeddingsProvider.js";
import HuggingFaceTEIEmbeddingsProvider from "./HuggingFaceTEIEmbeddingsProvider.js";
import OllamaEmbeddingsProvider from "./OllamaEmbeddingsProvider.js";
import OpenAIEmbeddingsProvider from "./OpenAIEmbeddingsProvider.js";
import TransformersJsEmbeddingsProvider from "./TransformersJsEmbeddingsProvider.js";
Expand All @@ -22,5 +23,7 @@ export const allEmbeddingsProviders: Record<
cohere: CohereEmbeddingsProvider,
// eslint-disable-next-line @typescript-eslint/naming-convention
"free-trial": FreeTrialEmbeddingsProvider,
// eslint-disable-next-line @typescript-eslint/naming-convention
"huggingface-tei": HuggingFaceTEIEmbeddingsProvider,
gemini: GeminiEmbeddingsProvider,
};
Loading

0 comments on commit 882a79d

Please sign in to comment.