Skip to content

Commit c0c5fdb

Browse files
committed
chore: improve performance
1 parent 30a3eb7 commit c0c5fdb

File tree

8 files changed

+28
-14
lines changed

8 files changed

+28
-14
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ pdf-extract = "0.8.2"
4545
rand = "0.9"
4646
rayon = "1.10"
4747
readability = { version = "0.3.0", default-features = false }
48+
regex = "1"
4849
rubato = "0.16"
4950
screencapturekit = "0.3"
5051
serde = "1"

packages/backend/native/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ file-format = { workspace = true }
1313
napi = { workspace = true, features = ["async"] }
1414
napi-derive = { workspace = true }
1515
rand = { workspace = true }
16+
regex = { workspace = true }
1617
sha3 = { workspace = true }
1718
tiktoken-rs = { workspace = true }
1819
v_htmlescape = { workspace = true }

packages/backend/native/src/doc_loader.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,37 @@ use napi::{
44
bindgen_prelude::{AsyncTask, Buffer},
55
Env, JsObject, Result, Task,
66
};
7+
use regex::Regex;
78

89
pub struct Document {
910
inner: Doc,
11+
re: Regex,
1012
}
1113

1214
impl Document {
1315
fn name(&self) -> String {
1416
self.inner.name.clone()
1517
}
1618

19+
fn clean_input(&self, input: &str) -> String {
20+
self
21+
.re
22+
.replace_all(input, "")
23+
.replace("\n", " ")
24+
.replace("  ", " ")
25+
.replace("\x00", "")
26+
.trim()
27+
.to_string()
28+
}
29+
1730
fn chunks(&self, env: Env) -> Result<JsObject> {
1831
let mut array = env.create_array_with_length(self.inner.chunks.len())?;
1932
for (i, chunk) in self.inner.chunks.iter().enumerate() {
33+
let content = self.clean_input(&chunk.content);
34+
2035
let mut obj = env.create_object()?;
2136
obj.set_named_property("index", i as i64)?;
22-
obj.set_named_property("content", chunk.content.clone())?;
37+
obj.set_named_property("content", content)?;
2338
array.set_element(i as u32, obj)?;
2439
}
2540
Ok(array)
@@ -45,7 +60,8 @@ impl Task for AsyncParseDocResponse {
4560

4661
fn compute(&mut self) -> Result<Self::Output> {
4762
let doc = Doc::new(&self.file_path, &self.doc).map_err(|e| anyhow!(e))?;
48-
Ok(Document { inner: doc })
63+
let re = Regex::new(r"(Figure|Table)\s+\d+\.").map_err(|e| anyhow!(e))?;
64+
Ok(Document { inner: doc, re })
4965
}
5066

5167
fn resolve(&mut self, env: Env, doc: Document) -> Result<Self::JsValue> {

packages/backend/server/src/__tests__/copilot.e2e.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { randomUUID } from 'node:crypto';
22

3+
import { Package } from '@affine-tools/utils/workspace';
34
import type { TestFn } from 'ava';
45
import ava from 'ava';
56
import Sinon from 'sinon';
@@ -755,7 +756,7 @@ test('should be able to manage context', async t => {
755756

756757
const fs = await import('node:fs');
757758
const buffer = fs.readFileSync(
758-
new URL('../../../../common/native/fixtures/sample.pdf', import.meta.url)
759+
new Package('@affine/native').join('fixtures/sample.pdf').toFileUrl()
759760
);
760761

761762
{

packages/backend/server/src/__tests__/copilot.spec.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { randomUUID } from 'node:crypto';
22

3+
import { Package } from '@affine-tools/utils/workspace';
34
import type { TestFn } from 'ava';
45
import ava from 'ava';
56
import Sinon from 'sinon';
@@ -1330,7 +1331,7 @@ test('should be able to manage context', async t => {
13301331

13311332
const fs = await import('node:fs');
13321333
const buffer = fs.readFileSync(
1333-
new URL('../../../../common/native/fixtures/sample.pdf', import.meta.url)
1334+
new Package('@affine/native').join('fixtures/sample.pdf').toFileUrl()
13341335
);
13351336

13361337
{

packages/backend/server/src/plugins/copilot/context/job.ts

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import {
1010
Config,
1111
EventBus,
1212
JobQueue,
13-
metrics,
1413
OnEvent,
1514
OnJob,
1615
} from '../../../base';
@@ -161,9 +160,6 @@ export class CopilotContextDocJob implements OnModuleInit {
161160
chunkSize: total,
162161
});
163162
} catch (e: any) {
164-
metrics.doc
165-
.counter('auto_embed_pending_files_error')
166-
.add(1, { contextId, fileId });
167163
this.logger.error(
168164
`Failed to embed pending file: ${contextId}::${fileId}`,
169165
e
@@ -200,9 +196,6 @@ export class CopilotContextDocJob implements OnModuleInit {
200196
}
201197
}
202198
} catch (e: any) {
203-
metrics.doc
204-
.counter('auto_embed_pending_docs_error')
205-
.add(1, { workspaceId });
206199
this.logger.error(
207200
`Failed to embed pending doc: ${workspaceId}::${docId}`,
208201
e

packages/backend/server/src/plugins/copilot/context/types.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,10 @@ export abstract class EmbeddingClient {
122122
});
123123
}
124124
const input = doc.chunks.toSorted((a, b) => a.index - b.index);
125-
// chunk input into 2048 every array
125+
// chunk input into 32 every array
126126
const chunks: Chunk[][] = [];
127-
for (let i = 0; i < input.length; i += 2048) {
128-
chunks.push(input.slice(i, i + 2048));
127+
for (let i = 0; i < input.length; i += 32) {
128+
chunks.push(input.slice(i, i + 32));
129129
}
130130
return chunks;
131131
}

0 commit comments

Comments
 (0)