@@ -23,7 +23,8 @@ export function findChunksToUpdateAndRemove(
2323 metadata : BookChunk ;
2424 } [ ] ,
2525) : {
26- chunksToUpdate : Document < BookChunk > [ ] ;
26+ contentChanged : Document < BookChunk > [ ] ;
27+ metadataOnlyChanged : Document < BookChunk > [ ] ;
2728 chunksToRemove : string [ ] ;
2829} {
2930 const storedDataMap = new Map (
@@ -33,31 +34,51 @@ export function findChunksToUpdateAndRemove(
3334 freshChunks . map ( ( chunk ) => [ chunk . metadata . uniqueId , chunk ] ) ,
3435 ) ;
3536
36- // Find chunks that need to be updated (content or metadata has changed)
37- const chunksToUpdate = freshChunks . filter ( ( chunk ) => {
38- const storedMetadata = storedDataMap . get ( chunk . metadata . uniqueId ) ;
39- if ( ! storedMetadata ) {
40- // New chunk that doesn't exist in storage
41- return true ;
37+ const contentChanged : Document < BookChunk > [ ] = [ ] ;
38+ const metadataOnlyChanged : Document < BookChunk > [ ] = [ ] ;
39+
40+ for ( const fresh of freshChunks ) {
41+ const stored = storedDataMap . get ( fresh . metadata . uniqueId ) ;
42+ if ( ! stored ) {
43+ // New doc: requires full insert + embedding
44+ contentChanged . push ( fresh ) ;
45+ continue ;
46+ }
47+
48+ const storedHash = stored . contentHash ;
49+ const freshHash = fresh . metadata . contentHash ;
50+ if ( storedHash !== freshHash ) {
51+ // Content changed: re-embed and upsert fully
52+ contentChanged . push ( fresh ) ;
53+ continue ;
4254 }
43- // Update if content hash changed or any metadata field changed
44- for ( const key in chunk . metadata ) {
45- if (
46- storedMetadata [ key as keyof BookChunk ] !==
47- chunk . metadata [ key as keyof BookChunk ]
48- ) {
49- return true ;
55+
56+ // Content same, check if any metadata field differs
57+ const keys = new Set < keyof BookChunk > ( [
58+ ...( Object . keys ( stored ) as ( keyof BookChunk ) [ ] ) ,
59+ ...( Object . keys ( fresh . metadata ) as ( keyof BookChunk ) [ ] ) ,
60+ ] ) ;
61+
62+ let metaDiffers = false ;
63+ for ( const key of keys ) {
64+ // Ignore contentHash here since we already know it's equal
65+ if ( key === 'contentHash' ) continue ;
66+ if ( stored [ key ] !== fresh . metadata [ key ] ) {
67+ metaDiffers = true ;
68+ break ;
5069 }
5170 }
52- return false ;
53- } ) ;
71+ if ( metaDiffers ) {
72+ metadataOnlyChanged . push ( fresh ) ;
73+ }
74+ }
5475
5576 // Find chunks that need to be removed (no longer exist in fresh chunks)
5677 const chunksToRemove = storedChunkHashes
5778 . filter ( ( stored ) => ! freshChunksMap . has ( stored . uniqueId ) )
5879 . map ( ( stored ) => stored . uniqueId ) ;
5980
60- return { chunksToUpdate , chunksToRemove } ;
81+ return { contentChanged , metadataOnlyChanged , chunksToRemove } ;
6182}
6283
6384/**
@@ -80,16 +101,18 @@ export async function updateVectorStore(
80101 await vectorStore . getStoredBookPagesMetadata ( source ) ;
81102
82103 // Find chunks to update and remove
83- const { chunksToUpdate, chunksToRemove } = findChunksToUpdateAndRemove (
84- chunks ,
85- storedChunkHashes ,
86- ) ;
104+ const { contentChanged, metadataOnlyChanged, chunksToRemove } =
105+ findChunksToUpdateAndRemove ( chunks , storedChunkHashes ) ;
87106
88107 logger . info (
89- `Found ${ storedChunkHashes . length } stored chunks for source: ${ source } . ${ chunksToUpdate . length } chunks to update and ${ chunksToRemove . length } chunks to remove ` ,
108+ `Found ${ storedChunkHashes . length } stored chunks for source: ${ source } . ${ contentChanged . length } content changes, ${ metadataOnlyChanged . length } metadata-only changes, and ${ chunksToRemove . length } removals ` ,
90109 ) ;
91110
92- if ( chunksToUpdate . length === 0 && chunksToRemove . length === 0 ) {
111+ if (
112+ contentChanged . length === 0 &&
113+ metadataOnlyChanged . length === 0 &&
114+ chunksToRemove . length === 0
115+ ) {
93116 logger . info ( 'No changes to update or remove' ) ;
94117 return ;
95118 }
@@ -129,13 +152,19 @@ export async function updateVectorStore(
129152 }
130153
131154 // Update chunks that have changed
132- if ( chunksToUpdate . length > 0 ) {
133- await vectorStore . addDocuments ( chunksToUpdate , {
134- ids : chunksToUpdate . map ( ( chunk ) => chunk . metadata . uniqueId ) ,
155+ if ( contentChanged . length > 0 ) {
156+ await vectorStore . addDocuments ( contentChanged , {
157+ ids : contentChanged . map ( ( chunk ) => chunk . metadata . uniqueId ) ,
158+ } ) ;
159+ }
160+
161+ if ( metadataOnlyChanged . length > 0 ) {
162+ await vectorStore . updateDocumentsMetadata ( metadataOnlyChanged , {
163+ ids : metadataOnlyChanged . map ( ( chunk ) => chunk . metadata . uniqueId ) ,
135164 } ) ;
136165 }
137166
138167 logger . info (
139- `Updated ${ chunksToUpdate . length } chunks and removed ${ chunksToRemove . length } chunks for source: ${ source } .` ,
168+ `Updated ${ contentChanged . length } content chunks, ${ metadataOnlyChanged . length } metadata-only chunks, and removed ${ chunksToRemove . length } chunks for source: ${ source } .` ,
140169 ) ;
141170}
0 commit comments