Skip to content

Commit ebd4f66

Browse files
committed
wip
1 parent cd93953 commit ebd4f66

10 files changed

+130
-21
lines changed

config/knowledge_base.php

+5-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
'timeout' => env('KNOWLEDGE_BASE_TIMEOUT', 90),
77
],
88
'models' => [
9-
'knowledge_base_id' => \Borah\KnowledgeBase\Models\KnowledgeBaseId::class,
9+
'knowledge_base_chunk' => \Borah\KnowledgeBase\Models\KnowledgeBaseChunk::class,
10+
],
11+
'chunking' => [
12+
'size' => env('KNOWLEDGE_BASE_CHUNK_SIZE', 1000),
13+
'overlap' => env('KNOWLEDGE_BASE_CHUNK_OVERLAP', 100),
1014
],
1115
];

database/migrations/2024_01_01_000000_create_knowledge_base_ids_table.php renamed to database/migrations/2024_01_01_000000_create_knowledge_base_chunks_table.php

+3-1
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111
*/
1212
public function up(): void
{
    // One row per text chunk; every chunk points at its owning model through
    // the polymorphic `model` columns.
    Schema::create('knowledge_base_chunks', static function (Blueprint $table): void {
        $table->uuid('id')->primary();

        // Position of the chunk among the chunks of the same owner.
        $table->unsignedInteger('order');

        // The chunk's text content.
        $table->mediumText('text');

        $table->morphs('model');
    });
}

src/Client/KnowledgeBaseClient.php

+33
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace Borah\KnowledgeBase\Client;
44

5+
use Borah\KnowledgeBase\DTO\KnowledgeBaseChunkItem;
56
use Borah\KnowledgeBase\DTO\KnowledgeBaseQueryResponse;
67
use Illuminate\Http\Client\PendingRequest;
78
use Illuminate\Support\Facades\Http;
@@ -27,6 +28,7 @@ public function __construct()
2728
*/
2829
public function upsert(array $data): bool
2930
{
31+
logger()->debug('[KnowledgeBaseClient] Upserting data', $data);
3032
return $this->client->post('/insert', [
3133
'data' => $data,
3234
])
@@ -42,6 +44,7 @@ public function upsert(array $data): bool
4244
*/
4345
public function destroy(mixed $id): bool
4446
{
47+
logger()->debug('[KnowledgeBaseClient] Deleting record', ['id' => $id]);
4548
return $this->client->delete('/delete/'.$id)
4649
->throw()
4750
->json('success') ?: false;
@@ -57,6 +60,12 @@ public function destroy(mixed $id): bool
5760
*/
5861
public function query(string $text, int $k = 10, ?array $entities = null, ?array $where = null): KnowledgeBaseQueryResponse
5962
{
63+
logger()->debug('[KnowledgeBaseClient] Querying knowledge base', [
64+
'text' => $text,
65+
'k' => $k,
66+
'entities' => $entities,
67+
'where' => $where,
68+
]);
6069
$params = [
6170
'query' => $text,
6271
'k' => $k,
@@ -76,4 +85,28 @@ public function query(string $text, int $k = 10, ?array $entities = null, ?array
7685

7786
return KnowledgeBaseQueryResponse::from($response);
7887
}
88+
89+
/**
 * Asks the knowledge base service to split the given records into text chunks.
 *
 * The records are posted to the `/chunk` endpoint and every entry of the
 * `chunks` key in the response is turned into a KnowledgeBaseChunkItem.
 *
 * @param array<\Borah\KnowledgeBase\DTO\KnowledgeBaseChunkItem> $records
 * @param int|null $chunkSize Chunk size; falls back to the `knowledge_base.chunking.size` config value when null.
 * @param int|null $chunkOverlap Chunk overlap; falls back to the `knowledge_base.chunking.overlap` config value when null.
 *
 * @return array<\Borah\KnowledgeBase\DTO\KnowledgeBaseChunkItem>
 *
 * @throws \Illuminate\Http\Client\RequestException When the service answers with a client or server error.
 */
public function chunk(array $records, ?int $chunkSize = null, ?int $chunkOverlap = null): array
{
    logger()->debug('[KnowledgeBaseClient] Chunking data');

    // json_encode() does not honour Arrayable, so serialise the DTOs through
    // their own toArray() contract instead of relying on public properties.
    // Anything that is not a chunk item (e.g. a plain array) is sent as-is.
    $payload = array_map(
        static fn ($record) => $record instanceof KnowledgeBaseChunkItem ? $record->toArray() : $record,
        $records,
    );

    $chunks = $this->client->post('/chunk', [
        'data' => $payload,
        'chunk_size' => $chunkSize ?? config('knowledge_base.chunking.size'),
        'chunk_overlap' => $chunkOverlap ?? config('knowledge_base.chunking.overlap'),
    ])
        ->throw()
        ->collect('chunks');

    return $chunks
        ->map(static fn (array $item) => new KnowledgeBaseChunkItem(
            id: $item['id'],
            text: $item['text'],
        ))
        ->all();
}
79112
}

src/DTO/KnowledgeBaseChunkItem.php

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<?php
2+
3+
namespace Borah\KnowledgeBase\DTO;
4+
5+
use Illuminate\Contracts\Support\Arrayable;
6+
7+
/**
 * Immutable pair of an identifier and a piece of text.
 */
class KnowledgeBaseChunkItem implements Arrayable
{
    /**
     * @param mixed $id Identifier carried alongside the text.
     * @param string $text The text content.
     */
    public function __construct(
        public readonly mixed $id,
        public readonly string $text,
    ) {}

    /**
     * Exposes the item as a plain `id` / `text` array.
     *
     * @return array{id: mixed, text: string}
     */
    public function toArray(): array
    {
        return ['id' => $this->id, 'text' => $this->text];
    }
}

src/DTO/KnowledgeBaseQueryResponse.php

+5-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
namespace Borah\KnowledgeBase\DTO;
44

5-
use Borah\KnowledgeBase\Models\KnowledgeBaseId;
5+
use Borah\KnowledgeBase\Models\KnowledgeBaseChunk;
66
use Illuminate\Database\Eloquent\Collection;
77

88
class KnowledgeBaseQueryResponse
@@ -25,14 +25,16 @@ public static function from(array $response): static
2525
public function models(): Collection
{
    // Resolves the query results to their owning models, keeping the ranking
    // of the results and returning every owner at most once.
    $ids = collect($this->results)->pluck('id');

    // Remember the first position of every result id once, instead of
    // running a linear, loosely-typed array_search() per sorted record.
    $positions = [];
    foreach ($ids->values() as $position => $id) {
        $positions[$id] ??= $position;
    }

    return KnowledgeBaseChunk::query()
        ->with('model')
        ->whereIn('id', $ids)
        ->get()
        // A chunk whose owner no longer exists would map to null, which turns
        // the mapped result into a base collection and breaks the return type.
        ->filter(fn ($record) => $record->model !== null)
        // Sort the records in the same order as the results.
        ->sortBy(fn ($record) => $positions[$record->id] ?? PHP_INT_MAX)
        // Several chunks can share one owner. Deduplicate on type AND key:
        // an argument-less unique() on an Eloquent collection compares the
        // primary key only and would merge different model classes that
        // happen to share an id.
        ->unique(fn ($record) => $record->model_type.':'.$record->model_id)
        ->map(fn ($record) => $record->model)
        ->values();
}
3840
}

src/DTO/KnowledgeBaseQueryResponseItem.php

-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ public function __construct(
88
public readonly mixed $id,
99
public readonly string $entity,
1010
public readonly string $text,
11-
public readonly float $vectorScore,
1211
public readonly float $rerankingScore,
1312
public readonly ?array $payload = null,
1413
) {
@@ -21,7 +20,6 @@ public static function from(array $result): static
2120
id: $result['id'],
2221
entity: $result['entity'],
2322
text: $result['text'],
24-
vectorScore: $result['vector_score'],
2523
rerankingScore: $result['reranking_score'],
2624
payload: $result['payload'] ?? null,
2725
);

src/KnowledgeBase.php

+19-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ public function destroy(Model $model): bool
3232
$client = new KnowledgeBaseClient();
3333
$items = $model->knowledgeInsertItems();
3434

35-
return $client->destroy($items[0]->id);
35+
foreach ($items as $item) {
36+
$client->destroy($item->id);
37+
}
38+
39+
return true;
3640
}
3741

3842
public function query(string $text, int $k = 10, ?array $entities = null, ?array $where = null): KnowledgeBaseQueryResponse
@@ -41,4 +45,18 @@ public function query(string $text, int $k = 10, ?array $entities = null, ?array
4145

4246
return $client->query($text, $k, $entities, $where);
4347
}
48+
49+
/**
 * Splits the given records into text chunks by delegating to a fresh
 * KnowledgeBaseClient.
 *
 * @param array<\Borah\KnowledgeBase\DTO\KnowledgeBaseChunkItem> $records
 * @param int|null $chunkSize Forwarded unchanged to the client.
 * @param int|null $chunkOverlap Forwarded unchanged to the client.
 *
 * @return array<\Borah\KnowledgeBase\DTO\KnowledgeBaseChunkItem>
 */
public function chunk(array $records, ?int $chunkSize = null, ?int $chunkOverlap = null): array
{
    return (new KnowledgeBaseClient())->chunk($records, $chunkSize, $chunkOverlap);
}
4462
}

src/KnowledgeBaseServiceProvider.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public function configurePackage(Package $package): void
1818
$package
1919
->name('knowledge-base-laravel')
2020
->hasConfigFile('knowledge_base')
21-
->hasMigration('2024_01_01_000000_create_knowledge_base_ids_table')
21+
->hasMigration('2024_01_01_000000_create_knowledge_base_chunks_table')
2222
->hasCommand(ReimportKnowledgeBaseCommand::class);
2323
}
2424
}

src/Models/KnowledgeBaseId.php renamed to src/Models/KnowledgeBaseChunk.php

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
use Illuminate\Database\Eloquent\Concerns\HasUuids;
66
use Illuminate\Database\Eloquent\Model;
77

8-
class KnowledgeBaseId extends Model
8+
class KnowledgeBaseChunk extends Model
99
{
1010
use HasUuids;
1111

1212
/**
 * Attributes that may be mass assigned.
 *
 * `order` has to be listed as well: chunks are created through the
 * relationship with both `text` and `order`, and an attribute that is not
 * fillable is dropped before the insert, leaving the `order` column without
 * a value.
 *
 * @var array<int, string>
 */
protected $fillable = [
    'id',
    'model_type',
    'model_id',
    'text',
    'order',
];
1718

1819
public $timestamps = false;

src/Traits/BelongsToKnowledgeBase.php

+39-11
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22

33
namespace Borah\KnowledgeBase\Traits;
44

5+
use Borah\KnowledgeBase\Client\KnowledgeBaseClient;
56
use Borah\KnowledgeBase\DTO\KnowledgeBaseQueryResponse;
67
use Borah\KnowledgeBase\DTO\KnowledgeEmbeddingText;
78
use Borah\KnowledgeBase\DTO\KnowledgeInsertItem;
89
use Borah\KnowledgeBase\Facades\KnowledgeBase;
910
use Illuminate\Database\Eloquent\Model;
10-
use Illuminate\Database\Eloquent\Relations\MorphOne;
11+
use Illuminate\Database\Eloquent\Relations\MorphMany;
1112
use function Illuminate\Events\queueable;
1213

1314
trait BelongsToKnowledgeBase
@@ -27,33 +28,60 @@ public static function bootBelongsToKnowledgeBase()
2728
}));
2829
}
2930

30-
public function knowledgeBaseId(): MorphOne
31+
/**
 * Polymorphic one-to-many relation to the chunk rows owned by this model.
 *
 * The related class is read from the `knowledge_base.models.knowledge_base_chunk`
 * config entry.
 */
public function knowledgeBaseChunks(): MorphMany
{
    $chunkModel = config('knowledge_base.models.knowledge_base_chunk');

    return $this->morphMany($chunkModel, 'model');
}
35+
36+
/**
 * Extra payload merged into every knowledge base item built for this model.
 *
 * Empty by default.
 *
 * @return array<string, mixed>
 */
public function knowledgeBasePayload(): array
{
    $payload = [];

    return $payload;
}
3440

3541
/**
 * Builds one insert item per embedding text and keeps the local chunk rows
 * in sync with those texts.
 *
 * For every text the chunk stored at the same position is reused (its text
 * is updated when it changed) or a new chunk is created. Chunks stored
 * beyond the current number of texts are removed from the knowledge base
 * and from the database.
 *
 * @return KnowledgeInsertItem[]
 */
public function knowledgeInsertItems(): array
{
    $texts = $this->getEmbeddingsTexts();
    if (! is_array($texts)) {
        $texts = [$texts];
    }

    // The position of a text is persisted as the chunk's `order`, so the
    // positions must be the consecutive integers 0..n-1.
    $texts = array_values($texts);

    // Index the stored chunks by their `order`. Sorting is not enough here:
    // sortBy() keeps the original keys, so `$chunks[$i]` on a sorted
    // collection still yields the i-th *retrieved* row, which may be a chunk
    // that the cleanup below is about to delete.
    $existingChunks = $this->knowledgeBaseChunks()->get()->keyBy('order');

    $items = [];
    foreach ($texts as $i => $text) {
        $chunk = $existingChunks->get($i) ?? $this->knowledgeBaseChunks()->create([
            'text' => $text->text,
            'order' => $i,
        ]);

        // Only reused chunks can be stale; a fresh one already has the text.
        if (! $chunk->wasRecentlyCreated && $chunk->text !== $text->text) {
            $chunk->update(['text' => $text->text]);
        }

        $items[] = new KnowledgeInsertItem(
            id: $chunk->id,
            entity: $text->entity,
            text: $text->text,
            payload: [
                ...$this->knowledgeBasePayload(),
                'original_record_id' => $this->getKey(),
            ],
        );
    }

    // Everything stored at a position the model no longer produces is stale.
    $staleChunks = $this->knowledgeBaseChunks()
        ->where('order', '>=', count($texts))
        ->select('id', 'order')
        ->get();

    if ($staleChunks->isNotEmpty()) {
        // Build the client only when there is actually something to delete.
        $client = new KnowledgeBaseClient();
        foreach ($staleChunks as $staleChunk) {
            $client->destroy($staleChunk->id);
            $staleChunk->delete();
        }
    }

    return $items;
}
5886

5987
public static function searchInKnowledgeBase(string $query, int $k = 10, ?array $where = null): KnowledgeBaseQueryResponse

0 commit comments

Comments
 (0)