Skip to content

Commit c2a1e8e

Browse files
authored
[ENH]: refactoring log service (#1958)
## Description of changes

The goal was to refactor the log service to make it simpler and to use a simpler abstraction on top of Postgres. The next PR in the stack adds model/property testing.

*Summarize the changes made by this PR.*
- Improvements & Bug fixes
  - Create a new log service with update offset position

## Test plan

Not tested in this branch for now; another PR adds property testing.
1 parent 193988d commit c2a1e8e

27 files changed

+1358
-186
lines changed

.gitattributes

+2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
*_pb2.py* linguist-generated
22
*_pb2_grpc.py* linguist-generated
3+
go/database/**/db/** linguist-generated=true
4+
go/pkg/proto/** linguist-generated=true

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,6 @@ target/
3838

3939
# environment file generated by the Javascript tests
4040
.chroma_env
41+
42+
# Rapid test data
43+
testdata

go/Makefile

+14
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,17 @@ clean:
1616

1717
docker:
1818
docker build -t chroma-coordinator:latest .
19+
20+
21+
DATABABASE_LOG_DIR := database/log
22+
23+
log_db_clean:
24+
rm -rf $(DATABABASE_LOG_DIR)/db
25+
26+
# The log-database targets are commands, not files: declare them phony so a
# file or directory with the same name can never make them look up to date.
.PHONY: log_db_clean log_db_generate log_db_migration
27+
log_db_generate: log_db_clean
28+
sqlc generate -f $(DATABABASE_LOG_DIR)/sqlc.yaml
29+
30+
log_db_migration:
31+
atlas migrate diff initial --to file://$(DATABABASE_LOG_DIR)/schema --dev-url "docker://postgres/15/dev" --format '{{ sql . " " }}' --dir file://$(DATABABASE_LOG_DIR)/migrations
32+

go/database/log/atlas.hcl

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
env "dev" {
3+
url = "postgresql://chroma:[email protected]:5432/log?sslmode=disable"
4+
migration {
5+
dir = "file://migrations"
6+
}
7+
}
8+
env "prod" {
9+
url = getenv("DB_URL")
10+
migration {
11+
dir = "file://migrations"
12+
}
13+
}

go/database/log/db/copyfrom.go

+44
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/database/log/db/db.go

+33
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/database/log/db/models.go

+20
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/database/log/db/queries.sql.go

+154
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/database/log/migrations/20240401221053_initial.sql

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
-- Create "collection" table
2+
CREATE TABLE "public"."collection" (
3+
"id" text NOT NULL,
4+
"record_compaction_offset_position" bigint NOT NULL,
5+
"record_enumeration_offset_position" bigint NOT NULL,
6+
PRIMARY KEY ("id")
7+
);
8+
-- Create "record_log" table
9+
CREATE TABLE "public"."record_log" (
10+
"offset" bigint NOT NULL,
11+
"collection_id" text NOT NULL,
12+
"timestamp" integer NOT NULL DEFAULT (EXTRACT(epoch FROM now()))::integer,
13+
"record" bytea NOT NULL,
14+
PRIMARY KEY ("collection_id", "offset")
15+
);

go/database/log/migrations/atlas.sum

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
h1:l718NRul/xO5Vz4nKzlWAR9ML+kOkn4TTgIlMQYcUZA=
2+
20240401221053_initial.sql h1:RPywT3bZIeCHgfStvajW3fcDhqadDY5xI9MFjE/Un4U=

go/database/log/queries/queries.sql

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
-- name: GetCollectionForUpdate :one
2+
SELECT *
3+
FROM collection
4+
WHERE id = $1
5+
FOR UPDATE;
6+
7+
-- name: InsertRecord :copyfrom
8+
INSERT INTO record_log (collection_id, "offset", record) values($1, $2, $3);
9+
10+
-- name: GetRecordsForCollection :many
11+
-- Returns at most $3 records of collection $1 whose offset is greater than $2.
-- Rows are ordered by ascending offset so that a limited read yields the
-- records immediately following $2 rather than the newest ones.
SELECT * FROM record_log r WHERE r.collection_id = $1 AND r.offset > $2 ORDER BY r.offset ASC limit $3 ;
12+
13+
-- name: GetAllCollectionsToCompact :many
14+
with summary as (
15+
select r.collection_id, r.offset, r.timestamp, row_number() over(partition by r.collection_id order by r.offset) as rank
16+
from record_log r, collection c
17+
where r.collection_id = c.id
18+
and r.offset > c.record_compaction_offset_position
19+
)
20+
select * from summary
21+
where rank=1
22+
order by timestamp;
23+
24+
-- name: UpdateCollectionCompactionOffsetPosition :exec
25+
UPDATE collection set record_compaction_offset_position = $2 where id = $1;
26+
27+
-- name: UpdateCollectionEnumerationOffsetPosition :exec
28+
UPDATE collection set record_enumeration_offset_position = $2 where id = $1;
29+
30+
-- name: InsertCollection :one
31+
INSERT INTO collection (id, record_enumeration_offset_position, record_compaction_offset_position) values($1, $2, $3) returning *;

go/database/log/schema/collection.sql

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CREATE TABLE collection (
2+
id text PRIMARY KEY,
3+
record_compaction_offset_position bigint NOT NULL,
4+
record_enumeration_offset_position bigint NOT NULL
5+
);
6+
7+
-- The `record_compaction_offset_position` column indicates the offset position of the latest compaction.
8+
-- The `record_enumeration_offset_position` column denotes the incremental offset for the most recent record in a collection.

go/database/log/schema/record_log.sql

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CREATE TABLE record_log (
2+
"offset" BIGINT NOT NULL,
3+
collection_id text NOT NULL,
4+
timestamp int NOT NULL default extract(epoch from now())::int,
5+
record bytea NOT NULL,
6+
PRIMARY KEY(collection_id, "offset")
7+
);
8+

go/database/log/sqlc.yaml

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
version: "2"
2+
sql:
3+
- engine: "postgresql"
4+
queries: "queries/"
5+
schema: "schema/"
6+
gen:
7+
go:
8+
package: "log"
9+
out: "db"
10+
sql_package: "pgx/v5"

0 commit comments

Comments
 (0)