Skip to content

Commit 9c2865a

Browse files
committed
Optimise sync_pull to pull frames in batches
Previously, we pulled frames one by one. This patch changes it to pull frames in batches. Currently, the batch size is set to 128 (the maximum supported by the server).
1 parent 395b2ca commit 9c2865a

File tree

1 file changed

+40
-11
lines changed

1 file changed

+40
-11
lines changed

libsql/src/sync.rs

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ const METADATA_VERSION: u32 = 0;
2020

2121
const DEFAULT_MAX_RETRIES: usize = 5;
2222
const DEFAULT_PUSH_BATCH_SIZE: u32 = 128;
23+
const DEFAULT_PULL_BATCH_SIZE: u32 = 128;
2324

2425
#[derive(thiserror::Error, Debug)]
2526
#[non_exhaustive]
@@ -66,6 +67,8 @@ pub enum SyncError {
6667
InvalidLocalGeneration(u32, u32),
6768
#[error("invalid local state: {0}")]
6869
InvalidLocalState(String),
70+
#[error("server returned invalid length of frames: {0}")]
71+
InvalidPullFrameBytes(usize),
6972
}
7073

7174
impl SyncError {
@@ -98,8 +101,8 @@ pub enum PushStatus {
98101
}
99102

100103
pub enum PullResult {
101-
/// A frame was successfully pulled.
102-
Frame(Bytes),
104+
/// Frames were successfully pulled.
105+
Frames(Bytes),
103106
/// We've reached the end of the generation.
104107
EndOfGeneration { max_generation: u32 },
105108
}
@@ -122,6 +125,7 @@ pub struct SyncContext {
122125
auth_token: Option<HeaderValue>,
123126
max_retries: usize,
124127
push_batch_size: u32,
128+
pull_batch_size: u32,
125129
/// The current durable generation.
126130
durable_generation: u32,
127131
/// Represents the max_frame_no from the server.
@@ -154,6 +158,7 @@ impl SyncContext {
154158
auth_token,
155159
max_retries: DEFAULT_MAX_RETRIES,
156160
push_batch_size: DEFAULT_PUSH_BATCH_SIZE,
161+
pull_batch_size: DEFAULT_PULL_BATCH_SIZE,
157162
client,
158163
durable_generation: 0,
159164
durable_frame_num: 0,
@@ -175,7 +180,7 @@ impl SyncContext {
175180
}
176181

177182
#[tracing::instrument(skip(self))]
178-
pub(crate) async fn pull_one_frame(
183+
pub(crate) async fn pull_frames(
179184
&mut self,
180185
generation: u32,
181186
frame_no: u32,
@@ -185,9 +190,11 @@ impl SyncContext {
185190
self.sync_url,
186191
generation,
187192
frame_no,
188-
frame_no + 1
193+
// the server expects the range of [start, end) frames, i.e. end is exclusive
194+
// so passing `frame_no + self.pull_batch_size` as the end pulls exactly `self.pull_batch_size` frames
195+
frame_no + self.pull_batch_size
189196
);
190-
tracing::debug!("pulling frame");
197+
tracing::debug!("pulling frame (uri={})", uri);
191198
self.pull_with_retry(uri, self.max_retries).await
192199
}
193200

@@ -420,7 +427,7 @@ impl SyncContext {
420427
let frame = hyper::body::to_bytes(res.into_body())
421428
.await
422429
.map_err(SyncError::HttpBody)?;
423-
return Ok(PullResult::Frame(frame));
430+
return Ok(PullResult::Frames(frame));
424431
}
425432
// BUG ALERT: The server returns a 500 error if the remote database is empty.
426433
// This is a bug and should be fixed.
@@ -887,6 +894,11 @@ async fn try_push(
887894
})
888895
}
889896

897+
/// PAGE_SIZE used by the sync / diskless server
898+
const PAGE_SIZE: usize = 4096;
899+
const FRAME_HEADER_SIZE: usize = 24;
900+
const FRAME_SIZE: usize = PAGE_SIZE + FRAME_HEADER_SIZE;
901+
890902
pub async fn try_pull(
891903
sync_ctx: &mut SyncContext,
892904
conn: &Connection,
@@ -898,10 +910,27 @@ pub async fn try_pull(
898910
loop {
899911
let generation = sync_ctx.durable_generation();
900912
let frame_no = sync_ctx.durable_frame_num() + 1;
901-
match sync_ctx.pull_one_frame(generation, frame_no).await {
902-
Ok(PullResult::Frame(frame)) => {
903-
insert_handle.insert(&frame)?;
904-
sync_ctx.durable_frame_num = frame_no;
913+
match sync_ctx.pull_frames(generation, frame_no).await {
914+
Ok(PullResult::Frames(frames)) => {
915+
tracing::trace!(
916+
"pull_frames: generation={}, start_frame_no={} (batch_size={}), frame_size={}",
917+
generation,
918+
frame_no,
919+
sync_ctx.pull_batch_size,
920+
frames.len(),
921+
);
922+
if frames.len() % FRAME_SIZE != 0 {
923+
tracing::error!(
924+
"frame size {} is not a multiple of the expected size {}",
925+
frames.len(),
926+
FRAME_SIZE,
927+
);
928+
return Err(SyncError::InvalidPullFrameBytes(frames.len()).into());
929+
}
930+
for chunk in frames.chunks(FRAME_SIZE) {
931+
insert_handle.insert(&chunk)?;
932+
sync_ctx.durable_frame_num += 1;
933+
}
905934
}
906935
Ok(PullResult::EndOfGeneration { max_generation }) => {
907936
// If there are no more generations to pull, we're done.
@@ -920,7 +949,7 @@ pub async fn try_pull(
920949
insert_handle.begin()?;
921950
}
922951
Err(e) => {
923-
tracing::debug!("pull_one_frame error: {:?}", e);
952+
tracing::debug!("pull_frames error: {:?}", e);
924953
err.replace(e);
925954
break;
926955
}

0 commit comments

Comments
 (0)