Skip to content

Commit de39008

Browse files
authored
Merge pull request #3472 from autonomys/object-piece-reuse
Re-use the last piece for the next object in a batch
2 parents e7d7eb8 + c56c084 commit de39008

File tree

13 files changed

+623
-103
lines changed

13 files changed

+623
-103
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/subspace-core-primitives/src/objects.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,12 @@
44
//! * for objects within a block
55
//! * for global objects in the global history of the blockchain (inside a piece)
66
7-
#[cfg(not(feature = "std"))]
8-
extern crate alloc;
9-
107
use crate::hashes::Blake3Hash;
118
use crate::pieces::PieceIndex;
12-
#[cfg(not(feature = "std"))]
13-
use alloc::vec::Vec;
149
use core::default::Default;
1510
use parity_scale_codec::{Decode, Encode};
11+
use scale_info::prelude::vec;
12+
use scale_info::prelude::vec::Vec;
1613
use scale_info::TypeInfo;
1714
#[cfg(feature = "serde")]
1815
use serde::{Deserialize, Serialize};
@@ -143,6 +140,14 @@ impl GlobalObjectMapping {
143140
}
144141
}
145142

143+
/// Returns a newly created GlobalObjectMapping from a single object mapping
144+
#[inline]
145+
pub fn from_object(object: GlobalObject) -> Self {
146+
Self::V0 {
147+
objects: vec![object],
148+
}
149+
}
150+
146151
/// Returns the object mappings
147152
pub fn objects(&self) -> &[GlobalObject] {
148153
match self {

crates/subspace-gateway-rpc/README.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ RPC API for Subspace Gateway.
44

55
### Using the gateway RPCs
66

7+
#### Getting Object Mappings
8+
79
The gateway RPCs can fetch data using object mappings supplied by a node.
810

911
Launch a node with `--create-object-mappings blockNumber --sync full`, and wait for mappings from
@@ -41,7 +43,9 @@ subspace_subscribeObjectMappings
4143
}
4244
```
4345

44-
Then use those mappings to get object data from the gateway RPCs:
46+
#### Using Object Mappings to get Objects
47+
48+
Use those mappings to get object data from the gateway RPCs:
4549
```sh
4650
$ websocat --jsonrpc ws://127.0.0.1:9955
4751
subspace_fetchObject {"mappings": {"v0": {"objects": [["0000000000000000000000000000000000000000000000000000000000000000", 0, 0]]}}}
@@ -54,6 +58,16 @@ subspace_fetchObject {"mappings": {"v0": {"objects": [["000000000000000000000000
5458
}
5559
```
5660

61+
For efficiency, objects in a batch should be sorted by increasing piece index. And objects with
62+
the same piece index should be sorted by increasing offset. This allows the last piece to be
63+
re-used for the next object in the batch.
64+
65+
Batches should be split if the gap between object piece indexes is 6 or more. Those objects
66+
can't share any pieces, because a maximum-sized object only uses 6 pieces. (Batches should also
67+
be split so that the response stays within the RPC response size limit.)
68+
69+
### Advanced Usage
70+
5771
#### Missed Mappings
5872

5973
The node doesn't make sure the client has processed the previous mapping before generating the next

crates/subspace-gateway-rpc/src/lib.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,15 @@ impl DerefMut for HexData {
8383
/// Provides rpc methods for interacting with a Subspace DSN Gateway.
8484
#[rpc(client, server)]
8585
pub trait SubspaceGatewayRpcApi {
86-
/// Get object data from DSN object mappings.
86+
/// Get object data from a DSN object mapping batch.
8787
/// Returns an error if any object fetch was unsuccessful.
88+
///
89+
/// For efficiency, objects in a batch should be sorted by increasing piece index. Objects with
90+
/// the same piece index should be sorted by increasing offset. This allows the last piece to
91+
/// be re-used for the next object in the batch.
92+
///
93+
/// Batches should be split if the gap between object piece indexes is 6 or more. Those objects
94+
/// can't share any pieces, because a maximum-sized object only uses 6 pieces.
8895
#[method(name = "subspace_fetchObject")]
8996
async fn fetch_object(&self, mappings: GlobalObjectMapping) -> Result<Vec<HexData>, Error>;
9097
}

crates/subspace-gateway/src/commands/http/server.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,11 @@ where
109109
}
110110
};
111111

112-
// TODO: return a multi-part response, with one part per object
112+
// TODO:
113+
// - return a multi-part response, with one part per object.
114+
// - add the object hash to each part, so we can sort mappings by piece index and offset,
115+
// for more efficient piece re-use. See the `ObjectFetcher::fetch_objects` performance docs
116+
// for more details.
113117
HttpResponse::Ok()
114118
.content_type("application/octet-stream")
115119
.body(objects.concat())

crates/subspace-service/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ sp-session.workspace = true
102102
frame-system-rpc-runtime-api.workspace = true
103103
pallet-transaction-payment-rpc-runtime-api.workspace = true
104104

105+
[dev-dependencies]
106+
static_assertions.workspace = true
107+
105108
[features]
106109
runtime-benchmarks = [
107110
"dep:frame-benchmarking",

crates/subspace-service/src/lib.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,3 +1392,20 @@ fn extract_confirmation_depth(chain_spec: &dyn ChainSpec) -> Option<u32> {
13921392
.ok()?;
13931393
u32::decode(&mut encoded_confirmation_depth.as_slice()).ok()
13941394
}
1395+
1396+
#[cfg(test)]
1397+
mod test {
1398+
use static_assertions::const_assert_eq;
1399+
use subspace_data_retrieval::object_fetcher::MAX_BLOCK_LENGTH as ARCHIVER_MAX_BLOCK_LENGTH;
1400+
use subspace_runtime_primitives::MAX_BLOCK_LENGTH as CONSENSUS_RUNTIME_MAX_BLOCK_LENGTH;
1401+
1402+
/// Runtime and archiver code must agree on the consensus block length.
1403+
/// (This avoids importing all the runtime primitives code into the farmer and gateway.)
1404+
#[test]
1405+
fn max_block_length_consistent() {
1406+
const_assert_eq!(
1407+
CONSENSUS_RUNTIME_MAX_BLOCK_LENGTH,
1408+
ARCHIVER_MAX_BLOCK_LENGTH,
1409+
);
1410+
}
1411+
}

shared/subspace-data-retrieval/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ parity-scale-codec = { workspace = true, features = ["derive"] }
2020
subspace-archiving.workspace = true
2121
subspace-core-primitives = { workspace = true, features = ["std"] }
2222
subspace-erasure-coding.workspace = true
23-
subspace-runtime-primitives = { workspace = true, features = ["std"] }
23+
# This crate can't depend on any runtime code, because it needs to be independent of Substrate.
2424
thiserror.workspace = true
2525
tokio = { workspace = true, features = ["sync", "rt"] }
2626
tracing = { workspace = true, features = ["std"] }
2727

2828
[dev-dependencies]
29-
rand.workspace = true
29+
rand = { workspace = true, features = ["std", "std_rng"] }
3030
subspace-logging.workspace = true
3131
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
3232

shared/subspace-data-retrieval/src/object_fetcher.rs

Lines changed: 75 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ mod segment_header;
2020
#[cfg(test)]
2121
mod tests;
2222

23+
pub use segment_header::MAX_BLOCK_LENGTH;
24+
2325
/// The maximum object length the implementation in this module can reliably handle.
2426
///
2527
/// Currently objects are limited by the largest block size in the consensus chain, which is 5 MB.
@@ -52,6 +54,9 @@ pub fn max_supported_object_length() -> usize {
5254
/// The length of the compact encoding of `max_supported_object_length()`.
5355
const MAX_ENCODED_LENGTH_SIZE: usize = 4;
5456

57+
/// Used to store the last piece downloaded in an object fetcher batch.
58+
pub type LastPieceCache = (PieceIndex, Piece);
59+
5560
/// Object fetching errors.
5661
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
5762
pub enum Error {
@@ -238,11 +243,19 @@ where
238243
/// putting the objects' bytes together.
239244
///
240245
/// Checks the objects' hashes to make sure the correct bytes are returned.
246+
///
247+
/// For efficiency, objects in a batch should be sorted by increasing piece index. Objects with
248+
/// the same piece index should be sorted by increasing offset. This allows the last piece to
249+
/// be re-used for the next object in the batch.
250+
///
251+
/// Batches should be split if the gap between object piece indexes is 6 or more. Those objects
252+
/// can't share any pieces, because a maximum-sized object only uses 6 pieces.
241253
pub async fn fetch_objects(
242254
&self,
243255
mappings: GlobalObjectMapping,
244256
) -> Result<Vec<Vec<u8>>, Error> {
245257
let mut objects = Vec::with_capacity(mappings.objects().len());
258+
let mut piece_cache = None;
246259

247260
// TODO:
248261
// - keep the last downloaded piece until it's no longer needed
@@ -292,7 +305,7 @@ where
292305

293306
// All objects can be assembled from individual pieces, we handle segments by checking
294307
// all possible padding, and parsing and discarding segment headers.
295-
let data = self.fetch_object(mapping).await?;
308+
let data = self.fetch_object(mapping, &mut piece_cache).await?;
296309

297310
objects.push(data);
298311
}
@@ -312,7 +325,11 @@ where
312325
/// or fetch more data.
313326
//
314327
// TODO: return last downloaded piece from fetch_object() and pass them to the next fetch_object()
315-
async fn fetch_object(&self, mapping: GlobalObject) -> Result<Vec<u8>, Error> {
328+
async fn fetch_object(
329+
&self,
330+
mapping: GlobalObject,
331+
piece_cache: &mut Option<LastPieceCache>,
332+
) -> Result<Vec<u8>, Error> {
316333
let GlobalObject {
317334
piece_index,
318335
offset,
@@ -327,7 +344,9 @@ where
327344

328345
// Get pieces until we have enough data to calculate the object's length(s).
329346
// Objects with their length bytes at the end of a piece are a rare edge case.
330-
let piece = self.read_piece(next_source_piece_index, mapping).await?;
347+
let piece = self
348+
.read_piece(next_source_piece_index, mapping, piece_cache)
349+
.await?;
331350

332351
// Discard piece data before the offset.
333352
// If this is the first piece in a segment, this automatically skips the segment header.
@@ -367,7 +386,9 @@ where
367386
);
368387

369388
// Get the second piece for the object
370-
let piece = self.read_piece(next_source_piece_index, mapping).await?;
389+
let piece = self
390+
.read_piece(next_source_piece_index, mapping, piece_cache)
391+
.await?;
371392
// We want all the piece data
372393
let piece_data = piece
373394
.record()
@@ -427,7 +448,7 @@ where
427448
// TODO: turn this into a concurrent stream, which cancels piece downloads if they aren't
428449
// needed
429450
let pieces = self
430-
.read_pieces(remaining_piece_indexes.clone(), mapping)
451+
.read_pieces(remaining_piece_indexes.clone(), mapping, piece_cache)
431452
.await?
432453
.into_iter()
433454
.zip(remaining_piece_indexes.iter().copied())
@@ -476,22 +497,31 @@ where
476497
&self,
477498
piece_indexes: Arc<[PieceIndex]>,
478499
mapping: GlobalObject,
500+
piece_cache: &mut Option<LastPieceCache>,
479501
) -> Result<Vec<Piece>, Error> {
480-
download_pieces(piece_indexes.clone(), &self.piece_getter)
481-
.await
482-
.map_err(|source| {
483-
debug!(
484-
?piece_indexes,
485-
error = ?source,
486-
?mapping,
487-
"Error fetching pieces during object assembling"
488-
);
502+
download_pieces(
503+
piece_indexes.clone(),
504+
&piece_cache.clone().with_fallback(self.piece_getter.clone()),
505+
)
506+
.await
507+
.inspect(|pieces| {
508+
if let (Some(piece_index), Some(piece)) = (piece_indexes.last(), pieces.last()) {
509+
*piece_cache = Some((*piece_index, piece.clone()))
510+
}
511+
})
512+
.map_err(|source| {
513+
debug!(
514+
?piece_indexes,
515+
error = ?source,
516+
?mapping,
517+
"Error fetching pieces during object assembling"
518+
);
489519

490-
Error::PieceGetterError {
491-
error: format!("{source:?}"),
492-
mapping,
493-
}
494-
})
520+
Error::PieceGetterError {
521+
error: format!("{source:?}"),
522+
mapping,
523+
}
524+
})
495525
}
496526

497527
/// Read and return a single piece.
@@ -501,28 +531,34 @@ where
501531
&self,
502532
piece_index: PieceIndex,
503533
mapping: GlobalObject,
534+
piece_cache: &mut Option<LastPieceCache>,
504535
) -> Result<Piece, Error> {
505-
download_pieces(vec![piece_index].into(), &self.piece_getter)
506-
.await
507-
.map(|pieces| {
508-
pieces
509-
.first()
510-
.expect("download_pieces always returns exact pieces or error")
511-
.clone()
512-
})
513-
.map_err(|source| {
514-
debug!(
515-
%piece_index,
516-
error = ?source,
517-
?mapping,
518-
"Error fetching piece during object assembling"
519-
);
536+
let piece_indexes = Arc::<[PieceIndex]>::from(vec![piece_index]);
537+
download_pieces(
538+
piece_indexes.clone(),
539+
&piece_cache.clone().with_fallback(self.piece_getter.clone()),
540+
)
541+
.await
542+
.inspect(|pieces| *piece_cache = Some((piece_index, pieces[0].clone())))
543+
.map(|pieces| {
544+
pieces
545+
.first()
546+
.expect("download_pieces always returns exact pieces or error")
547+
.clone()
548+
})
549+
.map_err(|source| {
550+
debug!(
551+
%piece_index,
552+
error = ?source,
553+
?mapping,
554+
"Error fetching piece during object assembling"
555+
);
520556

521-
Error::PieceGetterError {
522-
error: format!("{source:?}"),
523-
mapping,
524-
}
525-
})
557+
Error::PieceGetterError {
558+
error: format!("{source:?}"),
559+
mapping,
560+
}
561+
})
526562
}
527563
}
528564

shared/subspace-data-retrieval/src/object_fetcher/segment_header.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ use subspace_core_primitives::objects::GlobalObject;
1212
use subspace_core_primitives::segments::{
1313
ArchivedBlockProgress, LastArchivedBlock, SegmentCommitment, SegmentHeader, SegmentIndex,
1414
};
15-
use subspace_runtime_primitives::MAX_BLOCK_LENGTH;
1615

1716
/// The maximum amount of segment padding.
1817
///
@@ -22,6 +21,10 @@ use subspace_runtime_primitives::MAX_BLOCK_LENGTH;
2221
/// <https://docs.substrate.io/reference/scale-codec/#fn-1>
2322
pub const MAX_SEGMENT_PADDING: usize = 3;
2423

24+
/// Maximum block length for non-`Normal` extrinsic is 5 MiB.
25+
/// This is a copy of the constant in `subspace_runtime_primitives`.
26+
pub const MAX_BLOCK_LENGTH: u32 = 5 * 1024 * 1024;
27+
2528
/// The segment version this code knows how to parse.
2629
const SEGMENT_VERSION_VARIANT: u8 = 0;
2730

@@ -156,7 +159,6 @@ mod test {
156159
use parity_scale_codec::{Compact, CompactLen};
157160
use subspace_archiving::archiver::Segment;
158161
use subspace_core_primitives::objects::BlockObjectMapping;
159-
use subspace_runtime_primitives::MAX_BLOCK_LENGTH;
160162

161163
#[test]
162164
fn max_segment_padding_constant() {

0 commit comments

Comments
 (0)