diff --git a/Cargo.lock b/Cargo.lock index 326787cb8..74f7f7c73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5082,7 +5082,7 @@ checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", - "zerocopy 0.8.21", + "zerocopy 0.8.22", ] [[package]] @@ -7833,11 +7833,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf01143b2dd5d134f11f545cf9f1431b13b749695cb33bcce051e7568f99478" +checksum = "09612fda0b63f7cb9e0af7e5916fe5a1f8cdcb066829f10f36883207628a4872" dependencies = [ - "zerocopy-derive 0.8.21", + "zerocopy-derive 0.8.22", ] [[package]] @@ -7853,9 +7853,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712c8386f4f4299382c9abee219bee7084f78fb939d88b6840fcc1320d5f6da2" +checksum = "79f81d38d7a2ed52d8f034e62c568e111df9bf8aba2f7cf19ddc5bf7bd89d520" dependencies = [ "proc-macro2", "quote", diff --git a/inspect/src/index.rs b/inspect/src/index.rs index cd3cbc642..ed9c631ea 100644 --- a/inspect/src/index.rs +++ b/inspect/src/index.rs @@ -14,7 +14,7 @@ use crate::repl::PayloadFormatter; pub(crate) type IndexDb = MerkleTreeKvDb< BlockTree, IndexNode, - PgsqlStorage>, + PgsqlStorage, false>, >; struct IndexPayloadFormatterDisplay { diff --git a/inspect/src/main.rs b/inspect/src/main.rs index 411ec61bb..99c3adc14 100644 --- a/inspect/src/main.rs +++ b/inspect/src/main.rs @@ -5,7 +5,7 @@ use repl::Repl; use rows::{RowDb, RowPayloadFormatter}; use ryhope::{ storage::pgsql::{SqlServerConnection, SqlStorageSettings, ToFromBytea}, - Epoch, InitSettings, + InitSettings, UserEpoch, }; use serde::Serialize; @@ -26,7 +26,7 @@ struct Args { #[arg(short = 'E', long = "at")] /// If set, try to view the tree at this epoch - epoch: Option, + epoch: Option, #[command(subcommand)] /// The type of tree to load from the database @@ -77,6 +77,8 @@ async fn main() -> Result<()> { SqlStorageSettings { source: SqlServerConnection::NewConnection(args.db_uri.clone()), table: args.db_table, + external_mapper: None, // not necessary even if there is an external epoch mapper, + // since we are initializing the tree with `InitSettings::MustExist` }, ) .await?; @@ -91,7 +93,7 @@ async fn main() -> Result<()> { let mut repl = Repl::new(tree_db, payload_fmt).await?; if let Some(epoch) = args.epoch { - repl.set_epoch(epoch)?; + repl.set_epoch(epoch).await?; } repl.run().await } @@ -101,6 +103,7 @@ async fn main() -> Result<()> { SqlStorageSettings { source: SqlServerConnection::NewConnection(args.db_uri.clone()), table: args.db_table, + external_mapper: None, }, ) .await?; @@ -109,7 +112,7 @@ async fn main() -> Result<()> { let mut repl = Repl::new(tree_db, payload_fmt).await?; if let Some(epoch) = args.epoch { - repl.set_epoch(epoch)?; + repl.set_epoch(epoch).await?; } repl.run().await } diff --git a/inspect/src/repl.rs b/inspect/src/repl.rs index c7748f2b9..84e765658 100644 --- a/inspect/src/repl.rs +++ b/inspect/src/repl.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, bail}; +use anyhow::{anyhow, bail, Result}; use colored::Colorize; use dialoguer::{console, theme::ColorfulTheme, FuzzySelect, Input}; use itertools::Itertools; @@ -8,7 +8,7 @@ use ryhope::{ TreeStorage, }, tree::{MutableTree, PrintableTree, TreeTopology}, - Epoch, MerkleTreeKvDb, NodePayload, + MerkleTreeKvDb, NodePayload, UserEpoch, }; use 
std::io::Write; use tabled::{builder::Builder, settings::Style}; @@ -57,7 +57,7 @@ pub(crate) struct Repl< F: PayloadFormatter, > { current_key: T::Key, - current_epoch: Epoch, + current_epoch: UserEpoch, db: MerkleTreeKvDb, tty: console::Term, payload_fmt: F, @@ -77,7 +77,7 @@ impl< { pub async fn new(db: MerkleTreeKvDb, payload_fmt: F) -> anyhow::Result { let current_key = db.root().await?.ok_or(anyhow!("tree is empty"))?; - let current_epoch = db.current_epoch(); + let current_epoch = db.current_epoch().await?; Ok(Self { current_key, @@ -105,19 +105,19 @@ impl< .unwrap(); } - pub fn set_epoch(&mut self, epoch: Epoch) -> anyhow::Result<()> { - if epoch < self.db.initial_epoch() { + pub async fn set_epoch(&mut self, epoch: UserEpoch) -> Result<()> { + if epoch < self.db.initial_epoch().await { bail!( "epoch `{}` is older than initial epoch `{}`", epoch, - self.db.initial_epoch() + self.db.initial_epoch().await ); } - if epoch > self.db.current_epoch() { + if epoch > self.db.current_epoch().await? { bail!( "epoch `{}` is newer than latest epoch `{}`", epoch, - self.db.current_epoch() + self.db.current_epoch().await? ); } @@ -147,9 +147,9 @@ impl< async fn travel(&mut self) -> anyhow::Result<()> { loop { - let epoch: Epoch = Input::new().with_prompt("target epoch:").interact_text()?; + let epoch: UserEpoch = Input::new().with_prompt("target epoch:").interact_text()?; - self.set_epoch(epoch)?; + self.set_epoch(epoch).await?; } } diff --git a/inspect/src/rows.rs b/inspect/src/rows.rs index af859e80d..b4a963b9a 100644 --- a/inspect/src/rows.rs +++ b/inspect/src/rows.rs @@ -17,7 +17,7 @@ use crate::repl::PayloadFormatter; pub(crate) type RowDb = MerkleTreeKvDb< RowTree, RowPayload, - PgsqlStorage>, + PgsqlStorage, true>, >; struct RowPayloadFormatterDisplay { diff --git a/mp2-test/src/cells_tree.rs b/mp2-test/src/cells_tree.rs index 513ed28fa..5f7b5fd8c 100644 --- a/mp2-test/src/cells_tree.rs +++ b/mp2-test/src/cells_tree.rs @@ -22,15 +22,15 @@ use plonky2::{ use rand::{thread_rng, Rng}; use ryhope::{ storage::{memory::InMemory, updatetree::UpdateTree, EpochKvStorage, TreeTransactionalStorage}, - tree::{sbbst, TreeTopology}, + tree::{sbbst::IncrementalTree, TreeTopology}, InitSettings, MerkleTreeKvDb, NodePayload, }; use serde::{Deserialize, Serialize}; use std::iter; -pub type CellTree = sbbst::Tree; +pub type CellTree = IncrementalTree; pub type CellTreeKey = ::Key; -type CellStorage = InMemory; +type CellStorage = InMemory; pub type MerkleCellTree = MerkleTreeKvDb; /// Test node of the cells tree @@ -116,7 +116,7 @@ impl NodePayload for TestCell { pub async fn build_cell_tree( row: Vec, ) -> Result<(MerkleCellTree, UpdateTree<::Key>)> { - let mut cell_tree = MerkleCellTree::new(InitSettings::Reset(sbbst::Tree::empty()), ()) + let mut cell_tree = MerkleCellTree::new(InitSettings::Reset(IncrementalTree::empty()), ()) .await .unwrap(); let update_tree = cell_tree diff --git a/mp2-v1/src/indexing/block.rs b/mp2-v1/src/indexing/block.rs index 6556a93bd..7966d0845 100644 --- a/mp2-v1/src/indexing/block.rs +++ b/mp2-v1/src/indexing/block.rs @@ -1,14 +1,60 @@ //! Module to handle the block number as a primary index -use ryhope::tree::{sbbst, TreeTopology}; - -/// The index tree when the primary index is the block number of a blockchain is a sbbst since it -/// is a highly optimized tree for monotonically increasing index. It produces very little -/// tree-manipulating operations on update, and therefore, requires the least amount of reproving -/// when adding a new index. 
-/// NOTE: when dealing with another type of index, i.e. a general index such as what can happen on -/// a result table, then this tree does not work anymore. -pub type BlockTree = sbbst::Tree; +use anyhow::anyhow; +use ryhope::{ + storage::{pgsql::PgsqlStorage, RoEpochKvStorage}, + tree::{sbbst, TreeTopology}, + MerkleTreeKvDb, +}; + +use crate::query::planner::TreeFetcher; + +use super::index::IndexNode; + +/// The index tree when the primary index is an epoch in a time-series DB, like the block number for a blockchain. +/// It is a sbbst since it is a highly optimized tree for monotonically increasing indexes. +/// It produces very few tree-manipulating operations on update, and therefore requires the least amount +/// of reproving when adding a new index. +/// NOTE: it is still required that monotonically increasing indexes are inserted in the tree, +/// i.e. a general index such as what can happen on a result table wouldn't work with this tree. +pub type BlockTree = sbbst::EpochTree; /// The key used to refer to a table where the block number is the primary index. pub type BlockTreeKey = ::Key; /// Just an alias that gives more meaning depending on the context pub type BlockPrimaryIndex = BlockTreeKey; + +pub type IndexStorage = PgsqlStorage, false>; +pub type MerkleIndexTree = MerkleTreeKvDb, IndexStorage>; + +/// Get the previous epoch of `epoch` in `tree` +pub async fn get_previous_epoch( + tree: &MerkleIndexTree, + epoch: BlockPrimaryIndex, +) -> anyhow::Result> { + let current_epoch = tree.current_epoch().await?; + let epoch_ctx = tree + .node_context(&epoch) + .await? + .ok_or(anyhow!("epoch {epoch} not found in the tree"))?; + + Ok(tree + .get_predecessor(&epoch_ctx, current_epoch) + .await + .map(|(ctx, _)| ctx.node_id)) +} + +/// Get the next epoch of `epoch` in `tree` +pub async fn get_next_epoch( + tree: &MerkleIndexTree, + epoch: BlockPrimaryIndex, +) -> anyhow::Result> { + let current_epoch = tree.current_epoch().await?; + let epoch_ctx = tree + .node_context(&epoch) + .await? + .ok_or(anyhow!("epoch {epoch} not found in the tree"))?; + + Ok(tree + .get_successor(&epoch_ctx, current_epoch) + .await + .map(|(ctx, _)| ctx.node_id)) +} diff --git a/mp2-v1/src/indexing/cell.rs index 7ad8461a2..a05372768 100644 --- a/mp2-v1/src/indexing/cell.rs +++ b/mp2-v1/src/indexing/cell.rs @@ -25,13 +25,13 @@ use super::ColumnID; /// By default the cells tree is a sbbst tree since it is fixed for a given table and this is the /// simplest/fastest tree. -pub type CellTree = sbbst::Tree; +pub type CellTree = sbbst::IncrementalTree; /// The key used to refer to a cell in the tree pub type CellTreeKey = ::Key; /// The storage of cell tree is "in memory" since it is never really saved on disk. Rather, it is /// always reconstructed on the fly given it is very small. Moreover, storing it on disk would /// require as many sql tables as there would be rows, making this solution highly impracticable. -pub type CellStorage = InMemory>; +pub type CellStorage = InMemory, false>; /// The cells tree is a Merkle tree with a cryptographically secure hash function committing to its /// content. 
pub type MerkleCellTree = @@ -50,7 +50,7 @@ pub async fn new_tree< + Serialize + for<'a> Deserialize<'a>, >() -> MerkleCellTree { - MerkleCellTree::new(InitSettings::Reset(sbbst::Tree::empty()), ()) + MerkleCellTree::new(InitSettings::Reset(sbbst::IncrementalTree::empty()), ()) .await .unwrap() } diff --git a/mp2-v1/src/indexing/mod.rs index 90de676e0..29e8d4480 100644 --- a/mp2-v1/src/indexing/mod.rs +++ b/mp2-v1/src/indexing/mod.rs @@ -1,6 +1,15 @@ +use anyhow::Result; + use crate::indexing::{index::IndexNode, row::RowPayload}; use alloy::primitives::U256; +use block::MerkleIndexTree; use mp2_common::types::HashOutput; +use row::MerkleRowTree; +use ryhope::{ + storage::pgsql::{SqlServerConnection, SqlStorageSettings}, + tree::scapegoat, + InitSettings, UserEpoch, +}; pub mod block; pub mod cell; @@ -9,6 +18,77 @@ pub mod row; pub type ColumnID = u64; +/// Load `MerkleIndexTree` and `MerkleRowTree` trees from the tables +/// `index_table_name` and `row_table_name` in the DB with URL `db_url`. +pub async fn load_trees( + db_url: &str, + index_table_name: String, + row_table_name: String, +) -> Result<(MerkleIndexTree, MerkleRowTree)> { + let index_tree = MerkleIndexTree::new( + InitSettings::MustExist, + SqlStorageSettings { + source: SqlServerConnection::NewConnection(db_url.to_string()), + table: index_table_name.clone(), + external_mapper: None, + }, + ) + .await?; + let row_tree = MerkleRowTree::new( + InitSettings::MustExist, + SqlStorageSettings { + table: row_table_name, + source: SqlServerConnection::NewConnection(db_url.to_string()), + external_mapper: Some(index_table_name), + }, + ) + .await?; + + Ok((index_tree, row_tree)) +} + +/// Build `MerkleIndexTree` and `MerkleRowTree` trees starting from +/// `genesis_block`. The tables employed in the DB with URL `db_url` +/// to store the trees are `index_table_name` and `row_table_name`, +/// respectively. The following additional parameters are required: +/// - `alpha`: Parameter of the Scapegoat tree employed for the `MerkleRowTree` +/// - `reset_if_exist`: if true, any existing tree will be deleted +pub async fn build_trees( + db_url: &str, + index_table_name: String, + row_table_name: String, + genesis_block: UserEpoch, + alpha: scapegoat::Alpha, + reset_if_exist: bool, +) -> Result<(MerkleIndexTree, MerkleRowTree)> { + let db_settings_index = SqlStorageSettings { + source: SqlServerConnection::NewConnection(db_url.to_string()), + table: index_table_name.clone(), + external_mapper: None, + }; + let db_settings_row = SqlStorageSettings { + source: SqlServerConnection::NewConnection(db_url.to_string()), + table: row_table_name, + external_mapper: Some(index_table_name), + }; + + let index_tree = ryhope::new_index_tree( + genesis_block as UserEpoch, + db_settings_index, + reset_if_exist, + ) + .await?; + let row_tree = ryhope::new_row_tree( + genesis_block as UserEpoch, + alpha, + db_settings_row, + reset_if_exist, + ) + .await?; + + Ok((index_tree, row_tree)) +} + // NOTE this might be good to have on public API ? 
// cc/ @andrus pub trait LagrangeNode { diff --git a/mp2-v1/src/indexing/row.rs b/mp2-v1/src/indexing/row.rs index 5bfd453de..bf7dc2527 100644 --- a/mp2-v1/src/indexing/row.rs +++ b/mp2-v1/src/indexing/row.rs @@ -1,4 +1,4 @@ -use super::{cell::CellTreeKey, ColumnID}; +use super::{block::BlockPrimaryIndex, cell::CellTreeKey, ColumnID}; use alloy::primitives::U256; use anyhow::Result; use derive_more::{Deref, From}; @@ -14,12 +14,19 @@ use plonky2::{ hash::hash_types::HashOut, plonk::config::{GenericHashOut, Hasher}, }; -use ryhope::{storage::pgsql::ToFromBytea, tree::scapegoat, NodePayload}; +use ryhope::{ + storage::pgsql::{PgsqlStorage, ToFromBytea}, + tree::scapegoat, + MerkleTreeKvDb, NodePayload, +}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; pub type RowTree = scapegoat::Tree; pub type RowTreeKeyNonce = Vec; +pub type RowStorage = PgsqlStorage, true>; +pub type MerkleRowTree = MerkleTreeKvDb, RowStorage>; + pub trait ToNonce { fn to_nonce(&self) -> RowTreeKeyNonce; } diff --git a/mp2-v1/src/query/batching_planner.rs b/mp2-v1/src/query/batching_planner.rs index ee61c4ffc..6b6cc87d1 100644 --- a/mp2-v1/src/query/batching_planner.rs +++ b/mp2-v1/src/query/batching_planner.rs @@ -8,7 +8,7 @@ use itertools::Itertools; use parsil::symbols::ContextProvider; use ryhope::{ storage::{updatetree::UpdateTree, WideLineage}, - Epoch, + UserEpoch, }; use serde::{Deserialize, Serialize}; use verifiable_db::query::{ @@ -26,7 +26,7 @@ use crate::{ query::planner::TreeFetcher, }; -use super::planner::NonExistenceInput; +use super::planner::NonExistenceInputRow; async fn compute_input_for_row>>( tree: &T, @@ -36,12 +36,12 @@ async fn compute_input_for_row RowInput { let row_path = tree - .compute_path(row_key, index_value as Epoch) + .compute_path(row_key, index_value as UserEpoch) .await .unwrap_or_else(|| panic!("node with key {:?} not found in cache", row_key)); let path = NodePath::new(row_path, index_path.clone()); let (_, row_payload) = tree - .fetch_ctx_and_payload_at(row_key, index_value as Epoch) + .fetch_ctx_and_payload_at(row_key, index_value as UserEpoch) .await .unwrap_or_else(|| panic!("node with key {:?} not found in cache", row_key)); // build row cells @@ -92,8 +92,8 @@ pub async fn generate_chunks_and_update_tree< row_cache: WideLineage>, index_cache: WideLineage>, column_ids: &ColumnIDs, - non_existence_inputs: NonExistenceInput<'_, C>, - epoch: Epoch, + non_existence_inputs: NonExistenceInputRow<'_, C>, + epoch: UserEpoch, ) -> Result<( HashMap, Vec>, UTForChunks, @@ -108,12 +108,12 @@ async fn generate_chunks( row_cache: WideLineage>, index_cache: WideLineage>, column_ids: &ColumnIDs, - non_existence_inputs: NonExistenceInput<'_, C>, + non_existence_inputs: NonExistenceInputRow<'_, C>, ) -> Result>> { let index_keys_by_epochs = index_cache.keys_by_epochs(); assert_eq!(index_keys_by_epochs.len(), 1); let row_keys_by_epochs = row_cache.keys_by_epochs(); - let current_epoch = *index_keys_by_epochs.keys().next().unwrap() as Epoch; + let current_epoch = *index_keys_by_epochs.keys().next().unwrap() as UserEpoch; let sorted_index_values = index_keys_by_epochs[¤t_epoch] .iter() .cloned() @@ -125,7 +125,7 @@ async fn generate_chunks( .await .unwrap_or_else(|| panic!("node with key {index_value} not found in index tree cache")); let proven_rows = if let Some(matching_rows) = - row_keys_by_epochs.get(&(index_value as Epoch)) + row_keys_by_epochs.get(&(index_value as UserEpoch)) { let sorted_rows = matching_rows.iter().collect::>(); stream::iter(sorted_rows.iter()) @@ -137,13 
+137,13 @@ async fn generate_chunks( .await } else { let proven_node = non_existence_inputs - .find_row_node_for_non_existence(index_value) + .find_node_for_non_existence(index_value) .await - .unwrap_or_else(|_| { - panic!("node for non-existence not found for index value {index_value}") + .unwrap_or_else(|e| { + panic!("node for non-existence not found for index value {index_value}: {e:?}") }); let row_input = compute_input_for_row( - non_existence_inputs.row_tree, + non_existence_inputs.tree, &proven_node, index_value, &index_path, @@ -447,7 +447,7 @@ impl UTForChunksBuilder { /// to the proving task for that chunk fn build_update_tree_with_base_chunks( self, - epoch: Epoch, + epoch: UserEpoch, ) -> ( HashMap, Vec>, UTForChunks, diff --git a/mp2-v1/src/query/planner.rs b/mp2-v1/src/query/planner.rs index a01fe91e7..a24fa56f7 100644 --- a/mp2-v1/src/query/planner.rs +++ b/mp2-v1/src/query/planner.rs @@ -1,22 +1,25 @@ use alloy::primitives::U256; -use anyhow::Context; +use anyhow::{ensure, Context}; use bb8::Pool; use bb8_postgres::PostgresConnectionManager; use core::hash::Hash; use futures::stream::TryStreamExt; use itertools::Itertools; use mp2_common::types::HashOutput; -use parsil::{bracketer::bracket_secondary_index, symbols::ContextProvider, ParsilSettings}; +use parsil::{ + bracketer::{bracket_primary_index, bracket_secondary_index}, + symbols::ContextProvider, + ParsilSettings, +}; use ryhope::{ storage::{ - pgsql::{PgsqlStorage, ToFromBytea}, - updatetree::UpdateTree, - FromSettings, PayloadStorage, TransactionalStorage, TreeStorage, WideLineage, + pgsql::ToFromBytea, updatetree::UpdateTree, FromSettings, PayloadStorage, + TransactionalStorage, TreeStorage, WideLineage, }, tree::{MutableTree, NodeContext, TreeTopology}, - Epoch, MerkleTreeKvDb, NodePayload, + MerkleTreeKvDb, NodePayload, UserEpoch, }; -use std::{fmt::Debug, future::Future}; +use std::{fmt::Debug, future::Future, marker::PhantomData}; use tokio_postgres::{row::Row as PsqlRow, types::ToSql, NoTls}; use verifiable_db::query::{ api::TreePathInputs, @@ -24,14 +27,12 @@ use verifiable_db::query::{ }; use crate::indexing::{ - block::BlockPrimaryIndex, - row::{RowPayload, RowTree, RowTreeKey}, + block::{BlockPrimaryIndex, MerkleIndexTree}, + index::IndexNode, + row::{MerkleRowTree, RowPayload, RowTreeKey}, LagrangeNode, }; -/// There is only the PSQL storage fully supported for the non existence case since one needs to -/// executor particular requests on the DB in this case. 
-pub type DBRowStorage = PgsqlStorage>; /// The type of connection to psql backend pub type DBPool = Pool>; @@ -39,58 +40,172 @@ pub struct NonExistenceInfo { pub proving_plan: UpdateTree, } +pub type NonExistenceInputRow<'a, C> = + NonExistenceInput<'a, C, RowTreeKey, RowPayload, MerkleRowTree, true>; +pub type NonExistenceInputIndex<'a, C> = NonExistenceInput< + 'a, + C, + BlockPrimaryIndex, + IndexNode, + MerkleIndexTree, + false, +>; #[derive(Clone)] -pub struct NonExistenceInput<'a, C: ContextProvider> { - pub(crate) row_tree: &'a MerkleTreeKvDb, DBRowStorage>, +pub struct NonExistenceInput< + 'a, + C: ContextProvider, + K: Debug + Clone + Eq + PartialEq, + V: LagrangeNode, + T: TreeFetcher, + const ROWS_TREE: bool, +> { + pub(crate) tree: &'a T, pub(crate) table_name: String, pub(crate) pool: &'a DBPool, pub(crate) settings: &'a ParsilSettings, pub(crate) bounds: QueryBounds, + _k: PhantomData, + _v: PhantomData, } -impl<'a, C: ContextProvider> NonExistenceInput<'a, C> { +impl< + 'a, + C: ContextProvider, + K: Debug + Clone + Eq + PartialEq + ToFromBytea, + V: LagrangeNode, + T: TreeFetcher, + const ROWS_TREE: bool, + > NonExistenceInput<'a, C, K, V, T, ROWS_TREE> +{ pub fn new( - row_tree: &'a MerkleTreeKvDb, DBRowStorage>, + tree: &'a T, table_name: String, pool: &'a DBPool, settings: &'a ParsilSettings, bounds: &'a QueryBounds, ) -> Self { Self { - row_tree, + tree, table_name, pool, settings, bounds: bounds.clone(), + _k: PhantomData, + _v: PhantomData, } } - pub async fn find_row_node_for_non_existence( + pub async fn find_node_for_non_existence( &self, primary: BlockPrimaryIndex, - ) -> anyhow::Result { - let (query_for_min, query_for_max) = bracket_secondary_index( - &self.table_name, - self.settings, - primary as Epoch, - &self.bounds, + ) -> anyhow::Result { + let (preliminary_query, query_for_min, query_for_max) = if ROWS_TREE { + bracket_secondary_index( + &self.table_name, + self.settings, + primary as UserEpoch, + &self.bounds, + ) + } else { + bracket_primary_index(&self.table_name, primary as UserEpoch, &self.bounds) + }; + + let params = execute_row_query(self.pool, &preliminary_query, &[]).await?; + ensure!( + params.len() == 1, + "Preliminary query did not return exactly one row" ); + let param = params[0].get::<_, U256>(0); // try first with the node below the min query bound - let to_be_proven_node = - match find_node_for_proof(self.pool, self.row_tree, query_for_min, primary, true) + let to_be_proven_node = match self + .find_node_for_proof(query_for_min.map(|q| (q, param)), primary, true) + .await? + { + Some(node) => node, + None => self + .find_node_for_proof(query_for_max.map(|q| (q, param)), primary, false) .await? - { - Some(node) => node, - None => { - find_node_for_proof(self.pool, self.row_tree, query_for_max, primary, false) - .await? - .expect("No valid node found to prove non-existence, something is wrong") - } - }; + .expect("No valid node found to prove non-existence, something is wrong"), + }; Ok(to_be_proven_node) } + + async fn find_node_for_proof( + &self, + query_with_param: Option<(String, U256)>, + primary: BlockPrimaryIndex, + is_min_query: bool, + ) -> anyhow::Result> { + let rows = if let Some((query, param)) = query_with_param { + execute_row_query(self.pool, &query, &[param]).await? 
+ } else { + return Ok(None); + }; + if rows.is_empty() { + // no node found, return None + return Ok(None); + } + let row_key = rows[0] + .get::<_, Option>>(0) + .map(K::from_bytea) + .context("unable to parse node key") + .expect(""); + // among the nodes with the same index value as the node with `row_key`, we need to find + // the one that satisfies the following property: all its successor nodes have values bigger + // than `max_query_secondary`, and all its predecessor nodes have values smaller than + // `min_query_secondary`. Such a node can be found differently, depending on the case: + // - if `is_min_query = true`, then we are looking among nodes with the highest value smaller + // than `min_query_secondary` bound (call this value `min_value`); + // therefore, we need to find the "last" node among the nodes with value `min_value`, that + // is the node whose successor (if exists) has a value bigger than `min_value`. Since there + // are no nodes in the tree in the range [`min_query_secondary, max_query_secondary`], then + // the value of the successor of the "last" node is necessarily bigger than `max_query_secondary`, + // and so it implies that we found the node satisfying the property mentioned above + // - if `is_min_query = false`, then we are looking among nodes with the smallest value higher + // than `max_query_secondary` bound (call this value `max_value`); + // therefore, we need to find the "first" node among the nodes with value `max_value`, that + // is the node whose predecessor (if exists) has a value smaller than `max_value`. Since there + // are no nodes in the tree in the range [`min_query_secondary, max_query_secondary`], then + // the value of the predecessor of the "first" node is necessarily smaller than `min_query_secondary`, + // and so it implies that we found the node satisfying the property mentioned above + let (mut node_ctx, node_value) = self + .tree + .fetch_ctx_and_payload_at(&row_key, primary as UserEpoch) + .await + .unwrap(); + let value = node_value.value(); + + if is_min_query { + // starting from the node with key `row_key`, we iterate over its successor nodes in the tree, + // until we find a node that either has no successor or whose successor stores a value different + // from the value `value` stored in the node with key `row_key`; the node found is the one to be + // employed to generate the non-existence proof + let mut successor_ctx = + get_successor_node_with_same_value(self.tree, &node_ctx, value, primary).await; + while successor_ctx.is_some() { + node_ctx = successor_ctx.unwrap(); + successor_ctx = + get_successor_node_with_same_value(self.tree, &node_ctx, value, primary).await; + } + } else { + // starting from the node with key `row_key`, we iterate over its predecessor nodes in the tree, + // until we find a node that either has no predecessor or whose predecessor stores a value different + // from the value `value` stored in the node with key `row_key`; the node found is the one to be + // employed to generate the non-existence proof + let mut predecessor_ctx = + get_predecessor_node_with_same_value(self.tree, &node_ctx, value, primary).await; + while predecessor_ctx.is_some() { + node_ctx = predecessor_ctx.unwrap(); + predecessor_ctx = + get_predecessor_node_with_same_value(self.tree, &node_ctx, value, primary) + .await; + } + } + + Ok(Some(node_ctx.node_id)) + } } pub trait TreeFetcher: Sized { @@ -100,13 +215,13 @@ pub trait TreeFetcher: Sized fn fetch_ctx_and_payload_at( &self, k: &K, - epoch: Epoch, + epoch: 
UserEpoch, ) -> impl Future, V)>> + Send; fn compute_path( &self, node_key: &K, - epoch: Epoch, + epoch: UserEpoch, ) -> impl Future> { async move { let (node_ctx, node_payload) = self.fetch_ctx_and_payload_at(node_key, epoch).await?; @@ -152,7 +267,7 @@ pub trait TreeFetcher: Sized &self, node_ctx: NodeContext, node_payload: V, - at: Epoch, + at: UserEpoch, ) -> impl Future { async move { let child_hash = async |k: Option| -> Option { @@ -183,7 +298,7 @@ pub trait TreeFetcher: Sized fn get_successor( &self, node_ctx: &NodeContext, - epoch: Epoch, + epoch: UserEpoch, ) -> impl Future, V)>> where K: Clone + Debug + Eq + PartialEq, @@ -264,7 +379,7 @@ pub trait TreeFetcher: Sized fn get_predecessor( &self, node_ctx: &NodeContext, - epoch: Epoch, + epoch: UserEpoch, ) -> impl Future, V)>> where K: Clone + Debug + Eq + PartialEq, @@ -349,7 +464,11 @@ where { const IS_WIDE_LINEAGE: bool = true; - async fn fetch_ctx_and_payload_at(&self, k: &K, epoch: Epoch) -> Option<(NodeContext, V)> { + async fn fetch_ctx_and_payload_at( + &self, + k: &K, + epoch: UserEpoch, + ) -> Option<(NodeContext, V)> { self.ctx_and_payload_at(epoch, k) } } @@ -369,7 +488,7 @@ impl< async fn fetch_ctx_and_payload_at( &self, k: &T::Key, - epoch: Epoch, + epoch: UserEpoch, ) -> Option<(NodeContext, V)> { self.try_fetch_with_context_at(k, epoch) .await @@ -386,7 +505,7 @@ impl< async fn fetch_existing_node_from_tree>( tree: &T, k: &K, - epoch: Epoch, + epoch: UserEpoch, ) -> Option<(NodeContext, V)> where K: Clone + Debug + Eq + PartialEq, @@ -408,14 +527,17 @@ where // this method returns the `NodeContext` of the successor of the node provided as input, // if the successor exists in the row tree and it stores the same value of the input node (i.e., `value`); // returns `None` otherwise, as it means that the input node can be used to prove non-existence -async fn get_successor_node_with_same_value( - row_tree: &MerkleTreeKvDb, DBRowStorage>, - node_ctx: &NodeContext, +async fn get_successor_node_with_same_value< + K: Debug + Clone + Eq + PartialEq, + V: LagrangeNode, + T: TreeFetcher, +>( + tree: &T, + node_ctx: &NodeContext, value: U256, primary: BlockPrimaryIndex, -) -> Option> { - row_tree - .get_successor(node_ctx, primary as Epoch) +) -> Option> { + tree.get_successor(node_ctx, primary as UserEpoch) .await .and_then(|(successor_ctx, successor_payload)| { if successor_payload.value() != value { @@ -431,14 +553,17 @@ async fn get_successor_node_with_same_value( // this method returns the `NodeContext` of the predecessor of the node provided as input, // if the predecessor exists in the row tree and it stores the same value of the input node (i.e., `value`); // returns `None` otherwise, as it means that the input node can be used to prove non-existence -async fn get_predecessor_node_with_same_value( - row_tree: &MerkleTreeKvDb, DBRowStorage>, - node_ctx: &NodeContext, +async fn get_predecessor_node_with_same_value< + K: Debug + Clone + Eq + PartialEq, + V: LagrangeNode, + T: TreeFetcher, +>( + tree: &T, + node_ctx: &NodeContext, value: U256, primary: BlockPrimaryIndex, -) -> Option> { - row_tree - .get_predecessor(node_ctx, primary as Epoch) +) -> Option> { + tree.get_predecessor(node_ctx, primary as UserEpoch) .await .and_then(|(predecessor_ctx, predecessor_payload)| { if predecessor_payload.value() != value { @@ -451,78 +576,6 @@ async fn get_predecessor_node_with_same_value( }) } -async fn find_node_for_proof( - db: &DBPool, - row_tree: &MerkleTreeKvDb, DBRowStorage>, - query: Option, - primary: BlockPrimaryIndex, - 
is_min_query: bool, -) -> anyhow::Result> { - if query.is_none() { - return Ok(None); - } - let rows = execute_row_query(db, &query.unwrap(), &[]).await?; - if rows.is_empty() { - // no node found, return None - return Ok(None); - } - let row_key = rows[0] - .get::<_, Option>>(0) - .map(RowTreeKey::from_bytea) - .context("unable to parse row key tree") - .expect(""); - // among the nodes with the same index value of the node with `row_key`, we need to find - // the one that satisfies the following property: all its successor nodes have values bigger - // than `max_query_secondary`, and all its predecessor nodes have values smaller than - // `min_query_secondary`. Such a node can be found differently, depending on the case: - // - if `is_min_query = true`, then we are looking among nodes with the highest value smaller - // than `min_query_secondary` bound (call this value `min_value`); - // therefore, we need to find the "last" node among the nodes with value `min_value`, that - // is the node whose successor (if exists) has a value bigger than `min_value`. Since there - // are no nodes in the tree in the range [`min_query_secondary, max_query_secondary`], then - // the value of the successor of the "last" node is necessarily bigger than `max_query_secondary`, - // and so it implies that we found the node satisfying the property mentioned above - // - if `is_min_query = false`, then we are looking among nodes with the smallest value higher - // than `max_query_secondary` bound (call this value `max_value`); - // therefore, we need to find the "first" node among the nodes with value `max_value`, that - // is the node whose predecessor (if exists) has a value smaller than `max_value`. Since there - // are no nodes in the tree in the range [`min_query_secondary, max_query_secondary`], then - // the value of the predecessor of the "first" node is necessarily smaller than `min_query_secondary`, - // and so it implies that we found the node satisfying the property mentioned above - let (mut node_ctx, node_value) = row_tree - .fetch_with_context_at(&row_key, primary as Epoch) - .await? 
- .unwrap(); - let value = node_value.value(); - - if is_min_query { - // starting from the node with key `row_key`, we iterate over its successor nodes in the tree, - // until we found a node that either has no successor or whose successor stores a value different - // from the value `value` stored in the node with key `row_key`; the node found is the one to be - // employed to generate the non-existence proof - let mut successor_ctx = - get_successor_node_with_same_value(row_tree, &node_ctx, value, primary).await; - while successor_ctx.is_some() { - node_ctx = successor_ctx.unwrap(); - successor_ctx = - get_successor_node_with_same_value(row_tree, &node_ctx, value, primary).await; - } - } else { - // starting from the node with key `row_key`, we iterate over its predecessor nodes in the tree, - // until we found a node that either has no predecessor or whose predecessor stores a value different - // from the value `value` stored in the node with key `row_key`; the node found is the one to be - // employed to generate the non-existence proof - let mut predecessor_ctx = - get_predecessor_node_with_same_value(row_tree, &node_ctx, value, primary).await; - while predecessor_ctx.is_some() { - node_ctx = predecessor_ctx.unwrap(); - predecessor_ctx = - get_predecessor_node_with_same_value(row_tree, &node_ctx, value, primary).await; - } - } - - Ok(Some(node_ctx.node_id)) -} pub async fn execute_row_query2( pool: &DBPool, query: &str, @@ -569,7 +622,7 @@ async fn get_node_info_from_ctx_and_payload< tree: &T, node_ctx: NodeContext, node_payload: V, - at: Epoch, + at: UserEpoch, ) -> (NodeInfo, Option, Option) { // this looks at the value of a child node (left and right), and fetches the grandchildren // information to be able to build their respective node info. @@ -642,7 +695,7 @@ pub async fn get_node_info< >( tree: &T, k: &K, - at: Epoch, + at: UserEpoch, ) -> (NodeInfo, Option, Option) { let (node_ctx, node_payload) = tree .fetch_ctx_and_payload_at(k, at) diff --git a/mp2-v1/tests/common/cases/indexing.rs b/mp2-v1/tests/common/cases/indexing.rs index 568466b68..d839be4fa 100644 --- a/mp2-v1/tests/common/cases/indexing.rs +++ b/mp2-v1/tests/common/cases/indexing.rs @@ -247,7 +247,7 @@ impl TableIndexing { columns, row_unique_id, ) - .await; + .await?; Ok(( Self { value_column, @@ -337,7 +337,7 @@ impl TableIndexing { columns, row_unique_id, ) - .await; + .await?; Ok(( Self { value_column: "".to_string(), @@ -959,6 +959,7 @@ async fn build_mapping_table( row_unique_id, ) .await + .unwrap() } /// Build the mapping of mappings table. 
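Taken together, the planner helpers above (`TreeFetcher`, `get_node_info`, and the successor/predecessor walkers) are now generic over the tree type and keyed by `UserEpoch`. A minimal usage sketch, assuming a `tree: MerkleIndexTree` and a `key: BlockPrimaryIndex` already in scope; the variable names here are illustrative only:

    // `current_epoch()` is now async and fallible, so call sites must await it.
    let epoch: UserEpoch = tree.current_epoch().await?;
    // `get_node_info` works on any `TreeFetcher`: it returns the node itself plus
    // the hashes of its left and right children (`None` when a child is absent).
    let (node_info, left_child_hash, right_child_hash) =
        get_node_info(&tree, &key, epoch).await;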
@@ -1071,6 +1072,7 @@ async fn build_mapping_of_mappings_table( row_unique_id, ) .await + .unwrap() } #[derive(Clone, Debug)] diff --git a/mp2-v1/tests/common/cases/query/aggregated_queries.rs b/mp2-v1/tests/common/cases/query/aggregated_queries.rs index 0d4194f84..e981ae97e 100644 --- a/mp2-v1/tests/common/cases/query/aggregated_queries.rs +++ b/mp2-v1/tests/common/cases/query/aggregated_queries.rs @@ -10,14 +10,13 @@ use crate::common::{ table_source::BASE_VALUE, }, proof_storage::{ProofKey, ProofStorage}, - rowtree::MerkleRowTree, table::Table, TableInfo, }; use crate::context::TestContext; use alloy::primitives::U256; -use anyhow::{bail, Result}; +use anyhow::Result; use futures::{stream, FutureExt, StreamExt}; use itertools::Itertools; @@ -34,11 +33,11 @@ use mp2_v1::{ self, block::BlockPrimaryIndex, cell::MerkleCell, - row::{Row, RowPayload, RowTreeKey}, + row::{MerkleRowTree, Row, RowPayload, RowTreeKey}, }, query::{ batching_planner::{generate_chunks_and_update_tree, UTForChunkProofs, UTKey}, - planner::{execute_row_query, NonExistenceInput, TreeFetcher}, + planner::{execute_row_query, NonExistenceInputIndex, NonExistenceInputRow, TreeFetcher}, }, }; use parsil::{ @@ -51,7 +50,7 @@ use ryhope::{ updatetree::{Next, WorkplanItem}, EpochKvStorage, RoEpochKvStorage, TreeTransactionalStorage, }, - Epoch, + UserEpoch, }; use sqlparser::ast::Query; use tokio_postgres::Row as PsqlRow; @@ -79,55 +78,56 @@ pub(crate) async fn prove_query( metadata: MetadataHash, planner: &mut QueryPlanner<'_>, ) -> Result<()> { - let row_cache = planner + let current_epoch = planner.table.index.current_epoch().await? as BlockPrimaryIndex; + let index_query = core_keys_for_index_tree( + current_epoch as UserEpoch, + (planner.query.min_block, planner.query.max_block), + &planner.table.index_table_name(), + )?; + let big_index_cache = planner .table - .row + .index + // The bounds here means between which versions of the tree should we look. For index tree, + // we only look at _one_ version of the tree. .wide_lineage_between( - planner.table.row.current_epoch(), - &core_keys_for_row_tree( - &planner.query.query, - planner.settings, - &planner.pis.bounds, - &planner.query.placeholders, - )?, - ( - planner.query.min_block as Epoch, - planner.query.max_block as Epoch, - ), + current_epoch as UserEpoch, + &index_query, + (current_epoch as UserEpoch, current_epoch as UserEpoch), ) .await?; // prove the index tree, on a single version. Both path can be taken depending if we do have // some nodes or not - let initial_epoch = planner.table.index.initial_epoch() as BlockPrimaryIndex; - let current_epoch = planner.table.index.current_epoch() as BlockPrimaryIndex; + let initial_epoch = planner.table.genesis_block; let block_range = - planner.query.min_block.max(initial_epoch + 1)..=planner.query.max_block.min(current_epoch); + planner.query.min_block.max(initial_epoch)..=planner.query.max_block.min(current_epoch); + let num_blocks_in_range = big_index_cache.num_touched_rows(); info!( "found {} blocks in range: {:?}", - block_range.clone().count(), - block_range + num_blocks_in_range, block_range ); let column_ids = ColumnIDs::from(&planner.table.columns); - let query_proof_id = if block_range.is_empty() { + let query_proof_id = if num_blocks_in_range == 0 { info!("Running INDEX TREE proving for EMPTY query"); - // no valid blocks in the query range, so we need to choose a block to prove - // non-existence. 
Either the one after genesis or the last one - let to_be_proven_node = if planner.query.max_block < initial_epoch { - initial_epoch + 1 - } else if planner.query.min_block > current_epoch { - current_epoch - } else { - bail!( + let to_be_proven_node = NonExistenceInputIndex::new( + &planner.table.index, + planner.table.index_table_name().to_string(), + &planner.table.db_pool, + planner.settings, + &planner.pis.bounds, + ) + .find_node_for_non_existence(current_epoch as BlockPrimaryIndex) + .await + .unwrap_or_else(|_| { + panic!( "Empty block range to be proven for query bounds {}, {}, but no node to be proven with non-existence circuit was found. Something is wrong", - planner.query.min_block, - planner.query.max_block - ); - } as BlockPrimaryIndex; + planner.query.min_block, planner.query.max_block + ) + }); let index_path = planner .table .index - .compute_path(&to_be_proven_node, current_epoch as Epoch) + .compute_path(&to_be_proven_node, current_epoch as UserEpoch) .await .unwrap_or_else(|| { panic!("Compute path for index node with key {to_be_proven_node} failed") @@ -154,36 +154,46 @@ pub(crate) async fn prove_query( .store_proof(proof_key.clone(), query_proof)?; proof_key } else { - info!("Running INDEX tree proving from cache"); - // Only here we can run the SQL query for index so it doesn't crash - let index_query = core_keys_for_index_tree( - current_epoch as Epoch, - (planner.query.min_block, planner.query.max_block), - )?; - let big_index_cache = planner + info!( + "Row cache query: {}", + &core_keys_for_row_tree( + &planner.query.query, + planner.settings, + &planner.pis.bounds, + &planner.query.placeholders, + )? + ); + let row_cache = planner .table - .index - // The bounds here means between which versions of the tree should we look. For index tree, - // we only look at _one_ version of the tree. 
+ .row .wide_lineage_between( - current_epoch as Epoch, - &index_query, - (current_epoch as Epoch, current_epoch as Epoch), + planner.table.row.current_epoch().await?, + &core_keys_for_row_tree( + &planner.query.query, + planner.settings, + &planner.pis.bounds, + &planner.query.placeholders, + )?, + ( + planner.query.min_block as UserEpoch, + planner.query.max_block as UserEpoch, + ), ) .await?; + info!("Running INDEX tree proving from cache"); let (proven_chunks, update_tree) = generate_chunks_and_update_tree::( row_cache, big_index_cache, &column_ids, - NonExistenceInput::new( + NonExistenceInputRow::new( &planner.table.row, planner.table.public_name.clone(), &planner.table.db_pool, planner.settings, &planner.pis.bounds, ), - current_epoch as Epoch, + current_epoch as UserEpoch, ) .await?; info!("Root of update tree is {:?}", update_tree.root()); @@ -252,7 +262,7 @@ pub(crate) async fn prove_query( planner.ctx, &planner.query, planner.pis, - planner.table.index.current_epoch(), + planner.table.index.current_epoch().await?, &query_proof_id, ) .await?; @@ -280,7 +290,7 @@ pub(crate) async fn prove_query( planner.table, &planner.query, &pis, - planner.table.index.current_epoch(), + planner.table.index.current_epoch().await?, num_touched_rows, res, metadata, @@ -293,7 +303,7 @@ async fn prove_revelation( ctx: &TestContext, query: &QueryCooking, pis: &DynamicCircuitPis, - tree_epoch: Epoch, + tree_epoch: UserEpoch, query_proof_id: &ProofKey, ) -> Result> { // load the query proof, which is at the root of the tree @@ -325,7 +335,7 @@ pub(crate) fn check_final_outputs( table: &Table, query: &QueryCooking, pis: &StaticCircuitPis, - tree_epoch: Epoch, + tree_epoch: UserEpoch, num_touched_rows: usize, res: Vec, offcircuit_md: MetadataHash, @@ -428,7 +438,7 @@ pub(crate) async fn cook_query_between_blocks( table: &Table, info: &TableInfo, ) -> Result { - let max = table.row.current_epoch(); + let max = table.row.current_epoch().await?; let min = max - 1; let value_column = &info.value_column; @@ -634,7 +644,7 @@ pub(crate) async fn cook_query_partial_block_range( let key_column = table.columns.secondary.name.clone(); let value_column = info.value_column.clone(); let table_name = &table.public_name; - let initial_epoch = table.row.initial_epoch(); + let initial_epoch = table.row.initial_epoch().await; // choose a min query bound smaller than initial epoch let min_block = initial_epoch - 1; let placeholders = Placeholders::new_empty(U256::from(min_block), U256::from(max_block)); @@ -660,7 +670,7 @@ pub(crate) async fn cook_query_no_matching_entries( table: &Table, info: &TableInfo, ) -> Result { - let initial_epoch = table.row.initial_epoch(); + let initial_epoch = table.row.initial_epoch().await; // choose query bounds outside of the range [initial_epoch, last_epoch] let min_block = 0; let max_block = initial_epoch - 1; @@ -704,8 +714,8 @@ pub(crate) async fn cook_query_non_matching_entries_some_blocks( let table_name = &table.public_name; // in this query we set query bounds on block numbers to the widest range, so that we // are sure that there are blocks where the chosen key is not alive - let min_block = table.row.initial_epoch() + 1; - let max_block = table.row.current_epoch(); + let min_block = table.genesis_block; + let max_block = table.row.current_epoch().await?; let placeholders = Placeholders::new_empty(U256::from(min_block), U256::from(max_block)); let query_str = format!( @@ -727,13 +737,13 @@ pub(crate) async fn cook_query_non_matching_entries_some_blocks( /// Utility function to 
associated to each row in the tree, the blocks where the row /// was valid -async fn extract_row_liveness(table: &Table) -> Result>> { +async fn extract_row_liveness(table: &Table) -> Result>> { let mut all_table = HashMap::new(); - let max = table.row.current_epoch(); - let min = table.row.initial_epoch() + 1; - for block in (min..=max).rev() { + let current_epoch = table.index.current_epoch().await?; + let epochs = table.index.keys_at(current_epoch).await; + for block in epochs { println!("Querying for block {block}"); - let rows = collect_all_at(&table.row, block).await?; + let rows = collect_all_at(&table.row, block as UserEpoch).await?; debug!( "Collecting {} rows at epoch {} (rows_keys {:?})", rows.len(), @@ -742,7 +752,7 @@ async fn extract_row_liveness(table: &Table) -> Result Result<(RowTreeKey, BlockRange)> { - let initial_epoch = table.row.initial_epoch() + 1; - let last_epoch = table.row.current_epoch(); + let initial_epoch = table.genesis_block as UserEpoch; + let last_epoch = table.row.current_epoch().await?; let all_table = extract_row_liveness(table).await?; + let consecutive_epochs = { + let mut epochs = table.index.keys_at(last_epoch).await; + epochs.sort_unstable(); + epochs + .windows(2) + .map(|w| (w[0] as i64, w[1] as i64)) + .collect::>() + }; // find the longest running row - let (longest_key, longest_sequence, starting) = all_table + let (longest_key, _, starting, ending) = all_table .iter() .filter_map(|(k, epochs)| { // simplification here to start at first epoch where this row was. Otherwise need to do // longest consecutive sequence etc... - let (l, start) = find_longest_consecutive_sequence(epochs.to_vec()); + let (l, start, end) = find_longest_consecutive_sequence(epochs, &consecutive_epochs); debug!("finding sequence of {l} blocks for key {k:?} (epochs {epochs:?}"); if must_not_be_alive_in_some_blocks { - if start > initial_epoch || (start + l as i64) < last_epoch { - Some((k, l, start)) + if start > initial_epoch || end < last_epoch { + Some((k, l, start, end)) } else { None // it's live for all blocks, so we drop this row } } else { - Some((k, l, start)) + Some((k, l, start, end)) } }) - .max_by_key(|(_k, l, _start)| *l) + .max_by_key(|(_k, l, _start, _end)| *l) .unwrap_or_else(|| { panic!( "unable to find longest row? 
-> length all _table {}, max {}", @@ -790,11 +808,14 @@ pub(crate) async fn find_longest_lived_key( }); // we set the block bounds let min_block = starting as BlockPrimaryIndex; - let max_block = min_block + longest_sequence; + let max_block = ending as BlockPrimaryIndex; Ok((longest_key.clone(), (min_block, max_block))) } -async fn collect_all_at(tree: &MerkleRowTree, at: Epoch) -> Result>> { +async fn collect_all_at( + tree: &MerkleRowTree, + at: UserEpoch, +) -> Result>> { let root_key = tree.root_at(at).await?.unwrap(); let (ctx, payload) = tree .try_fetch_with_context_at(&root_key, at) @@ -835,18 +856,29 @@ async fn collect_all_at(tree: &MerkleRowTree, at: Epoch) -> Result) -> (usize, i64) { - let mut longest = 0; +fn find_longest_consecutive_sequence( + v: &[i64], + consecutive_epochs: &HashMap, +) -> (usize, i64, i64) { + let mut current = 0; let mut starting_idx = 0; + let mut longest = (0, 0); + let mut update_longest = |current, idx| { + if current > (longest.1 - longest.0) { + longest = (starting_idx, idx) + } + starting_idx = idx + 1; + }; for i in 0..v.len() - 1 { - if v[i] + 1 == v[i + 1] { - longest += 1; + if *consecutive_epochs.get(&v[i]).unwrap() == v[i + 1] { + current += 1; } else { - longest = 0; - starting_idx = i + 1; + update_longest(current, i); + current = 0; } } - (longest, v[starting_idx]) + update_longest(current, v.len() - 1); + (longest.1 - longest.0, v[longest.0], v[longest.1]) } #[allow(dead_code)] diff --git a/mp2-v1/tests/common/cases/query/simple_select_queries.rs b/mp2-v1/tests/common/cases/query/simple_select_queries.rs index 18a4d9804..00bcb1782 100644 --- a/mp2-v1/tests/common/cases/query/simple_select_queries.rs +++ b/mp2-v1/tests/common/cases/query/simple_select_queries.rs @@ -19,7 +19,7 @@ use parsil::{ }; use ryhope::{ storage::{pgsql::ToFromBytea, RoEpochKvStorage}, - Epoch, NodePayload, + NodePayload, UserEpoch, }; use sqlparser::ast::Query; use std::{fmt::Debug, hash::Hash}; @@ -69,7 +69,7 @@ pub(crate) async fn prove_query( .iter() .map(|row| { let key = RowTreeKey::from_bytea(row.try_get::<_, &[u8]>(0)?.to_vec()); - let epoch = row.try_get::<_, Epoch>(1)?; + let epoch = row.try_get::<_, UserEpoch>(1)?; // all the other items are query results let result = (2..row.len()) .filter_map(|i| { @@ -82,7 +82,7 @@ pub(crate) async fn prove_query( }) .collect::>>()?; // compute input for each matching row - let current_epoch = planner.table.index.current_epoch(); + let current_epoch = planner.table.index.current_epoch().await?; let mut matching_rows_input = vec![]; for (key, epoch, result) in matching_rows.into_iter() { let row_proof = prove_single_row( @@ -157,7 +157,7 @@ pub(crate) async fn prove_query( async fn get_path_info>( key: &K, tree_info: &T, - epoch: Epoch, + epoch: UserEpoch, ) -> Result<(Vec<(NodeInfo, ChildPosition)>, Vec>)> where K: Debug + Hash + Clone + Send + Sync + Eq, @@ -253,7 +253,7 @@ pub(crate) async fn prove_single_row Result { - let initial_epoch = table.index.initial_epoch(); - let current_epoch = table.index.current_epoch(); + let initial_epoch = table.index.initial_epoch().await; + let current_epoch = table.index.current_epoch().await?; let min_block = initial_epoch as BlockPrimaryIndex; let max_block = current_epoch as BlockPrimaryIndex; diff --git a/mp2-v1/tests/common/index_tree.rs b/mp2-v1/tests/common/index_tree.rs index 9fd473a28..df5eb08e3 100644 --- a/mp2-v1/tests/common/index_tree.rs +++ b/mp2-v1/tests/common/index_tree.rs @@ -4,7 +4,7 @@ use mp2_common::{poseidon::empty_poseidon_hash, proof::ProofWithVK}; use 
mp2_v1::{ api, indexing::{ - block::{BlockPrimaryIndex, BlockTree, BlockTreeKey}, + block::{get_previous_epoch, BlockPrimaryIndex, BlockTreeKey, MerkleIndexTree}, index::IndexNode, }, values_extraction::identifier_block_column, @@ -12,11 +12,10 @@ use mp2_v1::{ use plonky2::plonk::config::GenericHashOut; use ryhope::{ storage::{ - pgsql::PgsqlStorage, updatetree::{Next, UpdateTree}, RoEpochKvStorage, }, - MerkleTreeKvDb, + UserEpoch, }; use verifiable_db::block_tree::compute_final_digest; @@ -28,9 +27,6 @@ use super::{ TestContext, }; -pub type IndexStorage = PgsqlStorage>; -pub type MerkleIndexTree = MerkleTreeKvDb, IndexStorage>; - impl TestContext { /// NOTE: we require the added_index information because we need to distinguish if a new node /// added has a leaf or a as parent. The rest of the nodes in the update tree are to be proven @@ -170,7 +166,14 @@ impl TestContext { // here we are simply proving the new updated nodes from the new node to // the root. We fetch the same node but at the previous version of the // tree to prove the update. - let previous_node = t.try_fetch_at(k, t.current_epoch() - 1).await?.unwrap(); + let previous_epoch = + get_previous_epoch(t, t.current_epoch().await? as BlockPrimaryIndex) + .await? + .expect("No previous epoch found, we shouldn't be in this case"); + let previous_node = t + .try_fetch_at(k, previous_epoch as UserEpoch) + .await? + .unwrap(); let left_key = context.left.expect("should always be a left child"); let left_node = t.try_fetch(&left_key).await?.unwrap(); // this should be one of the nodes we just proved in this loop before diff --git a/mp2-v1/tests/common/ivc.rs b/mp2-v1/tests/common/ivc.rs index 1203ea1ff..467a9efe7 100644 --- a/mp2-v1/tests/common/ivc.rs +++ b/mp2-v1/tests/common/ivc.rs @@ -1,11 +1,13 @@ use super::{ context::TestContext, - index_tree::MerkleIndexTree, proof_storage::{IndexProofIdentifier, ProofKey, ProofStorage}, table::TableID, }; use mp2_common::{proof::ProofWithVK, types::HashOutput, F}; -use mp2_v1::{api, indexing::block::BlockPrimaryIndex}; +use mp2_v1::{ + api, + indexing::block::{get_previous_epoch, BlockPrimaryIndex, MerkleIndexTree}, +}; use plonky2::{hash::hash_types::HashOut, plonk::config::GenericHashOut}; use verifiable_db::ivc::PublicInputs; @@ -30,12 +32,13 @@ impl TestContext { // load the previous IVC proof if there is one // we simply can try to load from the storage at block -1 // TODO: generalize that to a better more generic method for any index tree - let previous_ivc_key = ProofKey::IVC(bn - 1); - let input = match self.storage.get_proof_exact(&previous_ivc_key) { - Ok(previous_proof) => { - verifiable_db::ivc::CircuitInput::new_subsequent_input(root_proof, previous_proof) - } - Err(_) => verifiable_db::ivc::CircuitInput::new_first_input(root_proof), + let previous_block = get_previous_epoch(index_tree, bn).await?; + let input = if let Some(prev_bn) = previous_block { + let previous_ivc_key = ProofKey::IVC(prev_bn); + let previous_proof = self.storage.get_proof_exact(&previous_ivc_key)?; + verifiable_db::ivc::CircuitInput::new_subsequent_input(root_proof, previous_proof) + } else { + verifiable_db::ivc::CircuitInput::new_first_input(root_proof) } .expect("unable to create ivc circuit inputs"); let ivc_proof = self diff --git a/mp2-v1/tests/common/rowtree.rs b/mp2-v1/tests/common/rowtree.rs index dfe894346..4730dd709 100644 --- a/mp2-v1/tests/common/rowtree.rs +++ b/mp2-v1/tests/common/rowtree.rs @@ -7,7 +7,7 @@ use mp2_v1::{ block::BlockPrimaryIndex, cell::Cell, index::IndexNode, - 
row::{RowPayload, RowTree, RowTreeKey, ToNonce}, + row::{RowTreeKey, ToNonce}, }, values_extraction::{ row_unique_data_for_mapping_leaf, row_unique_data_for_mapping_of_mappings_leaf, @@ -15,13 +15,9 @@ use mp2_v1::{ }, }; use plonky2::plonk::config::GenericHashOut; -use ryhope::{ - storage::{ - pgsql::PgsqlStorage, - updatetree::{Next, UpdateTree}, - RoEpochKvStorage, - }, - MerkleTreeKvDb, +use ryhope::storage::{ + updatetree::{Next, UpdateTree}, + RoEpochKvStorage, }; use verifiable_db::{ cells_tree, @@ -73,9 +69,6 @@ impl From<&SecondaryIndexCell> for RowTreeKey { } } -pub type RowStorage = PgsqlStorage>; -pub type MerkleRowTree = MerkleTreeKvDb, RowStorage>; - impl TestContext { /// Given a row tree (i.e. secondary index tree) and its update tree, prove /// it. diff --git a/mp2-v1/tests/common/table.rs b/mp2-v1/tests/common/table.rs index 5a5ce3b8f..5b55d3bf4 100644 --- a/mp2-v1/tests/common/table.rs +++ b/mp2-v1/tests/common/table.rs @@ -9,10 +9,12 @@ use itertools::Itertools; use log::debug; use mp2_v1::{ indexing::{ - block::{BlockPrimaryIndex, BlockTreeKey}, + block::{BlockPrimaryIndex, BlockTreeKey, MerkleIndexTree}, + build_trees, cell::{self, Cell, CellTreeKey, MerkleCell, MerkleCellTree}, index::IndexNode, - row::{CellCollection, Row, RowTreeKey}, + load_trees, + row::{CellCollection, MerkleRowTree, Row, RowTreeKey}, ColumnID, }, values_extraction::gadgets::column_info::ColumnInfo, @@ -20,13 +22,9 @@ use mp2_v1::{ use parsil::symbols::{ColumnKind, ContextProvider, ZkColumn, ZkTable}; use plonky2::field::types::PrimeField64; use ryhope::{ - storage::{ - pgsql::{SqlServerConnection, SqlStorageSettings}, - updatetree::UpdateTree, - EpochKvStorage, RoEpochKvStorage, TreeTransactionalStorage, - }, + storage::{updatetree::UpdateTree, EpochKvStorage, RoEpochKvStorage, TreeTransactionalStorage}, tree::scapegoat::Alpha, - Epoch, InitSettings, + UserEpoch, }; use serde::{Deserialize, Serialize}; use std::{hash::Hash, iter::once}; @@ -37,8 +35,6 @@ use super::{ MAX_NUM_COLUMNS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_OUTPUTS, MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, }, - index_tree::MerkleIndexTree, - rowtree::MerkleRowTree, ColumnIdentifier, }; @@ -210,24 +206,12 @@ impl Table { row_unique_id: TableRowUniqueID, ) -> Result { let db_url = std::env::var("DB_URL").unwrap_or("host=localhost dbname=storage".to_string()); - let row_tree = MerkleRowTree::new( - InitSettings::MustExist, - SqlStorageSettings { - table: row_table_name(&public_name), - source: SqlServerConnection::NewConnection(db_url.clone()), - }, - ) - .await - .unwrap(); - let index_tree = MerkleIndexTree::new( - InitSettings::MustExist, - SqlStorageSettings { - source: SqlServerConnection::NewConnection(db_url.clone()), - table: index_table_name(&public_name), - }, + let (index_tree, row_tree) = load_trees( + db_url.as_str(), + index_table_name(&public_name), + row_table_name(&public_name), ) - .await - .unwrap(); + .await?; let genesis = index_tree.storage_state().await?.shift; columns.self_assert(); @@ -246,36 +230,28 @@ impl Table { row_table_name(&self.public_name) } + pub(crate) fn index_table_name(&self) -> String { + index_table_name(&self.public_name) + } + pub async fn new( genesis_block: u64, root_table_name: String, columns: TableColumns, row_unique_id: TableRowUniqueID, - ) -> Self { + ) -> Result { let db_url = std::env::var("DB_URL").unwrap_or("host=localhost dbname=storage".to_string()); - let db_settings_index = SqlStorageSettings { - source: SqlServerConnection::NewConnection(db_url.clone()), - table: 
index_table_name(&root_table_name), }; - let db_settings_row = SqlStorageSettings { - source: SqlServerConnection::NewConnection(db_url.clone()), - table: row_table_name(&root_table_name), - }; - - let row_tree = ryhope::new_row_tree( - genesis_block as Epoch, + let (index_tree, row_tree) = build_trees( + db_url.as_str(), + index_table_name(&root_table_name), + row_table_name(&root_table_name), + genesis_block as UserEpoch, Alpha::new(0.8), - db_settings_row, true, ) - .await - .unwrap(); - let index_tree = ryhope::new_index_tree(genesis_block as Epoch, db_settings_index, true) - .await - .unwrap(); - + .await?; columns.self_assert(); - Self { + Ok(Self { db_pool: new_db_pool(&db_url) .await .expect("unable to create db pool"), @@ -285,7 +261,7 @@ impl Table { public_name: root_table_name, row: row_tree, index: index_tree, - } + }) } // Function to call each time we need to build the index tree, i.e. for each row and @@ -408,8 +384,7 @@ impl Table { &mut self, new_primary: BlockPrimaryIndex, updates: Vec, - ) -> anyhow::Result { - let current_epoch = self.row.current_epoch(); + ) -> Result { let out = self .row .in_transaction(|t| { @@ -474,13 +449,6 @@ impl Table { // debugging println!("\n+++++++++++++++++++++++++++++++++\n"); let root = self.row.root_data().await?.unwrap(); - let new_epoch = self.row.current_epoch(); - assert!( - current_epoch != new_epoch, - "new epoch {} vs previous epoch {}", - new_epoch, - current_epoch - ); println!( " ++ After row update, row cell tree root tree proof hash = {:?}", hex::encode(root.cell_root_hash.unwrap().0) diff --git a/parsil/src/bracketer.rs index 7a4908716..44bb9174a 100644 --- a/parsil/src/bracketer.rs +++ b/parsil/src/bracketer.rs @@ -1,12 +1,79 @@ use alloy::primitives::U256; -use ryhope::{KEY, PAYLOAD, VALID_FROM, VALID_UNTIL}; +use ryhope::{ + mapper_table_name, INCREMENTAL_EPOCH, KEY, PAYLOAD, USER_EPOCH, VALID_FROM, VALID_UNTIL, +}; use verifiable_db::query::utils::QueryBounds; use crate::{symbols::ContextProvider, ParsilSettings}; +/// Return two queries, respectively returning the largest primary index value smaller than the +/// given lower bound, and the smallest primary index value larger than the given higher bound. +/// +/// The method also returns a preliminary query to be run in order to compute the value of +/// the epoch parameter to be provided to the two queries. Such a parameter is the actual +/// epoch in the DB that corresponds to the `block_number` provided as an argument to this +/// method. Note that the epoch parameter is the same for both queries, so the preliminary +/// query can be run just once and the result used for either of the two queries. +/// +/// If the lower or higher bound are the extrema of the U256 definition domain, +/// the associated query is `None`, reflecting the impossibility for a node +/// satisfying the condition to exist in the database. 
+pub fn bracket_primary_index(
+    table_name: &str,
+    block_number: i64,
+    bounds: &QueryBounds,
+) -> (String, Option, Option) {
+    let min_bound = bounds.min_query_primary();
+    let max_bound = bounds.max_query_primary();
+    let mapper_table_name = mapper_table_name(table_name);
+
+    let preliminary_query = format!("
+        SELECT {INCREMENTAL_EPOCH} as epoch FROM {mapper_table_name} WHERE {USER_EPOCH} = {block_number} LIMIT 1;
+        "
+    );
+
+    // A simple alias for the primary index values
+    let primary_index = format!("({PAYLOAD} -> 'row_tree_root_primary')::NUMERIC");
+
+    // Select the largest of all the primary index values that remains smaller than
+    // the provided primary index lower bound, if it is provided.
+    let largest_below = if min_bound == U256::ZERO {
+        None
+    } else {
+        Some(format!(
+            "SELECT {KEY} FROM
+                {table_name}
+            WHERE {VALID_FROM} <= $1 AND {VALID_UNTIL} >= $1
+                AND {primary_index} < '{min_bound}'::DECIMAL
+            ORDER BY {KEY} DESC LIMIT 1"
+        ))
+    };
+
+    // Symmetric situation for the upper bound.
+    let smallest_above = if max_bound == U256::MAX {
+        None
+    } else {
+        Some(format!(
+            "SELECT {KEY} FROM
+                {table_name}
+            WHERE {VALID_FROM} <= $1 AND {VALID_UNTIL} >= $1
+                AND {primary_index} > '{max_bound}'::DECIMAL
+            ORDER BY {KEY} ASC LIMIT 1"
+        ))
+    };
+
+    (preliminary_query, largest_below, smallest_above)
+}
+
 /// Return two queries, respectively returning the largest sec. ind. value smaller than the
 /// given lower bound, and the smallest sec. ind. value larger than the given higher bound.
 ///
+/// The method also returns a preliminary query to be run in order to compute the value of
+/// the epoch parameter to be provided to the two queries. Such a parameter is the actual
+/// epoch in the DB that corresponds to the `block_number` provided as an argument to this
+/// method. Note that the epoch parameter is the same for both queries, so the preliminary
+/// query can be run just once and the result used for either of the two queries.
+///
 /// If the lower or higher bound are the extrema of the U256 definition domain,
 /// the associated query is `None`, reflecting the impossibility for a node
 /// satisfying the condition to exist in the database.
@@ -15,7 +82,7 @@ pub fn bracket_secondary_index(
     settings: &ParsilSettings,
     block_number: i64,
     bounds: &QueryBounds,
-) -> (Option, Option) {
+) -> (String, Option, Option) {
     let secondary_lo = bounds.min_query_secondary().value();
     let secondary_hi = bounds.max_query_secondary().value();
     _bracket_secondary_index(
@@ -33,11 +100,17 @@ pub(crate) fn _bracket_secondary_index(
     block_number: i64,
     secondary_lo: &U256,
     secondary_hi: &U256,
-) -> (Option, Option) {
+) -> (String, Option, Option) {
     let zk_table = settings.context.fetch_table(table_name).unwrap();
     let zktable_name = &zk_table.zktable_name;
+    let mapper_table_name = mapper_table_name(zktable_name);
     let sec_ind_column = zk_table.secondary_index_column().id;

+    let preliminary_query = format!("
+        SELECT {INCREMENTAL_EPOCH} as epoch FROM {mapper_table_name} WHERE {USER_EPOCH} = {block_number} LIMIT 1;
+        "
+    );
+
     // A simple alias for the sec. ind.
values let sec_index = format!("({PAYLOAD} -> 'cells' -> '{sec_ind_column}' ->> 'value')::NUMERIC"); @@ -46,19 +119,27 @@ pub(crate) fn _bracket_secondary_index( let largest_below = if *secondary_lo == U256::ZERO { None } else { - Some(format!("SELECT {KEY} FROM {zktable_name} - WHERE {sec_index} < '{secondary_lo}'::DECIMAL AND {VALID_FROM} <= {block_number} AND {VALID_UNTIL} >= {block_number} - ORDER BY {sec_index} DESC LIMIT 1")) + Some(format!( + "SELECT {KEY} FROM + {zktable_name} + WHERE {VALID_FROM} <= $1 AND {VALID_UNTIL} >= $1 + AND {sec_index} < '{secondary_lo}'::DECIMAL + ORDER BY {sec_index} DESC LIMIT 1" + )) }; // Symmetric situation for the upper bound. let smallest_above = if *secondary_hi == U256::MAX { None } else { - Some(format!("SELECT {KEY} FROM {zktable_name} - WHERE {sec_index} > '{secondary_hi}'::DECIMAL AND {VALID_FROM} <= {block_number} AND {VALID_UNTIL} >= {block_number} - ORDER BY {sec_index} ASC LIMIT 1")) + Some(format!( + "SELECT {KEY} FROM + {zktable_name} + WHERE {VALID_FROM} <= $1 AND {VALID_UNTIL} >= $1 + AND {sec_index} > '{secondary_hi}'::DECIMAL + ORDER BY {sec_index} ASC LIMIT 1" + )) }; - (largest_below, smallest_above) + (preliminary_query, largest_below, smallest_above) } diff --git a/parsil/src/executor.rs b/parsil/src/executor.rs index f597a4940..0f6ed18e0 100644 --- a/parsil/src/executor.rs +++ b/parsil/src/executor.rs @@ -3,11 +3,14 @@ //! row tree tables. use alloy::primitives::U256; use anyhow::*; -use ryhope::{EPOCH, KEY, PAYLOAD, VALID_FROM, VALID_UNTIL}; +use ryhope::{ + mapper_table_name, EPOCH, INCREMENTAL_EPOCH, KEY, PAYLOAD, USER_EPOCH, VALID_FROM, VALID_UNTIL, +}; use sqlparser::ast::{ BinaryOperator, CastKind, DataType, Distinct, ExactNumberInfo, Expr, Function, FunctionArg, - FunctionArgExpr, FunctionArgumentList, FunctionArguments, GroupByExpr, Ident, ObjectName, - Query, Select, SelectItem, SetExpr, TableAlias, TableFactor, TableWithJoins, Value, + FunctionArgExpr, FunctionArgumentList, FunctionArguments, GroupByExpr, Ident, Join, + JoinConstraint, JoinOperator, ObjectName, Query, Select, SelectItem, SetExpr, TableAlias, + TableFactor, TableWithJoins, Value, }; use std::collections::HashMap; use verifiable_db::query::{ @@ -17,7 +20,7 @@ use verifiable_db::query::{ use crate::{ placeholders, - symbols::{ColumnKind, ContextProvider}, + symbols::{ColumnKind, ContextProvider, ZkTable}, utils::str_to_u256, visitor::{AstMutator, VisitMut}, ParsilSettings, @@ -293,30 +296,116 @@ fn convert_funcalls(expr: &mut Expr) -> Result<()> { Ok(()) } -fn expand_block_range(settings: &ParsilSettings) -> Expr { - funcall( - "generate_series", - vec![ - funcall( - "GREATEST", - vec![ - Expr::Identifier(Ident::new(VALID_FROM)), - Expr::Value(Value::Placeholder( - settings.placeholders.min_block_placeholder.to_owned(), - )), - ], - ), - funcall( - "LEAST", - vec![ - Expr::Identifier(Ident::new(VALID_UNTIL)), - Expr::Value(Value::Placeholder( - settings.placeholders.max_block_placeholder.to_owned(), - )), - ], - ), - ], - ) +/// Build the subquery that will be used as the source of epochs and block numbers +/// in the internal queries generated by the executor visitors implemented in this module. 
+/// More specifically, this method builds the following JOIN table: +/// {table} JOIN ( +/// SELECT {USER_EPOCH}, {INCREMENTAL_EPOCH} FROM {mapper_table} +/// WHERE {USER_EPOCH} >= $min_block AND {USER_EPOCH} <= $max_block +/// ) ON {VALID_FROM} <= {INCREMENTAL_EPOCH} AND {VALID_UNTIL} >= {INCREMENTAL_EPOCH} +fn executor_range_table( + settings: &ParsilSettings, + table: &ZkTable, +) -> TableWithJoins { + let mapper_table_name = mapper_table_name(&table.zktable_name); + TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new(table.zktable_name.clone())]), + alias: None, + args: None, + with_hints: vec![], + version: None, + with_ordinality: false, + partitions: vec![], + }, + joins: vec![Join { + relation: TableFactor::Derived { + lateral: false, + subquery: Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![ + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(USER_EPOCH))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new( + INCREMENTAL_EPOCH, + ))), + ], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new(mapper_table_name)]), + alias: None, + args: None, + with_hints: vec![], + version: None, + with_ordinality: false, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + prewhere: None, + selection: Some(Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new(USER_EPOCH))), + op: BinaryOperator::GtEq, + right: Box::new(Expr::Value(Value::Placeholder( + settings.placeholders.min_block_placeholder.to_owned(), + ))), + }), + op: BinaryOperator::And, + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new(USER_EPOCH))), + op: BinaryOperator::LtEq, + right: Box::new(Expr::Value(Value::Placeholder( + settings.placeholders.max_block_placeholder.to_owned(), + ))), + }), + }), + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + }))), + order_by: None, + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + }), + // Subqueries *MUST* have an alias in PgSQL + alias: Some(TableAlias { + name: Ident::new("_mapper"), + columns: vec![], + }), + }, + join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new(VALID_FROM))), + op: BinaryOperator::LtEq, + right: Box::new(Expr::Identifier(Ident::new(INCREMENTAL_EPOCH))), + }), + op: BinaryOperator::And, + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new(VALID_UNTIL))), + op: BinaryOperator::GtEq, + right: Box::new(Expr::Identifier(Ident::new(INCREMENTAL_EPOCH))), + }), + })), + }], + } } /// Generate an [`Expr`] encoding for `PAYLOAD -> cells -> '{id}' -> value @@ -405,6 +494,151 @@ impl<'a, C: ContextProvider> KeyFetcher<'a, C> { Ok(()) } + + const MIN_EPOCH_ALIAS: &'static str = "min_epoch"; + const MAX_EPOCH_ALIAS: &'static str = "max_epoch"; + + fn expand_block_range() -> Expr { + funcall( + "generate_series", + vec![ + funcall( + "GREATEST", + vec![ + Expr::Identifier(Ident::new(VALID_FROM)), + Expr::Identifier(Ident::new(Self::MIN_EPOCH_ALIAS)), + ], + ), + funcall( + "LEAST", + 
vec![ + Expr::Identifier(Ident::new(VALID_UNTIL)), + Expr::Identifier(Ident::new(Self::MAX_EPOCH_ALIAS)), + ], + ), + ], + ) + } + + // Build the subquery that will be used as the source of epochs and block numbers + // in the internal queries generated by the executor visitors implemented in this module. + // More specifically, this method builds the following JOIN table: + // {table} JOIN ( + // SELECT MIN{INCREMENTAL_EPOCH} as {MIN_EPOCH_ALIAS}, MAX{INCREMENTAL_EPOCH} as {MAX_EPOCH_ALIAS} + // FROM {mapper_table} + // WHERE {USER_EPOCH} >= $min_block AND {USER_EPOCH} <= $max_block + // ) ON {VALID_FROM} <= {MAX_EPOCH_ALIAS} AND {VALID_UNTIL} >= {MIN_EPOCH_ALIAS} + fn range_table(&self, table: &ZkTable) -> TableWithJoins { + let mapper_table_name = mapper_table_name(&table.zktable_name); + TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new(table.zktable_name.clone())]), + alias: None, + args: None, + with_hints: vec![], + version: None, + with_ordinality: false, + partitions: vec![], + }, + joins: vec![Join { + relation: TableFactor::Derived { + lateral: false, + subquery: Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![ + SelectItem::ExprWithAlias { + expr: funcall( + "MIN", + vec![Expr::Identifier(Ident::new(INCREMENTAL_EPOCH))], + ), + alias: Ident::new(Self::MIN_EPOCH_ALIAS), + }, + SelectItem::ExprWithAlias { + expr: funcall( + "MAX", + vec![Expr::Identifier(Ident::new(INCREMENTAL_EPOCH))], + ), + alias: Ident::new(Self::MAX_EPOCH_ALIAS), + }, + ], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new(mapper_table_name)]), + alias: None, + args: None, + with_hints: vec![], + version: None, + with_ordinality: false, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + prewhere: None, + selection: Some(Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new(USER_EPOCH))), + op: BinaryOperator::GtEq, + right: Box::new(Expr::Value(Value::Placeholder( + self.settings.placeholders.min_block_placeholder.to_owned(), + ))), + }), + op: BinaryOperator::And, + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new(USER_EPOCH))), + op: BinaryOperator::LtEq, + right: Box::new(Expr::Value(Value::Placeholder( + self.settings.placeholders.max_block_placeholder.to_owned(), + ))), + }), + }), + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + }))), + order_by: None, + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + }), + // Subqueries *MUST* have an alias in PgSQL + alias: Some(TableAlias { + name: Ident::new("_mapper"), + columns: vec![], + }), + }, + join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new(VALID_FROM))), + op: BinaryOperator::LtEq, + right: Box::new(Expr::Identifier(Ident::new(Self::MAX_EPOCH_ALIAS))), + }), + op: BinaryOperator::And, + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new(VALID_UNTIL))), + op: BinaryOperator::GtEq, + right: Box::new(Expr::Identifier(Ident::new(Self::MIN_EPOCH_ALIAS))), + }), + 
})),
+            }],
+        }
+    }
 }

 impl AstMutator for KeyFetcher<'_, C> {
     type Error = anyhow::Error;
@@ -456,7 +690,7 @@ impl AstMutator for KeyFetcher<'_, C> {
                         std::iter::once(SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(KEY))))
                             .chain(std::iter::once(
                                 SelectItem::ExprWithAlias {
-                                    expr: expand_block_range(self.settings),
+                                    expr: Self::expand_block_range(),
                                     alias: Ident::new(EPOCH)
                                 }
                             ))
@@ -469,9 +703,23 @@
                                 .unwrap_or(column.name.as_str()),
                         );
                         match column.kind {
-                            // primary index column := generate_series(VALID_FROM, VALID_UNTIL) AS name
+                            // primary index column := $MIN_BLOCK AS name.
+                            // We return a constant value as a trick to avoid extracting USER_EPOCH from
+                            // the epoch mapper table, which would require a costly JOIN.
+                            // Indeed, given that:
+                            // - The filtering over the primary index has already been applied in
+                            //   the epoch mapper table
+                            // - This column is later ignored in the overall query
+                            // we just need to provide, as block_number, a column value that satisfies the
+                            // filtering over the primary index specified in the existing query,
+                            // which is `block_number >= $MIN_BLOCK AND block_number <= $MAX_BLOCK`, as
+                            // any other predicate is removed from the query by the isolator.
+                            // ToDo: remove this column once we merge the new version of the isolator,
+                            // which will remove the block_number range filtering
                             ColumnKind::PrimaryIndex => SelectItem::ExprWithAlias {
-                                expr: expand_block_range(self.settings),
+                                expr: Expr::Value(Value::Placeholder(
+                                    self.settings.placeholders.min_block_placeholder.to_owned(),
+                                )),
                                 alias,
                             },
                             // other columns := payload->'cells'->'id'->'value' AS name
@@ -494,18 +742,7 @@
                     top: None,
                     projection: select_items,
                     into: None,
-                    from: vec![TableWithJoins {
-                        relation: TableFactor::Table {
-                            name: ObjectName(vec![Ident::new(table.zktable_name)]),
-                            alias: None,
-                            args: None,
-                            with_hints: vec![],
-                            version: None,
-                            with_ordinality: false,
-                            partitions: vec![],
-                        },
-                        joins: vec![],
-                    }],
+                    from: vec![self.range_table(&table)],
                     lateral_views: vec![],
                     prewhere: None,
                     selection: None,
@@ -546,6 +783,147 @@
     }
 }

+/// Implementation of `post_table_factor` shared both by `Executor` and by
+/// `ExecutorWithKey`. If the flag `return_keys` is true, the `key` and `epoch`
+/// columns are also returned as `SELECT` items in the constructed sub-query,
+/// as required by the `ExecutorWithKey` implementation of `post_table_factor`.
+fn post_table_factor(
+    settings: &ParsilSettings,
+    table_factor: &mut TableFactor,
+    return_keys: bool,
+) -> Result<()> {
+    if let Some(replacement) = match &table_factor {
+        TableFactor::Table {
+            name, alias, args, ..
+        } => {
+            // In this case, we handle
+            //
+            // ... FROM table [AS alias [(col1, // col2, ...)]]
+            //
+            // so both the table name and its columns may be aliased.
+            if args.is_some() {
+                unreachable!()
+            } else {
+                // The actual table being referenced
+                let concrete_table_name = &name.0[0].value;
+
+                // Fetch all the columns declared in this table
+                let table = settings.context.fetch_table(concrete_table_name)?;
+                let table_columns = &table.columns;
+
+                // Extract the apparent table name (either the concrete one
+                // or its alias), and, if they exist, the aliased column
+                // names.
+ let (apparent_table_name, column_aliases) = if let Some(table_alias) = alias { + ( + table_alias.name.value.to_owned(), + if table_alias.columns.is_empty() { + None + } else { + table_alias.columns.clone().into() + }, + ) + } else { + (concrete_table_name.to_owned(), None) + }; + + // Create one `SelectItem` for each column of the table, as they have to be returned + // in `SELECT` in the constructed sub-query + let current_columns_select_items = + table_columns.iter().enumerate().map(|(i, column)| { + let alias = Ident::new( + column_aliases + .as_ref() + .map(|a| a[i].value.as_str()) + .unwrap_or(column.name.as_str()), + ); + match column.kind { + // primary index column := USER_EPOCH AS name + ColumnKind::PrimaryIndex => SelectItem::ExprWithAlias { + expr: Expr::Identifier(Ident::new(USER_EPOCH)), + alias, + }, + // other columns := PAYLOAD->'cells'->'id'->'value' AS name + ColumnKind::SecondaryIndex | ColumnKind::Standard => { + SelectItem::ExprWithAlias { + expr: fetch_from_payload(column.id), + alias, + } + } + } + }); + + let select_items = if return_keys { + // Insert the `key` and `epoch` columns in the selected values... + std::iter::once(SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(KEY)))) + .chain(std::iter::once(SelectItem::ExprWithAlias { + expr: Expr::Identifier(Ident::new(USER_EPOCH)), + alias: Ident::new(EPOCH), + })) + .chain(current_columns_select_items) + .collect() + } else { + current_columns_select_items.collect() + }; + + Some(TableFactor::Derived { + lateral: false, + subquery: Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: select_items, + into: None, + from: vec![executor_range_table(settings, &table)], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + }))), + order_by: None, + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + }), + // Subqueries *MUST* have an alias in PgSQL + alias: Some(TableAlias { + name: Ident::new(apparent_table_name), + columns: vec![], + }), + }) + } + } + TableFactor::Derived { .. } => None, + TableFactor::TableFunction { .. } => todo!(), + TableFactor::Function { .. } => todo!(), + TableFactor::UNNEST { .. } => todo!(), + TableFactor::JsonTable { .. } => todo!(), + TableFactor::NestedJoin { .. } => todo!(), + TableFactor::Pivot { .. } => todo!(), + TableFactor::Unpivot { .. } => todo!(), + TableFactor::MatchRecognize { .. } => todo!(), + } { + *table_factor = replacement; + } + + Ok(()) +} + struct Executor<'a, C: ContextProvider> { settings: &'a ParsilSettings, } @@ -566,135 +944,7 @@ impl AstMutator for Executor<'_, C> { } fn post_table_factor(&mut self, table_factor: &mut TableFactor) -> Result<()> { - if let Some(replacement) = match &table_factor { - TableFactor::Table { - name, alias, args, .. - } => { - // In this case, we handle - // - // ... FROM table [AS alias [(col1, // col2, ...)]] - // - // so both the table name and its columns may be aliased. 
- if args.is_some() { - unreachable!() - } else { - // The actual table being referenced - let concrete_table_name = &name.0[0].value; - - // Fetch all the column declared in this table - let table = self.settings.context.fetch_table(concrete_table_name)?; - let table_columns = &table.columns; - - // Extract the apparent table name (either the concrete one - // or its alia), and, if they exist, the aliased column - // names. - let (apparent_table_name, column_aliases) = if let Some(table_alias) = alias { - ( - table_alias.name.value.to_owned(), - if table_alias.columns.is_empty() { - None - } else { - table_alias.columns.clone().into() - }, - ) - } else { - (concrete_table_name.to_owned(), None) - }; - - let select_items = table_columns - .iter() - .enumerate() - .map(|(i, column)| { - let alias = Ident::new( - column_aliases - .as_ref() - .map(|a| a[i].value.as_str()) - .unwrap_or(column.name.as_str()), - ); - match column.kind { - // primary index column := generate_series(VALID_FROM, VALID_UNTIL) AS name - ColumnKind::PrimaryIndex => SelectItem::ExprWithAlias { - expr: expand_block_range(self.settings), - alias, - }, - // other columns := PAYLOAD->'cells'->'id'->'value' AS name - ColumnKind::SecondaryIndex | ColumnKind::Standard => { - SelectItem::ExprWithAlias { - expr: fetch_from_payload(column.id), - alias, - } - } - } - }) - .collect(); - - Some(TableFactor::Derived { - lateral: false, - subquery: Box::new(Query { - with: None, - body: Box::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: select_items, - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new(table.zktable_name)]), - alias: None, - args: None, - with_hints: vec![], - version: None, - with_ordinality: false, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - window_before_qualify: false, - value_table_mode: None, - connect_by: None, - }))), - order_by: None, - limit: None, - limit_by: vec![], - offset: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - }), - // Subqueries *MUST* have an alias in PgSQL - alias: Some(TableAlias { - name: Ident::new(apparent_table_name), - columns: vec![], - }), - }) - } - } - TableFactor::Derived { .. } => None, - TableFactor::TableFunction { .. } => todo!(), - TableFactor::Function { .. } => todo!(), - TableFactor::UNNEST { .. } => todo!(), - TableFactor::JsonTable { .. } => todo!(), - TableFactor::NestedJoin { .. } => todo!(), - TableFactor::Pivot { .. } => todo!(), - TableFactor::Unpivot { .. } => todo!(), - TableFactor::MatchRecognize { .. 
} => todo!(), - } { - *table_factor = replacement; - } - - Ok(()) + post_table_factor(self.settings, table_factor, false) } } @@ -721,10 +971,7 @@ impl AstMutator for ExecutorWithKey<'_, C> { } fn post_table_factor(&mut self, table_factor: &mut TableFactor) -> Result<()> { - let mut key_fetcher = KeyFetcher { - settings: self.settings, - }; - key_fetcher.post_table_factor(table_factor) + post_table_factor(self.settings, table_factor, true) } fn post_select(&mut self, select: &mut Select) -> Result<()> { @@ -843,49 +1090,3 @@ pub fn generate_query_keys( TranslatedQuery::make(SafeQuery::ZkQuery(key_query), settings) } - -/// Return two queries, respectively returning the largest sec. ind. value -/// smaller than the given lower bound, and the smallest sec. ind. value larger -/// than the given higher bound. -/// -/// If the lower or higher bound are the extrema of the U256 definition domain, -/// the associated query is `None`, reflecting the impossibility for a node -/// satisfying the condition to exist in the database. -pub fn bracket_secondary_index( - table_name: &str, - settings: &ParsilSettings, - block_number: i64, - secondary_lo: U256, - secondary_hi: U256, -) -> (Option, Option) { - let sec_ind_column = settings - .context - .fetch_table(table_name) - .unwrap() - .secondary_index_column() - .id; - - // A simple alias for the sec. ind. values - let sec_index = format!("({PAYLOAD} -> 'cells' -> '{sec_ind_column}' ->> 'value')::NUMERIC"); - - // Select the largest of all the sec. ind. values that remains smaller than - // the provided sec. ind. lower bound if it is provided, -1 otherwise. - let largest_below = if secondary_lo == U256::MIN { - None - } else { - Some(format!("SELECT key FROM {table_name} - WHERE {sec_index} < '{secondary_lo}'::DECIMAL AND {VALID_FROM} <= {block_number} AND {VALID_UNTIL} >= {block_number} - ORDER BY {sec_index} DESC LIMIT 1")) - }; - - // Symmetric situation for the upper bound. - let smallest_above = if secondary_hi == U256::MAX { - None - } else { - Some(format!("SELECT key FROM {table_name} - WHERE {sec_index} > '{secondary_hi}'::DECIMAL AND {VALID_FROM} <= {block_number} AND {VALID_UNTIL} >= {block_number} - ORDER BY {sec_index} ASC LIMIT 1")) - }; - - (largest_below, smallest_above) -} diff --git a/parsil/src/main.rs b/parsil/src/main.rs index 6a3978c01..79c659690 100644 --- a/parsil/src/main.rs +++ b/parsil/src/main.rs @@ -6,8 +6,9 @@ use assembler::assemble_static; use clap::{Parser, Subcommand}; use log::Level; use parsil::queries::{core_keys_for_index_tree, core_keys_for_row_tree}; -use ryhope::{tree::sbbst::NodeIdx, Epoch}; -use symbols::FileContextProvider; +use ryhope::{tree::sbbst::NodeIdx, UserEpoch}; +use sqlparser::ast::Query; +use symbols::{ContextProvider, FileContextProvider}; use utils::{parse_and_validate, ParsilSettings, PlaceholderSettings}; mod assembler; @@ -85,12 +86,14 @@ enum Command { to_keys: bool, }, Core { + /// The query to execute if tree_type is "row", or the table name if + /// tree_type is "index" #[arg(long, short = 'Q')] request: String, /// The epoch at which to run the query #[arg(short = 'E', long)] - epoch: Epoch, + epoch: UserEpoch, /// Primary index lower bound #[arg(short = 'm', long)] @@ -216,9 +219,11 @@ fn main() -> Result<()> { // todo!(), // )? } - "index" => { - core_keys_for_index_tree(epoch, (min_block as NodeIdx, max_block as NodeIdx))? 
- } + "index" => core_keys_for_index_tree( + epoch, + (min_block as NodeIdx, max_block as NodeIdx), + &request, + )?, _ => unreachable!(), }; diff --git a/parsil/src/queries.rs b/parsil/src/queries.rs index 506fdb731..fcf83a671 100644 --- a/parsil/src/queries.rs +++ b/parsil/src/queries.rs @@ -3,42 +3,48 @@ use crate::{keys_in_index_boundaries, symbols::ContextProvider, ParsilSettings}; use anyhow::*; -use ryhope::{tree::sbbst::NodeIdx, Epoch, EPOCH, KEY, VALID_FROM, VALID_UNTIL}; +use ryhope::{ + mapper_table_name, tree::sbbst::NodeIdx, UserEpoch, EPOCH, INCREMENTAL_EPOCH, KEY, USER_EPOCH, +}; use verifiable_db::query::{ universal_circuit::universal_circuit_inputs::Placeholders, utils::QueryBounds, }; -/// Return a query read to be injected in the wide lineage computation for the +/// Return a query ready to be injected in the wide lineage computation for the /// index tree. /// /// * execution_epoch: the epoch (block number) at which the query is executed; /// * query_epoch_bounds: the min. and max. block numbers onto which the query -/// is executed. +/// is executed; +/// * table_name: the name of the index tree table over which the query is executed; pub fn core_keys_for_index_tree( - execution_epoch: Epoch, + execution_epoch: UserEpoch, query_epoch_bounds: (NodeIdx, NodeIdx), + table_name: &str, ) -> Result { let (query_min_block, query_max_block) = query_epoch_bounds; - ensure!( - query_max_block as i64 <= execution_epoch, - "query can not be executed in the past ({} < {})", - execution_epoch, - query_max_block - ); - // Integer default to i32 in PgSQL, they must be cast to i64, a.k.a. BIGINT. - Ok(format!( - "SELECT {}::BIGINT as {EPOCH}, - generate_series( - GREATEST((SELECT MIN({VALID_FROM}))::BIGINT, {}::BIGINT), - LEAST((SELECT MAX({VALID_UNTIL}))::BIGINT, {}::BIGINT)) AS {KEY}", - execution_epoch, + let mapper_table_name = mapper_table_name(table_name); + + let (lower_epoch, higher_epoch) = ( query_min_block, query_max_block.min( execution_epoch .try_into() - .with_context(|| format!("unable to convert {} to i64", execution_epoch))? - ) + .with_context(|| format!("unable to convert {} to i64", execution_epoch))?, + ), + ); + + // Integer default to i32 in PgSQL, they must be cast to i64, a.k.a. BIGINT. + Ok(format!( + " + SELECT {execution_epoch}::BIGINT as {EPOCH}, + {USER_EPOCH} as {KEY} + FROM {mapper_table_name} + WHERE {USER_EPOCH} >= {lower_epoch}::BIGINT AND {USER_EPOCH} <= {higher_epoch}::BIGINT + AND NOT {INCREMENTAL_EPOCH} = 0 + ORDER BY {USER_EPOCH} + " )) } diff --git a/ryhope/src/error.rs b/ryhope/src/error.rs index 64e287388..ffba2fbfe 100644 --- a/ryhope/src/error.rs +++ b/ryhope/src/error.rs @@ -1,6 +1,8 @@ use thiserror::Error; use tokio_postgres::error::Error as PgError; +use crate::IncrementalEpoch; + #[derive(Error, Debug)] pub enum RyhopeError { /// An error that occured while interacting with the DB. 
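The hunk below adds two variants that surface epoch-mapper failures to callers. As a rough sketch (using only the variants introduced here; the helper name is illustrative), calling code might discriminate them like this:

    fn describe(err: &RyhopeError) -> String {
        match err {
            // An internal (incremental) epoch with no user epoch mapped to it.
            RyhopeError::CurrenEpochUndefined(internal) => {
                format!("internal epoch {internal} has no user-epoch mapping")
            }
            // Any other failure reported by the epoch mapper.
            RyhopeError::EpochMapperError(msg) => format!("epoch mapper error: {msg}"),
            // Fall back to the thiserror-derived `Display` implementation.
            other => other.to_string(),
        }
    }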
@@ -34,6 +36,12 @@ pub enum RyhopeError {

     #[error("key not found in tree")]
     KeyNotFound,
+
+    #[error("Current epoch is undefined: internal epoch is {0}, but no corresponding user epoch was found")]
+    CurrenEpochUndefined(IncrementalEpoch),
+
+    #[error("Error in epoch mapper operation: {0}")]
+    EpochMapperError(String),
 }

 impl RyhopeError {
     pub fn from_db>(msg: S, err: PgError) -> Self {
@@ -64,6 +72,10 @@ impl RyhopeError {
     pub fn fatal>(msg: S) -> Self {
         RyhopeError::Fatal(msg.as_ref().to_string())
     }
+
+    pub fn epoch_error>(msg: S) -> Self {
+        RyhopeError::EpochMapperError(msg.as_ref().to_string())
+    }
 }

 pub fn ensure>(cond: bool, msg: S) -> Result<(), RyhopeError> {
diff --git a/ryhope/src/lib.rs b/ryhope/src/lib.rs
index 3caaf90f7..f493fb443 100644
--- a/ryhope/src/lib.rs
+++ b/ryhope/src/lib.rs
@@ -4,6 +4,7 @@ use serde::{Deserialize, Serialize};
 use std::{
     collections::{HashMap, HashSet},
     fmt::Debug,
+    future::Future,
     hash::Hash,
     marker::PhantomData,
 };
@@ -34,10 +35,30 @@ pub const EPOCH: &str = "__epoch";
 pub const VALID_FROM: &str = "__valid_from";
 /// The column containing the last epoch of validity of the row in the zkTable
 pub const VALID_UNTIL: &str = "__valid_until";
+/// The column containing epoch values that are meaningful for the user-exposed table
+pub const USER_EPOCH: &str = "__user_epoch";
+/// The column containing the incremental epochs employed in the zkTable
+pub const INCREMENTAL_EPOCH: &str = "__incremental_epoch";
+
+/// A timestamp in a versioned storage. It corresponds to the actual epochs used internally in
+/// the storage implementations, which are assumed to be sequential.
+/// Using a signed type allows for easy detection & debugging of erroneous subtractions.
+pub type IncrementalEpoch = i64;
+
+/// Represents the epochs of the storage as defined by the user.
+/// The storages provided here allow these epochs to be decoupled from the `IncrementalEpoch`s
+/// used internally, letting users define epochs for the storage that are not
+/// necessarily incremental. The only assumption is that these user-defined epochs
+/// are monotonically increasing.
+pub type UserEpoch = i64;
+
+pub fn mapper_table_name(table_name: &str) -> String {
+    format!("{}_mapper", table_name)
+}

-/// A timestamp in a versioned storage. Using a signed type allows for easy
-/// detection & debugging of erroneous subtractions.
-pub type Epoch = i64;
+pub(crate) fn metadata_table_name(table_name: &str) -> String {
+    format!("{}_meta", table_name)
+}

 /// A payload attached to a node, that may need to compute aggregated values
 /// from the bottom of the tree to the top. If not, simply do not override the
@@ -73,13 +94,13 @@ pub enum InitSettings {
     MustNotExist(T),
     /// Fail to initialize if the tree already exists, create with the given
     /// state and starting at the given epoch otherwise.
-    MustNotExistAt(T, Epoch),
+    MustNotExistAt(T, UserEpoch),
     /// Ensure that the tree is re-created with the given settings, erasing it
     /// if it exists.
     Reset(T),
     /// Ensure that the tree is re-created with the given settings and at the
     /// given initial epoch, erasing it if it exists.
-    ResetAt(T, Epoch),
+    ResetAt(T, UserEpoch),
 }

 /// An `MerkleTreeKvDb` wraps together:
@@ -206,7 +227,7 @@ where
     }

     /// Return the key mapped to the root of the Merkle tree at the given epoch.
- pub async fn root_at(&self, epoch: Epoch) -> Result, RyhopeError> { + pub async fn root_at(&self, epoch: UserEpoch) -> Result, RyhopeError> { self.tree.root(&self.storage.view_at(epoch)).await } @@ -220,7 +241,7 @@ where } /// Return the payload of the Merkle tree root at the given epoch. - pub async fn root_data_at(&self, epoch: Epoch) -> Result, RyhopeError> { + pub async fn root_data_at(&self, epoch: UserEpoch) -> Result, RyhopeError> { Ok( if let Some(root) = self.tree.root(&self.storage.view_at(epoch)).await? { self.storage.data().try_fetch_at(&root, epoch).await? @@ -254,7 +275,7 @@ where pub async fn try_fetch_with_context_at( &self, k: &T::Key, - epoch: Epoch, + epoch: UserEpoch, ) -> Result, V)>, RyhopeError> { if let Some(ctx) = self .tree @@ -282,7 +303,7 @@ where pub async fn fetch_with_context_at( &self, k: &T::Key, - epoch: Epoch, + epoch: UserEpoch, ) -> Result, V)>, RyhopeError> { self.try_fetch_with_context_at(k, epoch).await } @@ -307,7 +328,7 @@ where pub async fn node_context_at( &self, k: &T::Key, - epoch: Epoch, + epoch: UserEpoch, ) -> Result>, RyhopeError> { self.tree .node_context(k, &self.storage.view_at(epoch)) @@ -326,7 +347,7 @@ where pub async fn lineage_at( &self, k: &T::Key, - epoch: Epoch, + epoch: UserEpoch, ) -> Result>, RyhopeError> { let s = TreeStorageView::<'_, T, S>::new(&self.storage, epoch); self.tree.lineage(k, &s).await @@ -337,7 +358,7 @@ where pub async fn ascendance_at>( &self, ks: I, - epoch: Epoch, + epoch: UserEpoch, ) -> Result, RyhopeError> { self.tree.ascendance(ks, &self.view_at(epoch)).await } @@ -350,16 +371,18 @@ where /// Return an epoch-locked, read-only, [`TreeStorage`] offering a view on /// this Merkle tree as it was at the given epoch. - pub fn view_at(&self, epoch: Epoch) -> TreeStorageView<'_, T, S> { + pub fn view_at(&self, epoch: UserEpoch) -> TreeStorageView<'_, T, S> { TreeStorageView::<'_, T, S>::new(&self.storage, epoch) } /// Return the update tree generated by the transaction defining the given /// epoch. 
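Before `diff_at`, a sketch of how the epoch-parameterized accessors above are meant to be used (illustrative; it assumes the `MerkleIndexTree` alias seen in the test imports and a payload type implementing `Debug`):

    async fn show_history(db: &MerkleIndexTree, epoch: UserEpoch) -> Result<(), RyhopeError> {
        // Key and payload of the root as they were at the given user epoch.
        let _root_key = db.root_at(epoch).await?;
        if let Some(root_payload) = db.root_data_at(epoch).await? {
            println!("root payload at epoch {epoch}: {root_payload:?}");
        }
        // For heavier traversals, take a single epoch-locked, read-only view.
        let _view = db.view_at(epoch);
        Ok(())
    }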
- pub async fn diff_at(&self, epoch: Epoch) -> Result>, RyhopeError> { - if epoch > self.current_epoch() { - Ok(None) - } else { + pub async fn diff_at( + &self, + epoch: UserEpoch, + ) -> Result>, RyhopeError> { + let current_epoch = self.current_epoch().await?; + Ok(if epoch <= current_epoch { let dirtied = self.storage.born_at(epoch).await; let s = TreeStorageView::<'_, T, S>::new(&self.storage, epoch); @@ -371,8 +394,10 @@ where } let ut = UpdateTree::from_paths(paths, epoch); - Ok(Some(ut)) - } + Some(ut) + } else { + None + }) } } @@ -388,19 +413,19 @@ impl< { pub async fn wide_update_trees_at( &self, - at: Epoch, + at: UserEpoch, keys_query: &S::KeySource, - bounds: (Epoch, Epoch), + bounds: (UserEpoch, UserEpoch), ) -> Result>, RyhopeError> { self.storage .wide_update_trees(at, &self.tree, keys_query, bounds) .await } - pub async fn try_fetch_many_at + Send>( + pub async fn try_fetch_many_at + Send>( &self, data: I, - ) -> Result, V)>, RyhopeError> + ) -> Result, V)>, RyhopeError> where ::IntoIter: Send, { @@ -409,9 +434,9 @@ impl< pub async fn wide_lineage_between( &self, - at: Epoch, + at: UserEpoch, keys_query: &S::KeySource, - bounds: (Epoch, Epoch), + bounds: (UserEpoch, UserEpoch), ) -> Result, RyhopeError> { self.storage .wide_lineage_between(at, &self.tree, keys_query, bounds) @@ -432,31 +457,39 @@ impl< > RoEpochKvStorage for MerkleTreeKvDb { /// Return the first registered time stamp of the storage - fn initial_epoch(&self) -> Epoch { + fn initial_epoch(&self) -> impl Future + Send { self.storage.data().initial_epoch() } - fn current_epoch(&self) -> Epoch { + fn current_epoch(&self) -> impl Future> + Send { self.storage.data().current_epoch() } - async fn try_fetch_at(&self, k: &T::Key, epoch: Epoch) -> Result, RyhopeError> { + async fn try_fetch_at(&self, k: &T::Key, epoch: UserEpoch) -> Result, RyhopeError> { self.storage.data().try_fetch_at(k, epoch).await } - async fn size_at(&self, epoch: Epoch) -> usize { + async fn try_fetch(&self, k: &T::Key) -> Result, RyhopeError> { + self.storage.data().try_fetch(k).await + } + + async fn size_at(&self, epoch: UserEpoch) -> usize { self.storage.data().size_at(epoch).await } - async fn keys_at(&self, epoch: Epoch) -> Vec { + async fn size(&self) -> usize { + self.storage.data().size().await + } + + async fn keys_at(&self, epoch: UserEpoch) -> Vec { self.storage.data().keys_at(epoch).await } - async fn random_key_at(&self, epoch: Epoch) -> Option { + async fn random_key_at(&self, epoch: UserEpoch) -> Option { self.storage.data().random_key_at(epoch).await } - async fn pairs_at(&self, epoch: Epoch) -> Result, RyhopeError> { + async fn pairs_at(&self, epoch: UserEpoch) -> Result, RyhopeError> { self.storage.data().pairs_at(epoch).await } } @@ -505,10 +538,15 @@ impl< /// Rollback this storage to the given epoch. Please note that this is a /// destructive and irreversible operation; to merely get a view on the /// storage at a given epoch, use the `view_at` method. 
- async fn rollback_to(&mut self, epoch: Epoch) -> Result<(), RyhopeError> { + async fn rollback_to(&mut self, epoch: UserEpoch) -> Result<(), RyhopeError> { trace!("[MerkleTreeKvDb] rolling back to {epoch}"); self.storage.rollback_to(epoch).await } + + async fn rollback(&mut self) -> Result<(), RyhopeError> { + trace!("[MerkleTreeKvDb] rolling back"); + self.storage.rollback().await + } } // Transaction-related operations must be forwared both to the node and the data @@ -522,7 +560,7 @@ impl< { async fn start_transaction(&mut self) -> Result<(), RyhopeError> { trace!("[MerkleTreeKvDb] calling start_transaction"); - self.storage.start_transaction()?; + self.storage.start_transaction().await?; Ok(()) } @@ -535,7 +573,7 @@ impl< } } - let update_tree = UpdateTree::from_paths(paths, self.current_epoch() + 1); + let update_tree = UpdateTree::from_paths(paths, self.current_epoch().await?); let plan = update_tree.clone().into_workplan(); @@ -564,7 +602,7 @@ impl< } } - let update_tree = UpdateTree::from_paths(paths, self.current_epoch() + 1); + let update_tree = UpdateTree::from_paths(paths, self.current_epoch().await?); let plan = update_tree.clone().into_workplan(); self.aggregate(plan.clone()).await?; self.storage.commit_in(tx).await?; @@ -572,14 +610,14 @@ impl< Ok(update_tree) } - fn commit_success(&mut self) { + async fn commit_success(&mut self) { trace!("[MerkleTreeKvDb] triggering commit_success"); - self.storage.commit_success() + self.storage.commit_success().await } - fn commit_failed(&mut self) { + async fn commit_failed(&mut self) { trace!("[MerkleTreeKvDb] triggering commit_failed"); - self.storage.commit_failed() + self.storage.commit_failed().await } } @@ -605,20 +643,20 @@ impl< pub async fn new_index_tree< V: NodePayload + Send + Sync, S: TransactionalStorage - + TreeStorage + + TreeStorage + PayloadStorage + FromSettings, >( - genesis_block: Epoch, + genesis_block: UserEpoch, storage_settings: S::Settings, reset_if_exist: bool, -) -> Result, RyhopeError> { +) -> Result, RyhopeError> { if genesis_block <= 0 { return Err(RyhopeError::fatal("the genesis block must be positive")); } let initial_epoch = genesis_block - 1; - let tree_settings = sbbst::Tree::with_shift(initial_epoch.try_into().unwrap()); + let tree_settings = sbbst::EpochTree::with_shift(initial_epoch.try_into().unwrap()); MerkleTreeKvDb::new( if reset_if_exist { @@ -647,7 +685,7 @@ pub async fn new_row_tree< + PayloadStorage + FromSettings>, >( - genesis_block: Epoch, + genesis_block: UserEpoch, alpha: scapegoat::Alpha, storage_settings: S::Settings, reset_if_exist: bool, diff --git a/ryhope/src/storage/memory.rs b/ryhope/src/storage/memory.rs index b6bc878fd..7715a00af 100644 --- a/ryhope/src/storage/memory.rs +++ b/ryhope/src/storage/memory.rs @@ -1,15 +1,15 @@ use serde::{Deserialize, Serialize}; -use std::collections::HashSet; +use std::collections::{BTreeSet, HashSet}; use std::hash::Hash; use std::{collections::HashMap, fmt::Debug}; use crate::error::{ensure, RyhopeError}; use crate::tree::TreeTopology; -use crate::{Epoch, InitSettings}; +use crate::{IncrementalEpoch, InitSettings, UserEpoch}; use super::{ - EpochKvStorage, EpochStorage, FromSettings, PayloadStorage, RoEpochKvStorage, - TransactionalStorage, TreeStorage, + EpochKvStorage, EpochMapper, EpochStorage, FromSettings, PayloadStorage, RoEpochKvStorage, + RoSharedEpochMapper, SharedEpochMapper, TransactionalStorage, TreeStorage, }; /// A RAM-backed implementation of a transactional epoch storage for a single value. 
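The refactoring below replaces the fixed `epoch_offset` with a shared epoch mapper. A sketch of the intended mapping semantics, under stated assumptions (it relies on `add_epoch`, which is not shown in this hunk, recording the pair and advancing the last epoch; the crate-private methods are called as if from inside `ryhope`):

    fn mapper_semantics_sketch() {
        // A tree born at block 100: incremental epochs 0, 1, 2, ... are expected
        // to map to user epochs 100, 101, 102, ... while the dense fast path holds.
        let mut mapper = InMemoryEpochMapper::new_at(100);
        assert_eq!(mapper.initial_epoch(), 100);
        // Registering incremental epoch 1 derives user epoch last_epoch + 1 = 101.
        assert_eq!(mapper.new_incremental_epoch(1), Some(101));
        assert_eq!(mapper.last_epoch(), 101);
        // A non-dense user epoch (e.g. block 105) would instead be installed via
        // `add_epoch`, flipping the mapper to its generic BTreeSet representation.
    }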
@@ -30,27 +30,55 @@ where
     in_tx: bool,
     /// The successive states of the persisted value.
     ts: Vec>,
-    /// The initial epoch
-    epoch_offset: Epoch,
+    /// The shared data structure used to map epochs
+    epoch_mapper: RoSharedEpochMapper,
 }

 impl VersionedStorage
 where
-    T: Debug + Send + Sync + Clone + Serialize + for<'a> Deserialize<'a>,
+    T: Debug + Send + Sync + Clone + Serialize + for<'b> Deserialize<'b>,
 {
-    fn new_at(initial_state: T, epoch: Epoch) -> Self {
+    fn new(initial_state: T, epoch_mapper: RoSharedEpochMapper) -> Self {
         Self {
             in_tx: false,
             ts: vec![Some(initial_state)],
-            epoch_offset: epoch,
+            epoch_mapper,
         }
     }
+
+    fn inner_epoch(&self) -> IncrementalEpoch {
+        (self.ts.len() - 1).try_into().unwrap()
+    }
+
+    fn fetch_at_incremental_epoch(&self, epoch: IncrementalEpoch) -> Result {
+        assert!(epoch >= 0);
+        self.ts[epoch as usize].clone().ok_or(RyhopeError::internal(
+            format!("No entry found in storage for epoch {epoch}"),
+        ))
+    }
+
+    fn rollback_to_incremental_epoch(
+        &mut self,
+        epoch: IncrementalEpoch,
+    ) -> Result<(), RyhopeError> {
+        ensure(
+            epoch <= self.inner_epoch(),
+            format!(
+                "unable to rollback to epoch `{}` more recent than current epoch `{}`",
+                epoch,
+                self.inner_epoch()
+            ),
+        )?;
+
+        self.ts.resize((epoch + 1).try_into().unwrap(), None);
+        Ok(())
+    }
 }

 impl TransactionalStorage for VersionedStorage
 where
-    T: Debug + Send + Sync + Clone + Serialize + for<'a> Deserialize<'a>,
+    T: Debug + Send + Sync + Clone + Serialize + for<'b> Deserialize<'b>,
 {
-    fn start_transaction(&mut self) -> Result<(), RyhopeError> {
+    async fn start_transaction(&mut self) -> Result<(), RyhopeError> {
         if self.in_tx {
             return Err(RyhopeError::AlreadyInTransaction);
         }
@@ -74,17 +102,28 @@ where

 impl EpochStorage for VersionedStorage
 where
-    T: Debug + Send + Sync + Clone + Serialize + for<'a> Deserialize<'a>,
+    T: Debug + Send + Sync + Clone + Serialize + for<'b> Deserialize<'b>,
 {
-    fn current_epoch(&self) -> Epoch {
-        let inner_epoch: Epoch = (self.ts.len() - 1).try_into().unwrap();
-        inner_epoch + self.epoch_offset
+    async fn current_epoch(&self) -> Result {
+        self.epoch_mapper
+            .try_to_user_epoch(self.inner_epoch())
+            .await
+            .ok_or(RyhopeError::CurrenEpochUndefined(self.inner_epoch()))
     }

-    async fn fetch_at(&self, epoch: Epoch) -> Result {
-        let epoch = epoch - self.epoch_offset;
-        assert!(epoch >= 0);
-        Ok(self.ts[epoch as usize].clone().unwrap())
+    async fn fetch_at(&self, epoch: UserEpoch) -> Result {
+        let epoch = self
+            .epoch_mapper
+            .try_to_incremental_epoch(epoch)
+            .await
+            .ok_or(RyhopeError::epoch_error(format!(
+                "IncrementalEpoch not found for epoch {epoch}"
+            )))?;
+        self.fetch_at_incremental_epoch(epoch)
+    }
+
+    async fn fetch(&self) -> Result {
+        self.fetch_at_incremental_epoch(self.inner_epoch())
     }

     async fn store(&mut self, t: T) -> Result<(), RyhopeError> {
@@ -94,24 +133,21 @@ where
         Ok(())
     }

-    async fn rollback_to(&mut self, epoch: Epoch) -> Result<(), RyhopeError> {
-        ensure(
-            epoch >= self.epoch_offset,
-            format!("unable to rollback before epoch {}", self.epoch_offset),
-        )?;
-
-        let epoch = epoch - self.epoch_offset;
-        ensure(
-            epoch <= self.current_epoch(),
-            format!(
-                "unable to rollback to epoch `{}` more recent than current epoch `{}`",
-                epoch,
-                self.current_epoch()
-            ),
-        )?;
+    async fn rollback_to(&mut self, epoch: UserEpoch) -> Result<(), RyhopeError> {
+        let inner_epoch = self
+            .epoch_mapper
+            .try_to_incremental_epoch(epoch)
+            .await
+            .ok_or(RyhopeError::epoch_error(format!(
+                "trying to rollback to an invalid epoch {}",
+                epoch
+            )))?;
self.rollback_to_incremental_epoch(inner_epoch) + } - self.ts.resize((epoch + 1).try_into().unwrap(), None); - Ok(()) + async fn rollback(&mut self) -> Result<(), RyhopeError> { + ensure(self.inner_epoch() > 0, "unable to rollback before epoch 0")?; + self.rollback_to_incremental_epoch(self.inner_epoch() - 1) } } @@ -126,70 +162,113 @@ where /// as there is (at least for now) a usecase where a tree is non-empty at epoch /// 0. #[derive(Debug)] -pub struct VersionedKvStorage { +pub struct VersionedKvStorage< + K: Hash + Eq + Clone + Debug + Send + Sync, + V: Clone + Debug + Send + Sync, +> { /// In the diffs, the value carried by the insertion/modification of a key /// is represented as a Some, whereas a deletion is represented by /// associating k to None. mem: Vec>>, - /// The initial epoch - epoch_offset: Epoch, + /// The shared data structure used to map epochs + epoch_mapper: RoSharedEpochMapper, } -impl Default for VersionedKvStorage { +impl Default + for VersionedKvStorage +{ fn default() -> Self { Self::new() } } -impl VersionedKvStorage { +impl + VersionedKvStorage +{ pub fn new() -> Self { - Self::new_at(0) + let epoch_mapper = SharedEpochMapper::new(InMemoryEpochMapper::new_at(0)); + Self::new_with_mapper(epoch_mapper) } - pub fn new_at(initial_epoch: Epoch) -> Self { + pub fn new_with_mapper(mapper: RoSharedEpochMapper) -> Self { VersionedKvStorage { mem: vec![Default::default()], - epoch_offset: initial_epoch, + epoch_mapper: mapper, } } - pub fn new_epoch(&mut self) { + fn new_epoch(&mut self) { self.mem.push(Default::default()); } -} - -impl RoEpochKvStorage for VersionedKvStorage -where - K: Hash + Eq + Clone + Debug + Send + Sync, - V: Clone + Debug + Send + Sync, -{ - fn initial_epoch(&self) -> Epoch { - self.epoch_offset - } - fn current_epoch(&self) -> Epoch { + fn inner_epoch(&self) -> IncrementalEpoch { // There is a 1-1 mapping between the epoch and the position in the list of // diffs; epoch 0 being the initial empty state. - let inner_epoch: Epoch = (self.mem.len() - 1) as Epoch; - inner_epoch + self.epoch_offset + (self.mem.len() - 1).try_into().unwrap() } - async fn try_fetch_at(&self, k: &K, epoch: Epoch) -> Result, RyhopeError> { - assert!(epoch >= self.epoch_offset); - let epoch = epoch - self.epoch_offset; + fn try_fetch_at_incremental_epoch(&self, k: &K, epoch: IncrementalEpoch) -> Option { + assert!(epoch >= 0); // To fetch a key at a given epoch, the list of diffs up to the // requested epoch is iterated in reverse. The first occurence of k, // i.e. the most recent one, will be the current value. // // If this occurence is a None, it means that k has been deleted. 
- for i in (0..=epoch as usize).rev() { let maybe = self.mem[i].get(k); if let Some(found) = maybe { - return Ok(found.to_owned()); + return found.to_owned(); }; } - Ok(None) + None + } + + fn rollback_to_incremental_epoch( + &mut self, + epoch: IncrementalEpoch, + ) -> Result<(), RyhopeError> { + ensure(epoch >= 0, "unable to rollback before epoch 0")?; + ensure( + epoch <= self.inner_epoch(), + format!( + "unable to rollback to epoch `{}` more recent than current epoch `{}`", + epoch, + self.inner_epoch() + ), + )?; + + self.mem.truncate((epoch + 1).try_into().unwrap()); + + Ok(()) + } +} + +impl RoEpochKvStorage for VersionedKvStorage +where + K: Hash + Eq + Clone + Debug + Send + Sync, + V: Clone + Debug + Send + Sync, +{ + async fn initial_epoch(&self) -> UserEpoch { + self.epoch_mapper.to_user_epoch(0).await as UserEpoch + } + + async fn current_epoch(&self) -> Result { + self.epoch_mapper + .try_to_user_epoch(self.inner_epoch()) + .await + .ok_or(RyhopeError::CurrenEpochUndefined(self.inner_epoch())) + } + + async fn try_fetch(&self, k: &K) -> Result, RyhopeError> { + Ok(self.try_fetch_at_incremental_epoch(k, self.inner_epoch())) + } + + async fn try_fetch_at(&self, k: &K, epoch: UserEpoch) -> Result, RyhopeError> { + Ok(self + .epoch_mapper + .try_to_incremental_epoch(epoch) + .await + .and_then(|inner_epoch| self.try_fetch_at_incremental_epoch(k, inner_epoch))) } // Expensive, but only used in test context. @@ -209,24 +288,24 @@ where count } - async fn size_at(&self, epoch: Epoch) -> usize { - assert!(epoch >= self.epoch_offset); - let epoch = epoch - self.epoch_offset; + async fn size_at(&self, epoch: UserEpoch) -> usize { + let inner_epoch = self.epoch_mapper.to_incremental_epoch(epoch).await; + assert!(inner_epoch >= 0); // To fetch a key at a given epoch, the list of diffs up to the let mut keys = HashSet::new(); - for i in 0..=epoch as usize { + for i in 0..=inner_epoch as usize { keys.extend(self.mem[i].keys()) } keys.len() } - async fn keys_at(&self, epoch: Epoch) -> Vec { - assert!(epoch >= self.epoch_offset); - let epoch = epoch - self.epoch_offset; + async fn keys_at(&self, epoch: UserEpoch) -> Vec { + let inner_epoch = self.epoch_mapper.to_incremental_epoch(epoch).await; + assert!(inner_epoch >= 0); let mut keys = HashSet::new(); - for i in 0..=epoch as usize { + for i in 0..=inner_epoch as usize { for (k, v) in self.mem[i].iter() { if v.is_some() { keys.insert(k); @@ -239,25 +318,36 @@ where keys.into_iter().cloned().collect() } - async fn random_key_at(&self, epoch: Epoch) -> Option { - assert!(epoch >= self.epoch_offset); - let epoch = epoch - self.epoch_offset; + async fn random_key_at(&self, epoch: UserEpoch) -> Option { + self.epoch_mapper + .try_to_incremental_epoch(epoch) + .await + .and_then(|inner_epoch| { + assert!(inner_epoch >= 0); - for i in (0..=epoch as usize).rev() { - for (k, v) in self.mem[i].iter() { - if v.is_some() { - return Some(k.clone()); + for i in (0..=inner_epoch as usize).rev() { + for (k, v) in self.mem[i].iter() { + if v.is_some() { + return Some(k.clone()); + } + } } - } - } - None + None + }) } - async fn pairs_at(&self, epoch: Epoch) -> Result, RyhopeError> { - assert!(epoch >= self.epoch_offset); + async fn pairs_at(&self, epoch: UserEpoch) -> Result, RyhopeError> { + let inner_epoch = self + .epoch_mapper + .try_to_incremental_epoch(epoch) + .await + .ok_or(RyhopeError::epoch_error(format!( + "IncrementalEpoch not found for epoch {epoch}" + )))?; + assert!(inner_epoch >= 0); let mut pairs = HashMap::new(); - for i in 0..=epoch as 
usize {
+        for i in 0..=inner_epoch as usize {
             for (k, v) in self.mem[i].iter() {
                 if let Some(v) = v.clone() {
                     pairs.insert(k.clone(), v);
@@ -292,55 +382,497 @@ where
         Ok(())
     }

-    async fn rollback_to(&mut self, epoch: Epoch) -> Result<(), RyhopeError> {
+    async fn rollback_to(&mut self, epoch: UserEpoch) -> Result<(), RyhopeError> {
+        let inner_epoch = self
+            .epoch_mapper
+            .try_to_incremental_epoch(epoch)
+            .await
+            .ok_or(RyhopeError::epoch_error(format!(
+                "Try to rollback to an invalid epoch {epoch}"
+            )))?;
+        self.rollback_to_incremental_epoch(inner_epoch)
+    }
+
+    async fn rollback(&mut self) -> Result<(), RyhopeError> {
+        ensure(self.inner_epoch() > 0, "unable to rollback before epoch 0")?;
+        self.rollback_to_incremental_epoch(self.inner_epoch() - 1)
+    }
+}
+
+/// Item representing a mapping between a `UserEpoch` and an `IncrementalEpoch`, which
+/// is stored in an instance of `InMemoryEpochMapper`. The item can be `Complete` or
+/// `Partial`, depending on whether it contains both a `UserEpoch` and an `IncrementalEpoch`
+/// or only one of the two.
+/// Partial `EpochMapItem`s will never be stored as entries of `InMemoryEpochMapper`: they are
+/// employed only to implement the lookup methods defined in the `EpochMapper` trait, which find
+/// the epoch mapping corresponding to either a given `UserEpoch` or a given `IncrementalEpoch`.
+/// In layman's terms, since both `UserEpoch`s and `IncrementalEpoch`s are expected to be monotonically
+/// increasing in an epoch mapper, the epoch mappings can easily be kept sorted by both `UserEpoch` and
+/// `IncrementalEpoch`. Therefore, finding an entry corresponding to a given `UserEpoch` (resp. `IncrementalEpoch`)
+/// can be done efficiently as follows:
+/// - Define a Partial `EpochMapItem` wrapping the given `UserEpoch` (resp. `IncrementalEpoch`)
+/// - Find the mapping with the given `UserEpoch` (resp. `IncrementalEpoch`) in the sorted set by comparing
+///   the defined Partial `EpochMapItem` with the entries found in the epoch mapper (which are all Complete);
+///   the comparison is done by looking only at their `UserEpoch` (resp. `IncrementalEpoch`) values
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum EpochMapItem {
+    PartialUser(UserEpoch),
+    PartialIncremental(IncrementalEpoch),
+    Complete(UserEpoch, IncrementalEpoch),
+}
+
+impl EpochMapItem {
+    /// Convert an `EpochMapItem` to the wrapped `UserEpoch` and
+    /// `IncrementalEpoch`. This method is expected to be called
+    /// only for complete `EpochMapItem`s, i.e., ones that wrap
+    /// both a `UserEpoch` and an `IncrementalEpoch`;
+    /// the method will panic if this assumption is not satisfied.
+    fn to_epochs(self) -> (UserEpoch, IncrementalEpoch) {
+        if let EpochMapItem::Complete(user_epoch, incremental_epoch) = self {
+            (user_epoch, incremental_epoch)
+        } else {
+            panic!("Invalid `EpochMapItem` being unpacked")
+        }
+    }
+}
+
+impl PartialOrd for EpochMapItem {
+    fn partial_cmp(&self, other: &Self) -> Option {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for EpochMapItem {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        // Implement the partial order relationship employed to compare
+        // `EpochMapItem`s.
It is partial since by construction we will never
+        // compare two Partial `EpochMapItem`s
+        match (self, other) {
+            (
+                EpochMapItem::PartialUser(first_user_epoch),
+                EpochMapItem::Complete(second_user_epoch, _),
+            ) => first_user_epoch.cmp(second_user_epoch),
+            (
+                EpochMapItem::PartialIncremental(first_incremental_epoch),
+                EpochMapItem::Complete(_, second_incremental_epoch),
+            ) => first_incremental_epoch.cmp(second_incremental_epoch),
+            (
+                EpochMapItem::Complete(first_user_epoch, _),
+                EpochMapItem::PartialUser(second_user_epoch),
+            ) => first_user_epoch.cmp(second_user_epoch),
+            (
+                EpochMapItem::Complete(_, first_incremental_epoch),
+                EpochMapItem::PartialIncremental(second_incremental_epoch),
+            ) => first_incremental_epoch.cmp(second_incremental_epoch),
+            (
+                EpochMapItem::Complete(first_user_epoch, first_incremental_epoch),
+                EpochMapItem::Complete(second_user_epoch, second_incremental_epoch),
+            ) => {
+                let user_epoch_cmp = first_user_epoch.cmp(second_user_epoch);
+                let incremental_epoch_cmp = first_incremental_epoch.cmp(second_incremental_epoch);
+                assert_eq!(
+                    user_epoch_cmp, incremental_epoch_cmp,
+                    "Breaking invariant of `EpochMapper`: both `UserEpoch` and `IncrementalEpoch`
+                    must be monotonically increasing"
+                );
+                user_epoch_cmp
+            }
+            _ =>
+            // all other cases are partial `EpochMapItem`s, which are never compared
+            {
+                unreachable!()
+            }
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+/// Data structure employed both for the in-memory implementation of an `EpochMapper`,
+/// and as a memory cache for the DB-based `EpochMapper` implementation.
+/// The flag `IS_CACHE` specifies whether the data structure is employed
+/// as a cache or as a standalone in-memory `EpochMapper`.
+/// It handles two types of epoch mappings, depending on how the epoch maps
+/// are inserted by users:
+///
+/// - If the `UserEpoch`s being inserted are all incremental, starting from an
+///   initial offset, then an optimized implementation is employed for this conversion
+/// - Otherwise, there is a more generic implementation that can handle any monotonically
+///   increasing sequence of `UserEpoch`s
+///
+/// The first implementation is used as long as the `UserEpoch`s being inserted follow the
+/// incremental pattern; as soon as a non-incremental `UserEpoch` is inserted, the
+/// implementation falls back to the generic one
+pub struct InMemoryEpochMapperGeneric {
+    // Generic implementation to map monotonically increasing `UserEpoch`s to `IncrementalEpoch`s
+    generic_map: BTreeSet,
+    // Optimized implementation for incremental `UserEpoch`s
+    incremental_epochs_map: Option,
+}
+/// In-memory implementation of `EpochMapper`, which allows mapping a
+/// `UserEpoch` to an `IncrementalEpoch` used by storages
+pub type InMemoryEpochMapper = InMemoryEpochMapperGeneric;
+/// In-memory cache of the DB-based implementation of `EpochMapper`
+pub(crate) type EpochMapperCache =
+    InMemoryEpochMapperGeneric;
+
+#[derive(Clone, Debug)]
+/// Data structure employed to map `UserEpoch`s to `IncrementalEpoch`s in case
+/// `UserEpoch`s are all sequential.
In this case, it is sufficient to simply store: +/// - The initial offset to convert between `UserEpoch`s and `IncrementalEpoch`s +/// - The last inserted `UserEpoch` +struct IncrementalEpochMap { + offset: UserEpoch, + last_epoch: UserEpoch, +} + +impl + InMemoryEpochMapperGeneric +{ + pub(crate) fn new_at(initial_epoch: UserEpoch) -> Self { + // by default, we assume epochs are incremental, so we initialize + // the optimized epochs map + Self { + generic_map: BTreeSet::new(), + incremental_epochs_map: Some(IncrementalEpochMap { + offset: initial_epoch, + last_epoch: initial_epoch, + }), + } + } + + pub(crate) fn initial_epoch(&self) -> UserEpoch { + match self.incremental_epochs_map { + Some(IncrementalEpochMap { + offset: initial_epoch, + .. + }) => initial_epoch, + None => { + let (initial_epoch, initial_inner_epoch) = + self.generic_map.iter().next().expect( + "Initial epoch is always expected to be inserted at build-time in the storage", + ).to_epochs(); + assert_eq!(initial_inner_epoch, 0); + initial_epoch + } + } + } + + pub(crate) fn last_epoch(&self) -> UserEpoch { + match self.incremental_epochs_map { + Some(IncrementalEpochMap { last_epoch, .. }) => last_epoch, + None => { + self.generic_map + .iter() + .next_back() + .expect( + "No epoch found in `InMemoryEpochMapper`, + it is assumed that there is always at least one epoch", + ) + .to_epochs() + .0 + } + } + } + + /// Return the maximum number of epoch mapping entries that can be stored in `self`, if any. + fn max_number_of_entries(&self) -> Option { + (IS_CACHE && self.incremental_epochs_map.is_none()).then_some(MAX_ENTRIES) + } + + fn try_to_user_epoch_inner(&self, epoch: IncrementalEpoch) -> Option { + match self.incremental_epochs_map { + Some(IncrementalEpochMap { + offset: initial_epoch, + last_epoch, + }) => { + let user_epoch = epoch + initial_epoch; + // return `user_epoch` only if it is at most `last_epoch` + (user_epoch <= last_epoch).then_some(user_epoch) + } + None => { + // To look up an `IncrementalEpoch` in `self.generic_map`, we build + // an instance of `EpochMapItem::PartialIncremental` for the + // `IncrementalEpoch` `epoch`. + // The partial order relationship defined for `EpochMapItem` allows us to + // efficiently find in the `BTreeSet` the epoch mapping whose `IncrementalEpoch` + // corresponds to `epoch`, if any + let epoch_map_item = EpochMapItem::PartialIncremental(epoch); + self.generic_map + .get(&epoch_map_item) + .map(|item| item.to_epochs().0) + } + } + } + + /// Add a new epoch mapping for `IncrementalEpoch` `epoch`, assuming that `UserEpoch`s + /// are also computed incrementally from an initial shift. If there is already a mapping for + /// `IncrementalEpoch` `epoch`, then this function has no side effects, because it is assumed + /// that the mapping has already been provided according to another, non-incremental, logic. + /// This function returns the `UserEpoch` being mapped to `epoch`, in case a new mapping + /// is actually inserted.
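
Before the insertion paths below, the generic lookup is worth seeing in isolation. A minimal, self-contained sketch of the partial-probe technique over a `BTreeSet` (plain `i64` epochs; unlike the real code, `Eq` is implemented by hand so it agrees with the ordering, since this toy version only ever compares a partial probe against complete entries):

```rust
use std::cmp::Ordering;
use std::collections::BTreeSet;

type UserEpoch = i64;
type IncrementalEpoch = i64;

#[derive(Clone, Copy, Debug)]
enum Item {
    PartialUser(UserEpoch),
    Complete(UserEpoch, IncrementalEpoch),
}

impl Item {
    // Both coordinates grow together, so ordering by the user-epoch
    // coordinate alone is consistent for complete entries.
    fn user(&self) -> UserEpoch {
        match self {
            Item::PartialUser(u) | Item::Complete(u, _) => *u,
        }
    }
}

impl Ord for Item {
    fn cmp(&self, other: &Self) -> Ordering {
        self.user().cmp(&other.user())
    }
}
impl PartialOrd for Item {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl PartialEq for Item {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}
impl Eq for Item {}

fn main() {
    let map: BTreeSet<Item> = [(10, 0), (13, 1), (20, 2)]
        .into_iter()
        .map(|(u, i)| Item::Complete(u, i))
        .collect();
    // O(log n): the partial probe compares equal to `Complete(13, 1)`
    // under the ordering above, so `get` returns the full mapping.
    assert!(matches!(
        map.get(&Item::PartialUser(13)),
        Some(Item::Complete(13, 1))
    ));
}
```
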
+ pub(crate) fn new_incremental_epoch(&mut self, epoch: IncrementalEpoch) -> Option { + // compute last arbitrary epoch having been inserted in the map + let last_epoch = self.last_epoch(); + // check if `epoch` has already been inserted in the map + match self.try_to_user_epoch_inner(epoch) { + Some(matched_epoch) => { + // `epoch` has already been inserted, only check that + // `matched_epoch` corresponds to the last inserted `UserEpoch` + assert_eq!(last_epoch, matched_epoch); + None + } + None => { + // get arbitrary epoch corresponding to the new incremental epoch. + // in this implementation, it is computed assuming that also + // `UserEpoch`s are incremental, and so the epoch to be inserted + // is simply `last_epoch + 1` + let mapped_epoch = last_epoch + 1; + // add the epoch mapping to `self` + self.add_epoch(mapped_epoch, epoch) + .ok() + .map(|_| mapped_epoch) + } + } + } + + pub(crate) fn rollback_to(&mut self, epoch: UserEpoch) -> Result<(), RyhopeError> { + // first, check that we are rolling back to a valid epoch + let last_epoch = self.last_epoch(); ensure( - epoch >= self.epoch_offset, - format!("unable to rollback before epoch {}", self.epoch_offset), + epoch <= last_epoch, + "cannot rollback to epoch greater than last epoch", )?; - - let epoch = epoch - self.epoch_offset; + let initial_epoch = self.initial_epoch(); ensure( - epoch <= self.current_epoch(), - format!( - "unable to rollback to epoch `{}` more recent than current epoch `{}`", - epoch, - self.current_epoch() - ), + epoch >= initial_epoch, + "cannot rollback to epoch smaller than initial epoch", )?; + match self.incremental_epochs_map.as_mut() { + Some(IncrementalEpochMap { last_epoch, .. }) => { + *last_epoch = epoch; + } + None => { + // first, check that the epoch we are rolling back to exists + ensure( + self.generic_map.contains(&EpochMapItem::PartialUser(epoch)), + format!("Trying to rollback to non-existing epoch {epoch}"), + )?; + // now, erase all epochs greater than `epoch` + while self.generic_map.last().unwrap().to_epochs().0 > epoch { + self.generic_map.pop_last(); + } + } + } - self.mem.truncate((epoch + 1).try_into().unwrap()); + Ok(()) + } + + // Move from the optimized implementation for incremental `UserEpoch`s to the generic map + // implementation. 
This method is called when a request to add a non-incremental `UserEpoch` + // is detected + fn fallback_to_generic_map(&mut self) { + let IncrementalEpochMap { + offset: initial_epoch, + last_epoch, + } = self.incremental_epochs_map.take().unwrap(); + self.generic_map = (initial_epoch..=last_epoch) + .enumerate() + .take(self.max_number_of_entries().unwrap_or( + usize::MAX, // this is practically unbounded + )) // fill up to the maximum number of entries allowed to be stored, if any + .map(|(i, epoch)| EpochMapItem::Complete(epoch, i as IncrementalEpoch)) + .collect(); + } + + // Add a new mapping `user_epoch -> incremental_epoch` to the generic map implementation of `self`; + // this method has to be called only when the caller knows that the generic map implementation is + // used to map `UserEpoch`s to `IncrementalEpoch`s + fn add_epoch_to_generic_map( + &mut self, + user_epoch: UserEpoch, + incremental_epoch: IncrementalEpoch, + ) -> Result<(), RyhopeError> { + // if we are replacing an existing `IncrementalEpoch`, ensure that + // we remove the old mapping entry + if let Some(epoch) = self.try_to_user_epoch_inner(incremental_epoch) { + let epoch_map_item = EpochMapItem::Complete(epoch, incremental_epoch); + self.generic_map.remove(&epoch_map_item); + } + + self.generic_map + .insert(EpochMapItem::Complete(user_epoch, incremental_epoch)); + + // check if we need to remove an item since we reached the maximum number of entries allowed + // to be stored + if let Some(max_entries) = self.max_number_of_entries() { + if self.generic_map.len() > max_entries { + // remove the second item in the mapping (as the first one contains the initial epoch) + let second_item = *self.generic_map.iter().nth(1).unwrap(); + self.generic_map.remove(&second_item); + } + } + + Ok(()) + } + + fn add_epoch( + &mut self, + user_epoch: UserEpoch, + incremental_epoch: IncrementalEpoch, + ) -> Result<(), RyhopeError> { + match self.incremental_epochs_map { + Some(IncrementalEpochMap { + offset: initial_epoch, + last_epoch, + }) => { + ensure(user_epoch >= initial_epoch, + format!("Trying to insert an epoch {user_epoch} smaller than initial epoch {initial_epoch}") + )?; + // we need to fall back to the generic map implementation if: + // - either we are inserting a new `user_epoch` which is no longer incremental + // - or we are updating the last inserted `incremental_epoch` with a bigger `user_epoch` + let last_incremental_epoch = last_epoch - initial_epoch; + if user_epoch > last_epoch + 1 + || (last_incremental_epoch == incremental_epoch && user_epoch > last_epoch) + { + // fall back to the generic map + self.fallback_to_generic_map(); + self.add_epoch_to_generic_map(user_epoch, incremental_epoch)?; + } else { + // In all other cases, we need to check that + // `incremental_epoch == user_epoch - initial_epoch`, to keep the epochs + // incremental + ensure(user_epoch - initial_epoch == incremental_epoch, + format!( + "Trying to insert an invalid incremental epoch: expected {}, found {incremental_epoch}", + user_epoch - initial_epoch, + ))?; + // If we are adding a new `user_epoch`, we update `last_epoch`; + // otherwise, it's a no-op + if user_epoch == last_epoch + 1 { + self.incremental_epochs_map.as_mut().unwrap().last_epoch = user_epoch; + } + } + } + None => { + self.add_epoch_to_generic_map(user_epoch, incremental_epoch)?; + } + } Ok(()) } } +impl EpochMapper + for InMemoryEpochMapperGeneric +{ + async fn try_to_incremental_epoch(&self, epoch: UserEpoch) -> Option { + match self.incremental_epochs_map { + 
Some(IncrementalEpochMap { + offset: initial_epoch, + last_epoch, + }) => (epoch <= last_epoch && epoch >= initial_epoch).then(|| epoch - initial_epoch), + None => { + // To look up a `UserEpoch` in `self.generic_map`, we build + // an instance of `EpochMapItem::PartialUser` for the + // `UserEpoch` `epoch`. + // The partial order relationship defined for `EpochMapItem` allows us to + // efficiently find in the `BTreeSet` the epoch mapping whose `UserEpoch` + // corresponds to `epoch`, if any + let epoch_map_item = EpochMapItem::PartialUser(epoch); + self.generic_map + .get(&epoch_map_item) + .map(|item| item.to_epochs().1) + } + } + } + + async fn try_to_user_epoch(&self, epoch: IncrementalEpoch) -> Option { + self.try_to_user_epoch_inner(epoch) + } + + async fn add_epoch_map( + &mut self, + user_epoch: UserEpoch, + incremental_epoch: IncrementalEpoch, + ) -> Result<(), RyhopeError> { + self.add_epoch(user_epoch, incremental_epoch) + } +} + /// A RAM-backed storage for tree data. -pub struct InMemory { +pub struct InMemory { /// Storage for tree state. state: VersionedStorage<::State>, /// Storage for topological data. nodes: VersionedKvStorage<::Key, ::Node>, /// Storage for node-associated data. data: VersionedKvStorage<::Key, V>, + /// Mapper between user-defined epochs and internal incremental epochs + epoch_mapper: SharedEpochMapper, /// Whether a transaction is currently opened. in_tx: bool, } -impl InMemory { - pub fn new(tree_state: T::State) -> Self { - Self::new_at(tree_state, 0) + +impl + InMemory +{ + /// Initialize a new `InMemory` storage with a read-only epoch mapper + pub fn new_with_mapper( + tree_state: T::State, + epoch_mapper: SharedEpochMapper, + ) -> Self { + Self { + state: VersionedStorage::new(tree_state, (&epoch_mapper).into()), + nodes: VersionedKvStorage::new_with_mapper((&epoch_mapper).into()), + data: VersionedKvStorage::new_with_mapper((&epoch_mapper).into()), + epoch_mapper, + in_tx: false, + } } - pub fn new_at(tree_state: T::State, initial_epoch: Epoch) -> Self { + pub fn new_with_epoch(tree_state: T::State, initial_epoch: UserEpoch) -> Self { + let epoch_mapper = SharedEpochMapper::new(InMemoryEpochMapper::new_at(initial_epoch)); Self { - state: VersionedStorage::new_at(tree_state, initial_epoch), - nodes: VersionedKvStorage::new_at(initial_epoch), - data: VersionedKvStorage::new_at(initial_epoch), + state: VersionedStorage::new(tree_state, (&epoch_mapper).into()), + nodes: VersionedKvStorage::new_with_mapper((&epoch_mapper).into()), + data: VersionedKvStorage::new_with_mapper((&epoch_mapper).into()), + epoch_mapper, + in_tx: false, } } } -impl FromSettings for InMemory { +impl FromSettings + for InMemory +{ + type Settings = SharedEpochMapper; + + async fn from_settings( + init_settings: InitSettings, + storage_settings: Self::Settings, + ) -> Result { + match init_settings { + InitSettings::MustExist => unimplemented!(), + InitSettings::MustNotExist(tree_state) | InitSettings::Reset(tree_state) => { + Ok(Self::new_with_mapper(tree_state, storage_settings)) + } + InitSettings::MustNotExistAt(tree_state, initial_epoch) + | InitSettings::ResetAt(tree_state, initial_epoch) => { + // check that initial_epoch is in epoch_mapper + ensure( + storage_settings.read_access_ref().await.initial_epoch() == initial_epoch, + format!("Initial epoch {initial_epoch} not found in the epoch mapper provided as input") + )?; + Ok(Self::new_with_mapper(tree_state, storage_settings)) + } + } + } +} + +impl FromSettings + for InMemory +{ + type Settings = (); + + async fn from_settings( @@ 
-350,17 +882,17 @@ impl FromSettings for InMemory match init_settings { InitSettings::MustExist => unimplemented!(), InitSettings::MustNotExist(tree_state) | InitSettings::Reset(tree_state) => { - Ok(Self::new(tree_state)) + Ok(Self::new_with_epoch(tree_state, 0)) } InitSettings::MustNotExistAt(tree_state, initial_epoch) | InitSettings::ResetAt(tree_state, initial_epoch) => { - Ok(Self::new_at(tree_state, initial_epoch)) + Ok(Self::new_with_epoch(tree_state, initial_epoch)) } } } } -impl TreeStorage for InMemory +impl TreeStorage for InMemory where T: TreeTopology, T::Node: Clone, @@ -368,6 +900,7 @@ where { type StateStorage = VersionedStorage; type NodeStorage = VersionedKvStorage; + type EpochMapper = SharedEpochMapper; fn nodes(&self) -> &Self::NodeStorage { &self.nodes @@ -385,28 +918,43 @@ where &mut self.state } - async fn born_at(&self, epoch: Epoch) -> Vec { - assert!(epoch >= self.nodes.epoch_offset); - self.nodes.mem[(epoch - self.nodes.epoch_offset) as usize] + async fn born_at(&self, epoch: UserEpoch) -> Vec { + let inner_epoch = self.epoch_mapper.to_incremental_epoch(epoch).await; + assert!(inner_epoch >= 0); + self.nodes.mem[inner_epoch as usize] .keys() .cloned() .collect() } - async fn rollback_to(&mut self, epoch: Epoch) -> Result<(), RyhopeError> { + async fn rollback_to(&mut self, epoch: UserEpoch) -> Result<(), RyhopeError> { println!("Rolling back to {epoch}"); self.state.rollback_to(epoch).await?; self.nodes.rollback_to(epoch).await?; self.data.rollback_to(epoch).await?; - assert_eq!(self.state.current_epoch(), self.nodes.current_epoch()); - assert_eq!(self.state.current_epoch(), self.data.current_epoch()); + // Rollback epoch_mapper as well + self.epoch_mapper + .apply_fn(|mapper| mapper.rollback_to(epoch)) + .await?; + + assert_eq!(self.state.inner_epoch(), self.nodes.inner_epoch()); + assert_eq!(self.state.inner_epoch(), self.data.inner_epoch()); Ok(()) } + + fn epoch_mapper(&self) -> &Self::EpochMapper { + &self.epoch_mapper + } + + fn epoch_mapper_mut(&mut self) -> &mut Self::EpochMapper { + &mut self.epoch_mapper + } } -impl PayloadStorage<::Key, V> for InMemory +impl PayloadStorage<::Key, V> + for InMemory where T: TreeTopology, ::Key: Clone, @@ -423,20 +971,33 @@ where } } -impl TransactionalStorage for InMemory +impl TransactionalStorage for InMemory where T: TreeTopology, V: Clone + Debug + Send + Sync, { - fn start_transaction(&mut self) -> Result<(), RyhopeError> { + async fn start_transaction(&mut self) -> Result<(), RyhopeError> { if self.in_tx { return Err(RyhopeError::AlreadyInTransaction); } - self.state.start_transaction()?; + self.state.start_transaction().await?; self.data.new_epoch(); self.nodes.new_epoch(); self.in_tx = true; + + let new_epoch = self.state.inner_epoch(); + assert_eq!(new_epoch, self.nodes.inner_epoch()); + assert_eq!(new_epoch, self.data.inner_epoch()); + + // add new_epoch to epoch mapper, if it is not READ_ONLY + self.epoch_mapper + .apply_fn(|mapper| { + mapper.new_incremental_epoch(new_epoch); + Ok(()) + }) + .await?; + Ok(()) } diff --git a/ryhope/src/storage/mod.rs b/ryhope/src/storage/mod.rs index dfadd5312..74ced8915 100755 --- a/ryhope/src/storage/mod.rs +++ b/ryhope/src/storage/mod.rs @@ -7,7 +7,10 @@ use std::{ fmt::Debug, future::Future, hash::Hash, + ops::DerefMut, + sync::Arc, }; +use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use tokio_postgres::Transaction; use view::TreeStorageView; @@ -15,7 +18,7 @@ use self::updatetree::UpdateTree; use crate::{ error::RyhopeError, tree::{NodeContext, 
TreeTopology}, - Epoch, InitSettings, + IncrementalEpoch, InitSettings, UserEpoch, }; pub mod memory; @@ -54,10 +57,10 @@ where K: Debug + Hash + Eq + Clone + Sync + Send, { /// The keys touched by the query itself - pub core_keys: Vec<(Epoch, K)>, + pub core_keys: Vec<(UserEpoch, K)>, /// An epoch -> (K -> NodeContext, K -> Payload) mapping #[allow(clippy::type_complexity)] - epoch_lineages: HashMap>, HashMap)>, + epoch_lineages: HashMap>, HashMap)>, } impl WideLineage { @@ -68,7 +71,7 @@ impl WideLineage { self.core_keys.len() } - pub fn ctx_and_payload_at(&self, epoch: Epoch, key: &K) -> Option<(NodeContext, V)> { + pub fn ctx_and_payload_at(&self, epoch: UserEpoch, key: &K) -> Option<(NodeContext, V)> { match ( self.node_context_at(epoch, key), self.payload_at(epoch, key), @@ -77,13 +80,13 @@ impl WideLineage { _ => None, } } - pub fn node_context_at(&self, epoch: Epoch, key: &K) -> Option> { + pub fn node_context_at(&self, epoch: UserEpoch, key: &K) -> Option> { self.epoch_lineages .get(&epoch) .and_then(|h| h.0.get(key)) .cloned() } - pub fn payload_at(&self, epoch: Epoch, key: &K) -> Option { + pub fn payload_at(&self, epoch: UserEpoch, key: &K) -> Option { self.epoch_lineages .get(&epoch) .and_then(|h| h.1.get(key)) @@ -91,7 +94,7 @@ impl WideLineage { } /// Returns the list of keys touching the query associated with each epoch - pub fn keys_by_epochs(&self) -> HashMap> { + pub fn keys_by_epochs(&self) -> HashMap> { self.core_keys .iter() .fold(HashMap::new(), |mut acc, (epoch, k)| { @@ -99,7 +102,7 @@ impl WideLineage { acc }) } - pub fn update_tree_for(&self, epoch: Epoch) -> Option> { + pub fn update_tree_for(&self, epoch: UserEpoch) -> Option> { let epoch_data = self.epoch_lineages.get(&epoch)?; let all_paths = self .core_keys @@ -135,6 +138,129 @@ impl WideLineage { } } +// An `EpochMapper` allows to map `UserEpoch` to `IncrementalEpoch` of +// a `TreeStorage`, and vice versa +pub trait EpochMapper: Sized + Send + Sync + Clone + Debug { + fn try_to_incremental_epoch( + &self, + epoch: UserEpoch, + ) -> impl Future> + Send; + + fn to_incremental_epoch( + &self, + epoch: UserEpoch, + ) -> impl Future + Send { + async move { + self.try_to_incremental_epoch(epoch) + .await + .unwrap_or_else(|| panic!("IncrementalEpoch corresponding to {epoch} not found")) + } + } + + fn try_to_user_epoch( + &self, + epoch: IncrementalEpoch, + ) -> impl Future> + Send; + + fn to_user_epoch(&self, epoch: IncrementalEpoch) -> impl Future + Send { + async move { + self.try_to_user_epoch(epoch) + .await + .unwrap_or_else(|| panic!("UserEpoch corresponding to {epoch} not found")) + } + } + + fn add_epoch_map( + &mut self, + user_epoch: UserEpoch, + incremental_epoch: IncrementalEpoch, + ) -> impl Future> + Send; +} + +/// Wrapper data structure to safely use an instance of an `EpochMapper` shared among multiple +/// threads. 
The `READ_ONLY` flag specifies whether the wrapped `EpochMapper` can be +/// modified or not by callers of this wrapper; that is, if `READ_ONLY` is `true`, then callers of +/// this wrapper can only access the `EpochMapper` without modifying it +#[derive(Clone, Debug)] +pub struct SharedEpochMapper(Arc>); + +pub(crate) type RoSharedEpochMapper = SharedEpochMapper; + +impl From<&SharedEpochMapper> + for RoSharedEpochMapper +{ + fn from(value: &SharedEpochMapper) -> Self { + Self(value.0.clone()) + } +} + +impl SharedEpochMapper { + pub(crate) fn new(mapper: T) -> Self { + Self(Arc::new(RwLock::new(mapper))) + } + + /// Get writable access to the underlying `EpochMapper`, if `SharedEpochMapper` + /// is not READ_ONLY. Returns `None` if `SharedEpochMapper` is instead `READ_ONLY`. + pub(crate) async fn write_access_ref(&mut self) -> Option> { + if !READ_ONLY { + Some(self.0.write().await) + } else { + None + } + } + + pub(crate) async fn read_access_ref(&self) -> RwLockReadGuard { + self.0.read().await + } + + pub(crate) async fn apply_fn Result<(), RyhopeError>>( + &mut self, + mut f: Fn, + ) -> Result<(), RyhopeError> + where + T: 'static, + { + if let Some(mut mapper) = self.write_access_ref().await { + f(mapper.deref_mut()) + } else { + Ok(()) + } + } +} + +impl AsRef> for SharedEpochMapper { + fn as_ref(&self) -> &RwLock { + &self.0 + } +} + +impl EpochMapper for SharedEpochMapper { + async fn try_to_incremental_epoch(&self, epoch: UserEpoch) -> Option { + self.0.read().await.try_to_incremental_epoch(epoch).await + } + + async fn try_to_user_epoch(&self, epoch: IncrementalEpoch) -> Option { + self.0.read().await.try_to_user_epoch(epoch).await + } + + async fn add_epoch_map( + &mut self, + user_epoch: UserEpoch, + incremental_epoch: IncrementalEpoch, + ) -> Result<(), RyhopeError> { + // add new epoch mapping only if `self` is not READ_ONLY + if !READ_ONLY { + self.0 + .write() + .await + .add_epoch_map(user_epoch, incremental_epoch) + .await + } else { + Ok(()) + } + } +} + /// A `TreeStorage` stores all data related to the tree structure, i.e. (i) the /// state of the tree structure, (ii) the putative metadata associated to the /// tree nodes. @@ -144,12 +270,20 @@ pub trait TreeStorage: Sized + Send + Sync { /// A storage backend for the underlying tree nodes type NodeStorage: EpochKvStorage + Send + Sync; + type EpochMapper: EpochMapper; + /// Return a handle to the state storage. fn state(&self) -> &Self::StateStorage; /// Return a mutable handle to the state storage. fn state_mut(&mut self) -> &mut Self::StateStorage; + /// Return a handle to the epoch mapper. + fn epoch_mapper(&self) -> &Self::EpochMapper; + + /// Return a mutable handle to the epoch mapper. + fn epoch_mapper_mut(&mut self) -> &mut Self::EpochMapper; + /// Return a handle to the nodes storage. fn nodes(&self) -> &Self::NodeStorage; @@ -157,19 +291,22 @@ pub trait TreeStorage: Sized + Send + Sync { /// Return a mutable handle to the nodes storage. fn nodes_mut(&mut self) -> &mut Self::NodeStorage; /// Return a list of the nodes “born” (i.e. dirtied) at `epoch`. - fn born_at(&self, epoch: Epoch) -> impl Future>; + fn born_at(&self, epoch: UserEpoch) -> impl Future>; /// Rollback this tree one epoch in the past - fn rollback(&mut self) -> impl Future> { - self.rollback_to(self.nodes().current_epoch() - 1) + fn rollback(&mut self) -> impl Future> { + async move { + self.rollback_to(self.nodes().current_epoch().await? 
- 1) + .await + } } /// Rollback this tree to the given epoch - fn rollback_to(&mut self, epoch: Epoch) -> impl Future>; + fn rollback_to(&mut self, epoch: UserEpoch) -> impl Future>; /// Return an epoch-locked, read-only, [`TreeStorage`] offering a view on /// this Merkle tree as it was at the given epoch. - fn view_at<'a>(&'a self, epoch: Epoch) -> TreeStorageView<'a, T, Self> + fn view_at<'a>(&'a self, epoch: UserEpoch) -> TreeStorageView<'a, T, Self> where T: 'a, { @@ -192,16 +329,16 @@ pub trait EpochStorage Dese where Self: Send + Sync, { - /// Return the current epoch of the storage - fn current_epoch(&self) -> Epoch; + /// Return the current epoch of the storage. It returns an error + /// if the current epoch is undefined, which might happen when the epochs + /// are handled by another storage. + fn current_epoch(&self) -> impl Future> + Send; /// Return the value stored at the current epoch. - fn fetch(&self) -> impl Future> + Send { - async { self.fetch_at(self.current_epoch()).await } - } + fn fetch(&self) -> impl Future> + Send; /// Return the value stored at the given epoch. - fn fetch_at(&self, epoch: Epoch) -> impl Future> + Send; + fn fetch_at(&self, epoch: UserEpoch) -> impl Future> + Send; /// Set the stored value at the current epoch. fn store(&mut self, t: T) -> impl Future> + Send; @@ -219,12 +356,10 @@ where } /// Roll back this storage one epoch in the past. - fn rollback(&mut self) -> impl Future> { - self.rollback_to(self.current_epoch() - 1) - } + fn rollback(&mut self) -> impl Future>; /// Roll back this storage to the given epoch - fn rollback_to(&mut self, epoch: Epoch) -> impl Future>; + fn rollback_to(&mut self, epoch: UserEpoch) -> impl Future>; } /// A read-only, versioned, KV storage. Intended to be implemented in @@ -237,23 +372,23 @@ where V: Send + Sync, { /// Return the first registered time stamp of the storage - fn initial_epoch(&self) -> Epoch; + fn initial_epoch(&self) -> impl Future + Send; - /// Return the current time stamp of the storage - fn current_epoch(&self) -> Epoch; + /// Return the current time stamp of the storage. It returns an error + /// if the current epoch is undefined, which might happen when the epochs + /// are handled by another storage. + fn current_epoch(&self) -> impl Future> + Send; /// Return the value associated to `k` at the current epoch if it exists, /// `None` otherwise. - fn try_fetch(&self, k: &K) -> impl Future, RyhopeError>> + Send { - async { self.try_fetch_at(k, self.current_epoch()).await } - } + fn try_fetch(&self, k: &K) -> impl Future, RyhopeError>> + Send; /// Return the value associated to `k` at the given `epoch` if it exists, /// `None` otherwise. fn try_fetch_at( &self, k: &K, - epoch: Epoch, + epoch: UserEpoch, ) -> impl Future, RyhopeError>> + Send; /// Return whether the given key is present at the current epoch. @@ -262,28 +397,33 @@ where } /// Return whether the given key is present at the given epoch. - fn contains_at(&self, k: &K, epoch: Epoch) -> impl Future> { + fn contains_at( + &self, + k: &K, + epoch: UserEpoch, + ) -> impl Future> { async move { self.try_fetch_at(k, epoch).await.map(|x| x.is_some()) } } /// Return the number of stored K/V pairs at the current epoch. - fn size(&self) -> impl Future { - self.size_at(self.current_epoch()) - } + fn size(&self) -> impl Future; /// Return the number of stored K/V pairs at the given epoch. - fn size_at(&self, epoch: Epoch) -> impl Future; + fn size_at(&self, epoch: UserEpoch) -> impl Future; /// Return all the keys existing at the given epoch. 
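
Throughout these traits, asynchronous methods are declared as `fn … -> impl Future<Output = …> + Send` rather than `async fn`, so that the `Send` bound on the returned future is explicit and provided methods can compose other async methods inside `async move` blocks. A minimal sketch of the pattern with a hypothetical `Versioned` trait (not part of this diff; assumes a `tokio` runtime for the demo):

```rust
use std::future::Future;

// Hypothetical stand-ins for the real epoch/error types.
type Epoch = i64;
#[derive(Debug)]
struct Error;

// `Sync` supertrait so the default method's future, which borrows
// `&self`, can be proven `Send`.
trait Versioned: Sync {
    // Explicit `impl Future + Send` return type instead of `async fn`.
    fn current_epoch(&self) -> impl Future<Output = Result<Epoch, Error>> + Send;

    // A provided method composing another async method, mirroring how
    // `rollback` is built on top of `current_epoch` in the diff.
    fn previous_epoch(&self) -> impl Future<Output = Result<Epoch, Error>> + Send {
        async move { Ok(self.current_epoch().await? - 1) }
    }
}

struct Fixed(Epoch);

impl Versioned for Fixed {
    fn current_epoch(&self) -> impl Future<Output = Result<Epoch, Error>> + Send {
        async move { Ok(self.0) }
    }
}

#[tokio::main]
async fn main() -> Result<(), Error> {
    let s = Fixed(7);
    assert_eq!(s.previous_epoch().await?, 6);
    Ok(())
}
```
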
- fn keys_at(&self, epoch: Epoch) -> impl Future>; + fn keys_at(&self, epoch: UserEpoch) -> impl Future>; /// Return a key alive at epoch, if any. - fn random_key_at(&self, epoch: Epoch) -> impl Future>; + fn random_key_at(&self, epoch: UserEpoch) -> impl Future>; /// Return all the valid key/value pairs at the given `epoch`. /// /// NOTE: be careful when using this function, it is not lazy. - fn pairs_at(&self, epoch: Epoch) -> impl Future, RyhopeError>>; + fn pairs_at( + &self, + epoch: UserEpoch, + ) -> impl Future, RyhopeError>>; } /// A versioned KV storage only allowed to mutate entries only in the current @@ -333,20 +473,18 @@ pub trait EpochKvStorage: /// Rollback this storage one epoch back. Please note that this is a /// destructive and irreversible operation. - fn rollback(&mut self) -> impl Future> { - self.rollback_to(self.current_epoch() - 1) - } + fn rollback(&mut self) -> impl Future>; /// Rollback this storage to the given epoch. Please note that this is a /// destructive and irreversible operation. - fn rollback_to(&mut self, epoch: Epoch) -> impl Future>; + fn rollback_to(&mut self, epoch: UserEpoch) -> impl Future>; } /// Characterizes a trait allowing for epoch-based atomic updates. pub trait TransactionalStorage { /// Start a new transaction, defining a transition between the storage at /// two epochs. - fn start_transaction(&mut self) -> Result<(), RyhopeError>; + fn start_transaction(&mut self) -> impl Future>; /// Closes the current transaction and commit to the new state at the new /// epoch. @@ -363,7 +501,7 @@ pub trait TransactionalStorage { Fut: Future>, { async { - self.start_transaction()?; + self.start_transaction().await?; f(self).await?; self.commit_transaction().await } @@ -388,12 +526,12 @@ pub trait SqlTransactionStorage: TransactionalStorage { /// This hook **MUST** be called after the **SUCCESSFUL** execution of the /// transaction given to [`commit_in`]. It **MUST NOT** be called if the /// transaction execution failed. - fn commit_success(&mut self); + fn commit_success(&mut self) -> impl Future; /// This hook **MUST** be called after the **FAILED** execution of the /// transaction given to [`commit_in`]. It **MUST NOT** be called if the /// transaction execution is successful. - fn commit_failed(&mut self); + fn commit_failed(&mut self) -> impl Future; } /// Similar to [`TransactionalStorage`], but returns a [`Minitree`] of the @@ -478,12 +616,12 @@ pub trait SqlTreeTransactionalStorage impl Future; /// This hook **MUST** be called after the **FAILED** execution of the /// transaction given to [`commit_in`]. It **MUST NOT** be called if the /// transaction execution is successful. - fn commit_failed(&mut self); + fn commit_failed(&mut self) -> impl Future; } /// The meta-operations trait gathers high-level operations that may be @@ -499,18 +637,18 @@ pub trait MetaOperations: /// by the union of all the paths-to-the-root for the given keys. 
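
The `in_transaction` combinator above brackets an arbitrary mutation between `start_transaction` and `commit_transaction`, so callers cannot forget either half. A synchronous miniature of the same bracketing (the real trait is async and uses `RyhopeError`; `Store` here is purely illustrative):

```rust
#[derive(Default)]
struct Store {
    committed: Vec<String>,
    staged: Vec<String>,
    in_tx: bool,
}

impl Store {
    fn start_transaction(&mut self) -> Result<(), String> {
        if self.in_tx {
            return Err("already in a transaction".into());
        }
        self.in_tx = true;
        Ok(())
    }

    fn commit_transaction(&mut self) -> Result<(), String> {
        if !self.in_tx {
            return Err("not in a transaction".into());
        }
        // Atomically promote the staged writes into the committed state.
        self.committed.append(&mut self.staged);
        self.in_tx = false;
        Ok(())
    }

    // Same shape as `in_transaction`: start, run the mutation, commit.
    fn in_transaction<F>(&mut self, f: F) -> Result<(), String>
    where
        F: FnOnce(&mut Self) -> Result<(), String>,
    {
        self.start_transaction()?;
        f(self)?;
        self.commit_transaction()
    }
}

fn main() -> Result<(), String> {
    let mut store = Store::default();
    store.in_transaction(|s| {
        s.staged.push("k1 -> v1".into());
        s.staged.push("k2 -> v2".into());
        Ok(())
    })?;
    assert_eq!(store.committed.len(), 2);
    Ok(())
}
```
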
fn wide_lineage_between( &self, - at: Epoch, + at: UserEpoch, t: &T, keys: &Self::KeySource, - bounds: (Epoch, Epoch), + bounds: (UserEpoch, UserEpoch), ) -> impl Future, RyhopeError>>; fn wide_update_trees( &self, - at: Epoch, + at: UserEpoch, t: &T, keys: &Self::KeySource, - bounds: (Epoch, Epoch), + bounds: (UserEpoch, UserEpoch), ) -> impl Future>, RyhopeError>> { async move { let wide_lineage = self.wide_lineage_between(at, t, keys, bounds).await?; @@ -524,11 +662,11 @@ } } #[allow(clippy::type_complexity)] - fn try_fetch_many_at + Send>( + fn try_fetch_many_at + Send>( &self, t: &T, data: I, - ) -> impl Future, V)>, RyhopeError>> + Send + ) -> impl Future, V)>, RyhopeError>> + Send where ::IntoIter: Send; } diff --git a/ryhope/src/storage/pgsql/epoch_mapper.rs b/ryhope/src/storage/pgsql/epoch_mapper.rs new file mode 100644 index 000000000..9c1e2fbf4 --- /dev/null +++ b/ryhope/src/storage/pgsql/epoch_mapper.rs @@ -0,0 +1,400 @@ +use anyhow::Context; +use std::{collections::BTreeSet, sync::Arc}; +use tokio::sync::RwLock; +use tokio_postgres::{Row, Transaction}; + +use crate::{ + error::{ensure, RyhopeError}, + mapper_table_name, + storage::{memory::EpochMapperCache, EpochMapper}, + IncrementalEpoch, UserEpoch, INCREMENTAL_EPOCH, USER_EPOCH, +}; + +use super::storages::DBPool; + +pub(crate) const INITIAL_INCREMENTAL_EPOCH: IncrementalEpoch = 0; + +/// Implementation of `EpochMapper` persisted to a Postgres DB +#[derive(Clone, Debug)] +pub struct EpochMapperStorage { + /// A pointer to the DB client + db: DBPool, + /// The table in which the data must be persisted + table: String, + in_tx: bool, + /// Set of `UserEpoch`s being updated in the cache since the last commit to the DB + dirty: BTreeSet, + // Internal cache used to store the mappings between `UserEpoch`s and `IncrementalEpoch`s + // already fetched from the DB. The main purpose of the cache is avoiding the need to run + // a SQL query to the DB each time an epoch translation is needed. + // The current cache implementation relies on the assumption that the epoch mapper is an + // append-only storage, that is: + // - Once a mapping between a `UserEpoch` and an `IncrementalEpoch` is added to the DB, it is + // no longer modified + // - An existing mapping between a `UserEpoch` and an `IncrementalEpoch` is never deleted, + // except by a rollback operation + // This assumption ensures that whenever data is read from the DB and moved to the + // cache, it never gets outdated, unless a rollback occurs. + // Note that, while the underlying DB storage could be shared among multiple `EpochMapperStorage`s, + // the cache is private to each instance of `EpochMapperStorage`, and it is handled uniquely by the + // current `EpochMapperStorage`.
The usage of a `RwLock` data structure to wrap the cache is only + // an implementation detail to be able to update the cache also in methods of `EpochMapper` trait + // which aren't expected to modify the `EpochMapper` + pub(super) cache: Arc>>, +} + +impl EpochMapperStorage { + /// Upper bound on the number of epoch mappings that can be stored in an `EpochMapperCache` + /// to avoid a blowup in memory consumption; the cache will be wiped as soon as the number of + /// epoch mappings found goes beyond this value + const MAX_CACHE_ENTRIES: usize = 1000000; + + pub(crate) fn mapper_table_name(&self) -> String { + mapper_table_name(&self.table) + } + + pub(crate) async fn new_from_table(table: String, db: DBPool) -> Result { + let cache = { + let connection = db + .get() + .await + .map_err(|err| RyhopeError::from_bb8("getting a connection", err))?; + let mapper_table_name = mapper_table_name(table.as_str()); + let rows = connection + .query( + &format!( + "SELECT {USER_EPOCH}, {INCREMENTAL_EPOCH} FROM {} ORDER BY {USER_EPOCH}", + mapper_table_name, + ), + &[], + ) + .await + .context("while fetching incremental epoch") + .unwrap(); + ensure( + !rows.is_empty(), + format!("Loading from empty table {mapper_table_name}"), + )?; + let read_row = |row: &Row| { + let user_epoch = row.get::<_, i64>(0) as UserEpoch; + let incremental_epoch = row.get::<_, i64>(1) as IncrementalEpoch; + (user_epoch, incremental_epoch) + }; + let (user_epoch, incremental_epoch) = read_row(&rows[0]); + ensure( + incremental_epoch == INITIAL_INCREMENTAL_EPOCH, + format!("Wrong initial epoch found in table {mapper_table_name}"), + )?; + let mut cache = EpochMapperCache::new_at(user_epoch); + for row in &rows[1..] { + let (user_epoch, incremental_epoch) = read_row(row); + cache.add_epoch_map(user_epoch, incremental_epoch).await?; + } + cache + }; + Ok(Self { + db, + table, + in_tx: false, + dirty: Default::default(), + cache: Arc::new(RwLock::new(cache)), + }) + } + + pub(crate) async fn new( + table: String, + db: DBPool, + initial_epoch: UserEpoch, + ) -> Result { + // Add initial epoch to cache + let mapper_table_name = mapper_table_name(table.as_str()); + Ok(if EXTERNAL_EPOCH_MAPPER { + // Initialize from mapper table + let mapper = Self::new_from_table(table, db).await?; + // check that there is a mapping initial_epoch -> INITIAL_INCREMENTAL_EPOCH + ensure( + mapper.try_to_incremental_epoch(initial_epoch).await + == Some(INITIAL_INCREMENTAL_EPOCH), + "No initial epoch {initial_epoch} found in mapping table {mapper_table_name}", + )?; + mapper + } else { + // add epoch map for `initial_epoch` to the DB + db.get() + .await + .map_err(|err| RyhopeError::from_bb8("getting a connection", err))? + .query( + &format!( + "INSERT INTO {} ({USER_EPOCH}, {INCREMENTAL_EPOCH}) + VALUES ($1, $2)", + mapper_table_name, + ), + &[&(initial_epoch as UserEpoch), &INITIAL_INCREMENTAL_EPOCH], + ) + .await + .map_err(|err| { + RyhopeError::from_db(format!("Inserting epochs in {mapper_table_name}"), err) + })?; + let cache = EpochMapperCache::new_at(initial_epoch); + Self { + db, + table, + in_tx: false, + dirty: Default::default(), + cache: Arc::new(RwLock::new(cache)), + } + }) + } + + /// Add a new epoch mapping for `IncrementalEpoch` `epoch`, assuming that `UserEpoch`s + /// are also computed incrementally from an initial shift. If there is already a mapping for + /// `IncrementalEpoch` `epoch`, then this function has no side effects, because it is assumed + /// that the mapping has already been provided according to another logic. 
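
The dirty-set bookkeeping described above can be illustrated without a database. A minimal sketch in which a `HashMap` stands in for the Postgres mapper table:

```rust
use std::collections::{BTreeSet, HashMap};

/// Miniature write-through cache: writes land in `cache`, their keys are
/// remembered in `dirty`, and `commit` flushes only the dirty entries.
#[derive(Default)]
struct MapperCache {
    cache: HashMap<i64, i64>, // user epoch -> incremental epoch
    dirty: BTreeSet<i64>,     // user epochs not yet persisted
}

impl MapperCache {
    fn add_epoch_map(&mut self, user: i64, incremental: i64) {
        self.cache.insert(user, incremental);
        self.dirty.insert(user);
    }

    /// Flush the dirty entries to the backing store (the DB in the real
    /// code), then clear the dirty set, mirroring the combination of
    /// `commit_in_transaction` and `commit_success`.
    fn commit(&mut self, store: &mut HashMap<i64, i64>) {
        for user in std::mem::take(&mut self.dirty) {
            store.insert(user, self.cache[&user]);
        }
    }
}

fn main() {
    let mut db = HashMap::new();
    let mut mapper = MapperCache::default();
    mapper.add_epoch_map(100, 0);
    mapper.add_epoch_map(105, 1);
    assert!(db.is_empty()); // nothing persisted before the commit
    mapper.commit(&mut db);
    assert_eq!(db[&105], 1);
    assert!(mapper.dirty.is_empty());
}
```
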
+ pub(crate) async fn new_incremental_epoch( + &mut self, + epoch: IncrementalEpoch, + ) -> Result<(), RyhopeError> { + if let Some(mapped_epoch) = self.cache.write().await.new_incremental_epoch(epoch) { + // if a new mapping is actually added to the cache, then we add the `UserEpoch` + // of this mapping to the `dirty` set, so that it is later committed to the DB + self.dirty.insert(mapped_epoch); + } + Ok(()) + } + + pub(crate) fn start_transaction(&mut self) -> Result<(), RyhopeError> { + if self.in_tx { + return Err(RyhopeError::AlreadyInTransaction); + } + self.in_tx = true; + Ok(()) + } + + pub(crate) async fn commit_in_transaction( + &mut self, + db_tx: &mut Transaction<'_>, + ) -> Result<(), RyhopeError> { + // build the set of epoch mappings (user_epoch, incremental_epoch) to be written to the DB + let mut rows_to_insert = vec![]; + for &user_epoch in self.dirty.iter() { + let incremental_epoch = self + .cache + .read() + .await + .try_to_incremental_epoch(user_epoch) + .await + .ok_or(RyhopeError::epoch_error(format!( + "Epoch {user_epoch} not found in cache" + )))?; + rows_to_insert.push(format!("({user_epoch}, {incremental_epoch})")); + } + + // Insert in the DB table with a single query + db_tx + .query( + &format!( + "INSERT INTO {} ({USER_EPOCH}, {INCREMENTAL_EPOCH}) + VALUES {}", + self.mapper_table_name(), + rows_to_insert.join(",") + ), + &[], + ) + .await + .map_err(|err| { + RyhopeError::from_db( + format!("Inserting new epochs in {}", self.mapper_table_name()), + err, + ) + })?; + + Ok(()) + } + + pub(crate) async fn latest_epoch(&self) -> UserEpoch { + // always fetch it from the DB as it might be outdated in cache + let connection = self.db.get().await.unwrap(); + let row = connection + .query_opt( + &format!( + "SELECT {USER_EPOCH}, {INCREMENTAL_EPOCH} FROM {} + WHERE {USER_EPOCH} = + (SELECT MAX({USER_EPOCH}) FROM {})", + self.mapper_table_name(), + self.mapper_table_name(), + ), + &[], + ) + .await + .context("while fetching incremental epoch") + .unwrap(); + if let Some(row) = row { + let user_epoch = row.get::<_, i64>(0) as UserEpoch; + let incremental_epoch = row.get::<_, i64>(1); + self.cache + .write() + .await + .add_epoch_map(user_epoch, incremental_epoch) + .await + .context("while adding mapping to cache") + .unwrap(); + user_epoch + } else { + unreachable!( + "There should always be at least one row in mapper table {}", + self.mapper_table_name() + ); + } + } + + pub(crate) fn commit_success(&mut self) { + self.dirty.clear(); + self.in_tx = false; + } + + pub(crate) async fn commit_failed(&mut self) { + // revert mappings inserted in the cache since the last commit. + // we rollback to the smallest epoch found in dirty, if any + if let Some(epoch) = self.dirty.pop_first() { + self.cache + .write() + .await + .rollback_to(epoch) + .expect("Cannot rollback to older epoch {epoch}"); + } + self.dirty.clear(); + self.in_tx = false; + } + + /// Rollback `self` to `UserEpoch` epoch. If `EXTERNAL_EPOCH_MAPPER` is true, then + /// this method only rollbacks the cache, as the DB is expected to be rolled back + /// by an external `EpochMapperStorage`; otherwise, the DB is also rolled back + /// by this method. 
Thus, this implementation of rollback currently works under the + /// assumption that the rollback operation will consistently be called also on + /// the external `EpochMapperStorage`, otherwise the rollback will not be effective + /// even for the current storage (as it will only wipe the cache, but not the DB) + pub(crate) async fn rollback_to( + &mut self, + epoch: UserEpoch, + ) -> Result<(), RyhopeError> { + // rollback the cache + self.cache.write().await.rollback_to(epoch)?; + if !EXTERNAL_EPOCH_MAPPER { + // also rollback the DB + let connection = self + .db + .get() + .await + .map_err(|err| RyhopeError::from_bb8("getting connection", err))?; + connection + .query( + &format!( + "DELETE FROM {} WHERE {USER_EPOCH} > $1", + self.mapper_table_name() + ), + &[&(epoch)], + ) + .await + .map_err(|err| { + RyhopeError::from_db( + format!( + "Rolling back epoch mapper table {}", + self.mapper_table_name() + ), + err, + ) + })?; + } + + Ok(()) + } +} + +impl EpochMapper for EpochMapperStorage { + async fn try_to_incremental_epoch(&self, epoch: UserEpoch) -> Option { + let result = self + .cache + .read() + .await + .try_to_incremental_epoch(epoch) + .await; + if result.is_none() { + let connection = self.db.get().await.unwrap(); + let row = connection + .query_opt( + &format!( + "SELECT {INCREMENTAL_EPOCH} FROM {} WHERE {USER_EPOCH} = $1", + self.mapper_table_name() + ), + &[&(epoch)], + ) + .await + .context("while fetching incremental epoch") + .unwrap(); + if let Some(row) = row { + let incremental_epoch = row.get::<_, i64>(0) as IncrementalEpoch; + self.cache + .write() + .await + .add_epoch_map(epoch, incremental_epoch) + .await + .context("while adding mapping to cache") + .unwrap(); + Some(incremental_epoch) + } else { + None + } + } else { + result + } + } + + async fn try_to_user_epoch(&self, epoch: IncrementalEpoch) -> Option { + let result = self.cache.read().await.try_to_user_epoch(epoch).await; + if result.is_none() { + let connection = self.db.get().await.unwrap(); + let row = connection + .query_opt( + &format!( + "SELECT {USER_EPOCH} FROM {} WHERE {INCREMENTAL_EPOCH} = $1", + self.mapper_table_name() + ), + &[&(epoch)], + ) + .await + .context("while fetching user epoch") + .unwrap(); + if let Some(row) = row { + let user_epoch = row.get::<_, i64>(0) as UserEpoch; + self.cache + .write() + .await + .add_epoch_map(user_epoch, epoch) + .await + .context("while adding mapping to cache") + .unwrap(); + Some(user_epoch) + } else { + None + } + } else { + result + } + } + + async fn add_epoch_map( + &mut self, + user_epoch: UserEpoch, + incremental_epoch: IncrementalEpoch, + ) -> Result<(), RyhopeError> { + // add to cache + self.cache + .write() + .await + .add_epoch_map(user_epoch, incremental_epoch) + .await?; + // add arbitrary epoch to dirty set + self.dirty.insert(user_epoch); + Ok(()) + } +} diff --git a/ryhope/src/storage/pgsql/mod.rs b/ryhope/src/storage/pgsql/mod.rs index 7f2208178..94a63f90c 100644 --- a/ryhope/src/storage/pgsql/mod.rs +++ b/ryhope/src/storage/pgsql/mod.rs @@ -1,28 +1,28 @@ use self::storages::{CachedDbStore, CachedDbTreeStore, DbConnector}; use super::{ - EpochStorage, FromSettings, MetaOperations, PayloadStorage, SqlTransactionStorage, - TransactionalStorage, TreeStorage, WideLineage, + EpochMapper, EpochStorage, FromSettings, MetaOperations, PayloadStorage, SharedEpochMapper, + SqlTransactionStorage, TransactionalStorage, TreeStorage, WideLineage, }; use crate::{ error::{ensure, RyhopeError}, + mapper_table_name, metadata_table_name, 
storage::pgsql::storages::DBPool, tree::{NodeContext, TreeTopology}, - Epoch, InitSettings, KEY, PAYLOAD, VALID_FROM, VALID_UNTIL, + IncrementalEpoch, InitSettings, UserEpoch, INCREMENTAL_EPOCH, KEY, PAYLOAD, USER_EPOCH, + VALID_FROM, VALID_UNTIL, }; use bb8_postgres::PostgresConnectionManager; +use epoch_mapper::{EpochMapperStorage, INITIAL_INCREMENTAL_EPOCH}; use futures::TryFutureExt; use itertools::Itertools; use serde::{Deserialize, Serialize}; -use std::{ - collections::HashSet, - fmt::Debug, - future::Future, - sync::{Arc, Mutex}, -}; +use std::{collections::HashSet, fmt::Debug, future::Future, sync::Arc}; use storages::{NodeProjection, PayloadProjection}; +use tokio::sync::RwLock; use tokio_postgres::{NoTls, Transaction}; use tracing::*; +mod epoch_mapper; mod storages; const MAX_PGSQL_BIGINT: i64 = i64::MAX; @@ -102,7 +102,10 @@ pub trait PayloadInDb: Clone + Send + Sync + Debug + Serialize + for<'a> Deseria impl Deserialize<'a>> PayloadInDb for T {} /// If it exists, remove the given table from the current database. -async fn delete_storage_table(db: DBPool, table: &str) -> Result<(), RyhopeError> { +async fn delete_storage_table( + db: DBPool, + table: &str, +) -> Result<(), RyhopeError> { let connection = db.get().await.unwrap(); connection .execute(&format!("DROP TABLE IF EXISTS {}", table), &[]) @@ -110,10 +113,37 @@ async fn delete_storage_table(db: DBPool, table: &str) -> Result<(), RyhopeError .map_err(|err| RyhopeError::from_db(format!("unable to delete table `{table}`"), err)) .map(|_| ())?; connection - .execute(&format!("DROP TABLE IF EXISTS {}_meta", table), &[]) + .execute( + &format!("DROP TABLE IF EXISTS {}", metadata_table_name(table)), + &[], + ) .await .map_err(|err| RyhopeError::from_db(format!("unable to delete table `{table}`"), err)) - .map(|_| ()) + .map(|_| ())?; + if EXTERNAL_EPOCH_MAPPER { + // The epoch mapper is external, so we just need to delete the view + let mapper_table_alias = mapper_table_name(table); + connection + .execute(&format!("DROP VIEW IF EXISTS {mapper_table_alias}"), &[]) + .await + .map_err(|err| { + RyhopeError::from_db(format!("unable to delete view `{mapper_table_alias}`"), err) + }) + .map(|_| ()) + } else { + // The epoch mapper is internal, so we directly erase the table + let mapper_table_name = mapper_table_name(table); + connection + .execute( + &format!("DROP TABLE IF EXISTS {mapper_table_name} CASCADE"), + &[], + ) + .await + .map_err(|err| { + RyhopeError::from_db(format!("unable to delete table `{mapper_table_name}`"), err) + }) + .map(|_| ()) + } } /// Keeps track of which kind of operation came into the cache @@ -159,9 +189,13 @@ pub struct SqlStorageSettings { pub table: String, /// A way to connect to the DB server pub source: SqlServerConnection, + /// In case an external epoch mapper is employed for this storage, + /// this field contains the name of the table providing such an epoch mapper. 
+ /// It is None if the epoch mapper is handled internally by the storage + pub external_mapper: Option, } -pub struct PgsqlStorage +pub struct PgsqlStorage where T: TreeTopology + DbConnector, T::Key: ToFromBytea, @@ -173,18 +207,21 @@ where /// A connection to the PostgreSQL server db: DBPool, /// The current epoch - epoch: i64, + epoch: IncrementalEpoch, + /// Epoch mapper + epoch_mapper: SharedEpochMapper, /// Tree state information state: CachedDbStore, /// Topological information - tree_store: Arc>>, + tree_store: Arc>>, nodes: NodeProjection, payloads: PayloadProjection, /// If any, the transaction progress in_tx: bool, } -impl FromSettings for PgsqlStorage +impl FromSettings + for PgsqlStorage where T: TreeTopology + DbConnector, T::Key: ToFromBytea, @@ -198,6 +235,24 @@ where init_settings: InitSettings, storage_settings: Self::Settings, ) -> Result { + // check consistency between `EXTERNAL_EPOCH_MAPPER` and `storage_settings.external_mapper`. + // This check is not relevant if `init_settings` is `MustExist`, as in this case we don't need + // to create a new mapping table or view. + if let InitSettings::MustExist = init_settings { + } else { + match ( + EXTERNAL_EPOCH_MAPPER, + storage_settings.external_mapper.is_some(), + ) { + (true, false) => Err(RyhopeError::internal( + "No external mapper table provided for a storage with external epoch mapper", + ))?, + (false, true) => Err(RyhopeError::internal( + "External mapper table provided for a storage with no external epoch mapper", + ))?, + _ => {} + } + }; match init_settings { InitSettings::MustExist => { Self::load_existing(&storage_settings.source, storage_settings.table).await @@ -208,6 +263,7 @@ where storage_settings.table, tree_state, 0, + storage_settings.external_mapper, ) .await } @@ -217,6 +273,7 @@ where storage_settings.table, tree_state, epoch, + storage_settings.external_mapper, ) .await } @@ -226,6 +283,7 @@ where storage_settings.table, tree_settings, 0, + storage_settings.external_mapper, ) .await } @@ -235,6 +293,7 @@ where storage_settings.table, tree_settings, initial_epoch, + storage_settings.external_mapper, ) .await } @@ -252,7 +311,10 @@ async fn fetch_epoch_data(db: DBPool, table: &str) -> Result<(i64, i64), RyhopeE let connection = db.get().await.unwrap(); connection .query_one( - &format!("SELECT MIN({VALID_FROM}), MAX({VALID_UNTIL}) FROM {table}_meta",), + &format!( + "SELECT MIN({VALID_FROM}), MAX({VALID_UNTIL}) FROM {}", + metadata_table_name(table) + ), &[], ) .await @@ -260,7 +322,8 @@ async fn fetch_epoch_data(db: DBPool, table: &str) -> Result<(i64, i64), RyhopeE .map_err(|err| RyhopeError::from_db("fetching current epoch data", err)) } -impl std::fmt::Display for PgsqlStorage +impl std::fmt::Display + for PgsqlStorage where T: TreeTopology + DbConnector, T::Key: ToFromBytea, @@ -272,7 +335,7 @@ where write!(f, "PgSqlStorage {}@{}", self.table, self.epoch) } } -impl PgsqlStorage +impl PgsqlStorage where T: TreeTopology + DbConnector, T::Key: ToFromBytea, @@ -288,7 +351,8 @@ where db_src: &SqlServerConnection, table: String, tree_state: T::State, - epoch: Epoch, + epoch: UserEpoch, + mapper_table: Option, ) -> Result { debug!("creating new table for `{table}` at epoch {epoch}"); let db_pool = Self::init_db_pool(db_src).await?; @@ -297,13 +361,18 @@ where fetch_epoch_data(db_pool.clone(), &table).await.is_err(), format!("table `{table}` already exists"), )?; - Self::create_tables(db_pool.clone(), &table).await?; + Self::create_tables(db_pool.clone(), &table, mapper_table).await?; + + let 
epoch_mapper = SharedEpochMapper::new( + EpochMapperStorage::new::(table.clone(), db_pool.clone(), epoch) + .await?, + ); - let tree_store = Arc::new(Mutex::new(CachedDbTreeStore::new( - epoch, - epoch, + let tree_store = Arc::new(RwLock::new(CachedDbTreeStore::new( + INITIAL_INCREMENTAL_EPOCH, table.clone(), db_pool.clone(), + (&epoch_mapper).into(), ))); let nodes = NodeProjection { wrapped: tree_store.clone(), @@ -315,13 +384,19 @@ where let r = Self { table: table.clone(), db: db_pool.clone(), - epoch, + epoch: 0, in_tx: false, tree_store, nodes, payloads, - state: CachedDbStore::with_value(epoch, table.clone(), db_pool.clone(), tree_state) - .await?, + state: CachedDbStore::with_value( + table.clone(), + db_pool.clone(), + tree_state, + (&epoch_mapper).into(), + ) + .await?, + epoch_mapper, }; Ok(r) } @@ -337,11 +412,31 @@ where let (initial_epoch, latest_epoch) = fetch_epoch_data(db_pool.clone(), &table).await?; debug!("loading `{table}`; latest epoch is {latest_epoch}"); - let tree_store = Arc::new(Mutex::new(CachedDbTreeStore::new( - initial_epoch, + ensure( + initial_epoch == INITIAL_INCREMENTAL_EPOCH, + format!( + "Wrong internal initial epoch found for existing table {table}: + expected {INITIAL_INCREMENTAL_EPOCH}, found {initial_epoch}" + ), + )?; + let epoch_mapper = + EpochMapperStorage::new_from_table(table.clone(), db_pool.clone()).await?; + let latest_epoch_in_mapper = epoch_mapper + .to_incremental_epoch(epoch_mapper.latest_epoch().await) + .await; + ensure( + latest_epoch_in_mapper == latest_epoch, + format!( + "Mismatch between the latest internal epoch in mapper table and the latest epoch + found in the storage: {latest_epoch_in_mapper} != {latest_epoch}" + ), + )?; + let epoch_mapper = SharedEpochMapper::new(epoch_mapper); + let tree_store = Arc::new(RwLock::new(CachedDbTreeStore::new( latest_epoch, table.clone(), db_pool.clone(), + (&epoch_mapper).into(), ))); let nodes = NodeProjection { wrapped: tree_store.clone(), @@ -354,7 +449,13 @@ where table: table.clone(), db: db_pool.clone(), epoch: latest_epoch, - state: CachedDbStore::new(initial_epoch, latest_epoch, table.clone(), db_pool.clone()), + state: CachedDbStore::new( + latest_epoch, + table.clone(), + db_pool.clone(), + (&epoch_mapper).into(), + ), + epoch_mapper, tree_store, nodes, payloads, @@ -370,19 +471,29 @@ where db_src: &SqlServerConnection, table: String, tree_state: T::State, - initial_epoch: Epoch, + initial_epoch: UserEpoch, + mapper_table: Option, ) -> Result { debug!("resetting table `{table}` at epoch {initial_epoch}"); let db_pool = Self::init_db_pool(db_src).await?; - delete_storage_table(db_pool.clone(), &table).await?; - Self::create_tables(db_pool.clone(), &table).await?; + delete_storage_table::(db_pool.clone(), &table).await?; + Self::create_tables(db_pool.clone(), &table, mapper_table).await?; - let tree_store = Arc::new(Mutex::new(CachedDbTreeStore::new( - initial_epoch, - initial_epoch, + let epoch_mapper = SharedEpochMapper::new( + EpochMapperStorage::new::( + table.clone(), + db_pool.clone(), + initial_epoch, + ) + .await?, + ); + + let tree_store = Arc::new(RwLock::new(CachedDbTreeStore::new( + INITIAL_INCREMENTAL_EPOCH, table.clone(), db_pool.clone(), + (&epoch_mapper).into(), ))); let nodes = NodeProjection { wrapped: tree_store.clone(), @@ -394,14 +505,15 @@ where let r = Self { table: table.clone(), db: db_pool.clone(), - epoch: initial_epoch, + epoch: INITIAL_INCREMENTAL_EPOCH, state: CachedDbStore::with_value( - initial_epoch, table.clone(), db_pool.clone(), tree_state, + 
(&epoch_mapper).into(), + ) + .await?, + epoch_mapper, tree_store, nodes, payloads, @@ -458,7 +570,11 @@ where /// the tree at the given epoch range. /// /// Will fail if the CREATE is not valid (e.g. the table already exists) - async fn create_tables(db: DBPool, table: &str) -> Result<(), RyhopeError> { + async fn create_tables( + db: DBPool, + table: &str, + mapper_table: Option, + ) -> Result<(), RyhopeError> { let node_columns = >::columns() .iter() .map(|(name, t)| format!("{name} {t},")) @@ -482,11 +598,42 @@ .map(|_| ()) .map_err(|err| RyhopeError::from_db(format!("creating table `{table}`"), err))?; + // create index on `VALID_FROM` + connection + .execute( + &format!("CREATE INDEX {table}_index_from ON {table} ({VALID_FROM})"), + &[], + ) + .await + .map(|_| ()) + .map_err(|err| { + RyhopeError::from_db( + format!("unable to create index on table `{table}` for {VALID_FROM}"), + err, + ) + })?; + + // create index on `VALID_UNTIL` + connection + .execute( + &format!("CREATE INDEX {table}_index_until ON {table} ({VALID_UNTIL})"), + &[], + ) + .await + .map(|_| ()) + .map_err(|err| { + RyhopeError::from_db( + format!("unable to create index on table `{table}` for {VALID_UNTIL}"), + err, + ) + })?; + // The meta table will store everything related to the tree itself. + let meta_table = metadata_table_name(table); connection .execute( &format!( - "CREATE TABLE {table}_meta ( + "CREATE TABLE {meta_table} ( {VALID_FROM} BIGINT NOT NULL UNIQUE, {VALID_UNTIL} BIGINT DEFAULT -1, {PAYLOAD} JSONB)" ), ) .await .map(|_| ()) - .map_err(|err| RyhopeError::from_db(format!("creating table `{table}_meta`"), err))?; + .map_err(|err| RyhopeError::from_db(format!("creating table `{meta_table}`"), err))?; - Ok(()) + Ok(())?; + + // create index on `VALID_UNTIL` + connection + .execute( + &format!("CREATE INDEX {meta_table}_index_until ON {meta_table} ({VALID_UNTIL})"), + &[], + ) + .await + .map(|_| ()) + .map_err(|err| { + RyhopeError::from_db( + format!("unable to create index on table `{meta_table}` for {VALID_UNTIL}"), + err, + ) + })?; + + // Create the mapper table if the epoch mapper is not external; otherwise, + // create a view mapping the mapper table name expected for `table` onto `mapper_table`. + if EXTERNAL_EPOCH_MAPPER { + ensure( + mapper_table.is_some(), + "No mapper table name provided for storage with external epoch mapper", + )?; + let mapper_table_alias = mapper_table_name(table); + let mapper_table_name = mapper_table_name(mapper_table.unwrap().as_str()); + connection + .execute( + &format!( + " + CREATE VIEW {mapper_table_alias} AS + SELECT {USER_EPOCH}, {INCREMENTAL_EPOCH} FROM {mapper_table_name}" + ), + &[], + ) + .await + .map(|_| ()) + .map_err(|err| { + RyhopeError::from_db( + format!("unable to create view for `{mapper_table_alias}`"), + err, + ) + }) + } else { + let mapper_table_name = mapper_table_name(table); + connection + .execute( + &format!( + "CREATE TABLE {mapper_table_name} ( + {USER_EPOCH} BIGINT NOT NULL UNIQUE, + {INCREMENTAL_EPOCH} BIGINT NOT NULL UNIQUE + )" + ), + &[], + ) + .await + .map(|_| ()) + .map_err(|err| { + RyhopeError::from_db( + format!("unable to create table `{mapper_table_name}`"), + err, + ) + }) + } } /// Close the lifetime of a row to `self.epoch`. 
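
From the caller's side, the `external_mapper` field of `SqlStorageSettings` is what selects between these two branches. A sketch of plausible settings for two trees sharing one epoch mapper, assuming `external_mapper` is an `Option<String>` holding the name of the tree table whose mapper should be reused (table names are illustrative):

```rust
use ryhope::storage::pgsql::{SqlServerConnection, SqlStorageSettings};

fn settings(db_uri: &str) -> (SqlStorageSettings, SqlStorageSettings) {
    // The index tree persists its own epoch mapper alongside its table.
    let index = SqlStorageSettings {
        source: SqlServerConnection::NewConnection(db_uri.to_string()),
        table: "index_tree".to_string(),
        external_mapper: None,
    };
    // The row tree does not own a mapper: at creation time it gets a view
    // onto the index tree's mapper table (the EXTERNAL_EPOCH_MAPPER branch).
    let rows = SqlStorageSettings {
        source: SqlServerConnection::NewConnection(db_uri.to_string()),
        table: "row_tree".to_string(),
        external_mapper: Some("index_tree".to_string()),
    };
    (index, rows)
}
```
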
@@ -566,33 +776,15 @@ where // Collect all the keys found in the caches let mut cached_keys = HashSet::new(); { - cached_keys.extend(self.tree_store.lock().unwrap().nodes_cache.keys().cloned()); + cached_keys.extend(self.tree_store.read().await.nodes_cache.keys().cloned()); } { - cached_keys.extend( - self.tree_store - .lock() - .map_err(|e| { - RyhopeError::fatal(format!("failed to lock tree store mutex: {e:?}")) - })? - .payload_cache - .keys() - .cloned(), - ); + cached_keys.extend(self.tree_store.read().await.payload_cache.keys().cloned()); } for k in cached_keys { - let node_value = { self.tree_store.lock().unwrap().nodes_cache.get(&k).cloned() }; - let data_value = { - self.tree_store - .lock() - .map_err(|e| { - RyhopeError::fatal(format!("failed to lock tree store mutex: {e:?}")) - })? - .payload_cache - .get(&k) - .cloned() - }; + let node_value = { self.tree_store.read().await.nodes_cache.get(&k).cloned() }; + let data_value = { self.tree_store.read().await.payload_cache.get(&k).cloned() }; match (node_value, data_value) { // Nothing or a combination of read-only operations, do nothing @@ -654,13 +846,20 @@ (_, Some(None)) => unreachable!(), } } + // add new incremental epoch to `epoch_mapper` (unless an epoch map for `self.epoch + 1` + // has already been added to `self.epoch_mapper`) and commit the new epoch map to DB + let new_epoch = self.epoch + 1; + if let Some(mut mapper) = self.epoch_mapper.write_access_ref().await { + mapper.new_incremental_epoch(new_epoch).await?; + mapper.commit_in_transaction(db_tx).await?; + } self.state.commit_in(db_tx).await?; trace!("[{}] commit successful.", self.table); Ok(()) } // FIXME: should return Result - fn on_commit_success(&mut self) { + async fn on_commit_success(&mut self) { assert!(self.in_tx); trace!( "[{self}] commit successful; updating inner state - current epoch {}", self.epoch ); self.in_tx = false; self.epoch += 1; - self.state.commit_success(); - self.tree_store.lock().unwrap().new_epoch(); + self.state.commit_success().await; + self.epoch_mapper + .apply_fn(|mapper| { + mapper.commit_success(); + Ok(()) + }) + .await + .unwrap(); + self.tree_store.write().await.new_epoch(); } - fn on_commit_failed(&mut self) { + async fn on_commit_failed(&mut self) { assert!(self.in_tx); trace!( "[{self}] commit failed; updating inner state - current epoch {}", self.epoch ); self.in_tx = false; - self.state.commit_failed(); - self.tree_store.lock().unwrap().clear(); + self.state.commit_failed().await; + if let Some(mut mapper) = self.epoch_mapper.write_access_ref().await { + mapper.commit_failed().await; + } + self.tree_store.write().await.clear(); } } -impl TransactionalStorage for PgsqlStorage +impl TransactionalStorage + for PgsqlStorage where V: Send + Sync, T: DbConnector, @@ -692,13 +902,16 @@ T::Key: ToFromBytea + Send + Sync, T::Node: Send + Sync + Clone, T::State: Send + Sync + Clone, { - fn start_transaction(&mut self) -> Result<(), RyhopeError> { + async fn start_transaction(&mut self) -> Result<(), RyhopeError> { if self.in_tx { return Err(RyhopeError::AlreadyInTransaction); } trace!("[{self}] starting a new transaction"); self.in_tx = true; - self.state.start_transaction()?; + self.epoch_mapper + .apply_fn(|mapper| mapper.start_transaction()) + .await?; + self.state.start_transaction().await?; Ok(()) } @@ -722,15 +935,16 @@ .await .map_err(|err| RyhopeError::from_db("committing transaction", err)); if err.is_ok() { - self.on_commit_success(); + self.on_commit_success().await; } else { - self.on_commit_failed(); + 
self.on_commit_failed().await; } err } } -impl SqlTransactionStorage for PgsqlStorage +impl SqlTransactionStorage + for PgsqlStorage where V: Send + Sync, T: DbConnector, @@ -743,18 +957,19 @@ where self.commit_in_transaction(tx).await } - fn commit_success(&mut self) { + async fn commit_success(&mut self) { trace!("[{self}] API-facing commit_success called"); - self.on_commit_success(); + self.on_commit_success().await; } - fn commit_failed(&mut self) { + async fn commit_failed(&mut self) { trace!("[{self}] API-facing commit_failed called"); - self.on_commit_failed() + self.on_commit_failed().await } } -impl TreeStorage for PgsqlStorage +impl TreeStorage + for PgsqlStorage where T: TreeTopology + DbConnector, V: PayloadInDb + Send, @@ -764,6 +979,7 @@ where { type StateStorage = CachedDbStore; type NodeStorage = NodeProjection; + type EpochMapper = SharedEpochMapper; fn state(&self) -> &Self::StateStorage { &self.state @@ -781,12 +997,13 @@ where &mut self.nodes } - async fn born_at(&self, epoch: Epoch) -> Vec { + async fn born_at(&self, epoch: UserEpoch) -> Vec { + let inner_epoch = self.epoch_mapper.to_incremental_epoch(epoch).await; let connection = self.db.get().await.unwrap(); connection .query( &format!("SELECT {KEY} FROM {} WHERE {VALID_FROM}=$1", self.table), - &[&epoch], + &[&inner_epoch], ) .await .expect("while fetching newborns from database") @@ -795,23 +1012,64 @@ where .collect::>() } - async fn rollback_to(&mut self, epoch: Epoch) -> Result<(), RyhopeError> { + async fn rollback_to(&mut self, epoch: UserEpoch) -> Result<(), RyhopeError> { self.state.rollback_to(epoch).await?; - self.tree_store.lock().unwrap().rollback_to(epoch).await?; - self.epoch = epoch; + let inner_epoch = self + .epoch_mapper + .try_to_incremental_epoch(epoch) + .await + .ok_or(RyhopeError::epoch_error(format!( + "IncrementalEpoch for epoch {epoch} not found" + )))?; + self.tree_store + .write() + .await + .rollback_to(inner_epoch) + .await?; + self.epoch = inner_epoch; + + // rollback epoch mapper + self.epoch_mapper + .as_ref() + .write() + .await + .rollback_to::(epoch) + .await?; // Ensure epochs coherence + assert_eq!(self.epoch, self.tree_store.read().await.current_epoch()); assert_eq!( - self.state.current_epoch(), - self.tree_store.lock().unwrap().current_epoch() + self.epoch_mapper + .to_incremental_epoch(self.state.current_epoch().await?) 
+ .await, + self.epoch + ); + assert_eq!( + self.epoch_mapper + .to_incremental_epoch( + self.epoch_mapper + .read_access_ref() + .await + .latest_epoch() + .await + ) + .await, + self.epoch, ); - assert_eq!(self.state.current_epoch(), self.epoch); - Ok(()) } + + fn epoch_mapper(&self) -> &Self::EpochMapper { + &self.epoch_mapper + } + + fn epoch_mapper_mut(&mut self) -> &mut Self::EpochMapper { + &mut self.epoch_mapper + } } -impl PayloadStorage for PgsqlStorage +impl PayloadStorage + for PgsqlStorage where Self: TreeStorage, T: TreeTopology + DbConnector, @@ -832,7 +1090,8 @@ where } } -impl MetaOperations for PgsqlStorage +impl MetaOperations + for PgsqlStorage where Self: TreeStorage, T: TreeTopology + DbConnector, @@ -846,10 +1105,10 @@ where async fn wide_lineage_between( &self, - at: Epoch, + at: UserEpoch, t: &T, keys: &Self::KeySource, - bounds: (Epoch, Epoch), + bounds: (UserEpoch, UserEpoch), ) -> Result, RyhopeError> { let r = t .wide_lineage_between( @@ -864,16 +1123,26 @@ where Ok(r) } - fn try_fetch_many_at::Key)> + Send>( + fn try_fetch_many_at::Key)> + Send>( &self, t: &T, data: I, - ) -> impl Future, V)>, RyhopeError>> + Send + ) -> impl Future, V)>, RyhopeError>> + Send where ::IntoIter: Send, { trace!("[{self}] fetching many contexts & payloads",); let table = self.table.to_owned(); - async move { t.fetch_many_at(self, self.db.clone(), &table, data).await } + async move { + let mut data_with_incremental_epochs = vec![]; + for (epoch, key) in data { + // add current (epoch, key) pair to data to be fetched only if `epoch` is found in the epoch mapper + if let Some(inner_epoch) = self.epoch_mapper.try_to_incremental_epoch(epoch).await { + data_with_incremental_epochs.push((epoch, inner_epoch, key)); + } + } + t.fetch_many_at(self, self.db.clone(), &table, data_with_incremental_epochs) + .await + } } } diff --git a/ryhope/src/storage/pgsql/storages.rs b/ryhope/src/storage/pgsql/storages.rs index c61d02eea..d9fc04e6c 100644 --- a/ryhope/src/storage/pgsql/storages.rs +++ b/ryhope/src/storage/pgsql/storages.rs @@ -1,14 +1,16 @@ use crate::{ error::{ensure, RyhopeError}, + mapper_table_name, storage::{ - EpochKvStorage, EpochStorage, RoEpochKvStorage, SqlTransactionStorage, - TransactionalStorage, TreeStorage, WideLineage, + EpochKvStorage, EpochMapper, EpochStorage, RoEpochKvStorage, RoSharedEpochMapper, + SqlTransactionStorage, TransactionalStorage, TreeStorage, WideLineage, }, tree::{ sbbst::{self, NodeIdx}, scapegoat, NodeContext, TreeTopology, }, - Epoch, EPOCH, KEY, PAYLOAD, VALID_FROM, VALID_UNTIL, + IncrementalEpoch, UserEpoch, EPOCH, INCREMENTAL_EPOCH, KEY, PAYLOAD, USER_EPOCH, VALID_FROM, + VALID_UNTIL, }; use bb8::Pool; use bb8_postgres::PostgresConnectionManager; @@ -20,13 +22,16 @@ use std::{ fmt::Debug, future::Future, marker::PhantomData, - sync::{Arc, Mutex}, + sync::Arc, }; use tokio::sync::RwLock; use tokio_postgres::{self, NoTls, Row, Transaction}; use tracing::*; -use super::{CachedValue, PayloadInDb, ToFromBytea, MAX_PGSQL_BIGINT}; +use super::{ + epoch_mapper::{EpochMapperStorage, INITIAL_INCREMENTAL_EPOCH}, + metadata_table_name, CachedValue, PayloadInDb, ToFromBytea, MAX_PGSQL_BIGINT, +}; pub type DBPool = Pool>; @@ -66,7 +71,7 @@ where db_tx: &tokio_postgres::Transaction<'_>, table: &str, k: &Self::Key, - birth_epoch: Epoch, + birth_epoch: IncrementalEpoch, v: &Self::Node, ) -> impl Future>; @@ -76,7 +81,7 @@ where db_tx: &tokio_postgres::Transaction<'_>, table: &str, k: &Self::Key, - epoch: Epoch, + epoch: IncrementalEpoch, v: V, ) -> impl Future> { 
async move { @@ -99,13 +104,13 @@ where db: DBPool, table: &str, k: &Self::Key, - epoch: Epoch, + epoch: IncrementalEpoch, ) -> impl Future, RyhopeError>> + Send; fn fetch_all_keys( db: DBPool, table: &str, - epoch: Epoch, + epoch: IncrementalEpoch, ) -> impl Future, RyhopeError>> + Send { async move { let connection = db.get().await.unwrap(); @@ -129,7 +134,7 @@ where fn fetch_a_key( db: DBPool, table: &str, - epoch: Epoch, + epoch: IncrementalEpoch, ) -> impl Future, RyhopeError>> + Send { async move { let connection = db.get().await.unwrap(); @@ -153,7 +158,7 @@ where fn fetch_all_pairs( db: DBPool, table: &str, - epoch: Epoch, + epoch: IncrementalEpoch, ) -> impl Future, RyhopeError>> + std::marker::Send { async move { let connection = db.get().await.unwrap(); @@ -178,7 +183,7 @@ where db: DBPool, table: &str, k: &Self::Key, - epoch: Epoch, + epoch: IncrementalEpoch, ) -> impl std::future::Future, RyhopeError>> + std::marker::Send { async move { let connection = db.get().await.unwrap(); @@ -217,23 +222,27 @@ where db: DBPool, table: &str, keys_query: &str, - bounds: (Epoch, Epoch), + bounds: (UserEpoch, UserEpoch), // we keep `UserEpoch` here because we need to do ranges + // over epochs in this operation ) -> impl Future, RyhopeError>>; /// Return the value associated to the given key at the given epoch. #[allow(clippy::type_complexity)] - fn fetch_many_at, I: IntoIterator + Send>( + fn fetch_many_at< + S: TreeStorage, + I: IntoIterator + Send, + >( &self, s: &S, db: DBPool, table: &str, data: I, - ) -> impl Future, V)>, RyhopeError>> + Send; + ) -> impl Future, V)>, RyhopeError>> + Send; } /// Implementation of a [`DbConnector`] for a tree over `K` with empty nodes. /// Only applies to the SBBST for now. -impl DbConnector for sbbst::Tree +impl DbConnector for sbbst::Tree where V: Debug + Clone + Send + Sync + Serialize + for<'a> Deserialize<'a>, { @@ -245,7 +254,7 @@ where db: DBPool, table: &str, k: &NodeIdx, - epoch: Epoch, + epoch: IncrementalEpoch, ) -> Result, RyhopeError> { let connection = db.get().await.unwrap(); connection @@ -271,7 +280,7 @@ where db_tx: &tokio_postgres::Transaction<'_>, table: &str, k: &NodeIdx, - birth_epoch: Epoch, + birth_epoch: IncrementalEpoch, _n: &(), ) -> Result<(), RyhopeError> { db_tx @@ -295,17 +304,14 @@ where db: DBPool, table: &str, keys_query: &str, - bounds: (Epoch, Epoch), + bounds: (UserEpoch, UserEpoch), ) -> Result, RyhopeError> { - // In the SBBST case, parsil will not be able to inject the table name; - // so we do it here. - let keys_query = format!("{keys_query} FROM {table}"); // Execute `keys_query` to retrieve the core keys from the DB let core_keys = db .get() .await .map_err(|err| RyhopeError::from_bb8("getting a connection", err))? 
- .query(&keys_query, &[]) + .query(&keys_query.to_string(), &[]) .await .map_err(|err| { RyhopeError::from_db( @@ -327,12 +333,20 @@ where } // Fetch all the payloads for the wide lineage in one fell swoop - let payload_query = format!( - "SELECT - {KEY}, generate_series(GREATEST({VALID_FROM}, $1), LEAST({VALID_UNTIL}, $2)) AS epoch, {PAYLOAD} - FROM {table} - WHERE NOT ({VALID_FROM} > $2 OR {VALID_UNTIL} < $1) AND {KEY} = ANY($3)", - ); + let mapper_table_name = mapper_table_name(table); + let payload_query = format!(" + SELECT + {KEY}, + generate_series(GREATEST({VALID_FROM}, min_epoch), LEAST({VALID_UNTIL}, max_epoch)) AS epoch, + {PAYLOAD} + FROM {table} CROSS JOIN + (SELECT MIN({INCREMENTAL_EPOCH}) as min_epoch, MAX({INCREMENTAL_EPOCH}) as max_epoch + FROM {mapper_table_name} + WHERE {USER_EPOCH} >= $1 AND {USER_EPOCH} <= $2) as mapper_range + WHERE {VALID_FROM} <= mapper_range.max_epoch AND {VALID_UNTIL} >= mapper_range.min_epoch + AND {KEY} = ANY($3) + ; + "); let rows = db .get() .await @@ -354,11 +368,19 @@ where // Assemble the final result #[allow(clippy::type_complexity)] let mut epoch_lineages: HashMap< - Epoch, + UserEpoch, (HashMap>, HashMap), > = HashMap::new(); for row in &rows { let epoch = row.get::<_, i64>("epoch"); + // convert incremental epoch to user epoch + let epoch = s + .epoch_mapper() + .try_to_user_epoch(epoch as IncrementalEpoch) + .await + .ok_or(RyhopeError::epoch_error(format!( + "UserEpoch corresponding to epoch {epoch} not found" + )))?; let key = NodeIdx::from_bytea(row.get::<_, Vec>(KEY)); let payload = Self::payload_from_row(row)?; @@ -377,21 +399,20 @@ where async fn fetch_many_at< S: TreeStorage, - I: IntoIterator + Send, + I: IntoIterator + Send, >( &self, s: &S, db: DBPool, table: &str, data: I, - ) -> Result, V)>, RyhopeError> { - let data = data.into_iter().collect::>(); + ) -> Result, V)>, RyhopeError> { let connection = db.get().await.unwrap(); let immediate_table = data - .iter() - .map(|(epoch, key)| { + .into_iter() + .map(|(user_epoch, incremental_epoch, key)| { format!( - "({epoch}::BIGINT, '\\x{}'::BYTEA)", + "({user_epoch}::BIGINT, {incremental_epoch}::BIGINT, '\\x{}'::BYTEA)", hex::encode(key.to_bytea()) ) }) @@ -399,26 +420,28 @@ where let mut r = Vec::new(); for row in connection - .query( - &dbg!(format!( - "SELECT batch.key, batch.epoch, {table}.{PAYLOAD} FROM - (VALUES {}) AS batch (epoch, key) + .query( + &dbg!(format!( + "SELECT batch.key, batch.user_epoch, {table}.{PAYLOAD} FROM + (VALUES {}) AS batch (user_epoch, incremental_epoch, key) LEFT JOIN {table} ON - batch.key = {table}.{KEY} AND {table}.{VALID_FROM} <= batch.epoch AND batch.epoch <= {table}.{VALID_UNTIL}", - immediate_table - )), - &[], - ) + batch.key = {table}.{KEY} AND {table}.{VALID_FROM} <= batch.incremental_epoch + AND batch.incremental_epoch <= {table}.{VALID_UNTIL}", + immediate_table + )), + &[], + ) .await .map_err(|err| RyhopeError::from_db("fetching payload from DB", err))? 
- .iter() { - let k = Self::Key::from_bytea(row.get::<_, Vec>(0)); - let epoch = row.get::<_, Epoch>(1); - let v = row.get::<_, Option>>(2).map(|x| x.0); - if let Some(v) = v { - r.push((epoch, self.node_context(&k, s).await?.unwrap() , v)); - } + .iter() + { + let k = Self::Key::from_bytea(row.get::<_, Vec>(0)); + let epoch = row.get::<_, UserEpoch>(1); + let v = row.get::<_, Option>>(2).map(|x| x.0); + if let Some(v) = v { + r.push((epoch, self.node_context(&k, s).await?.unwrap(), v)); } + } Ok(r) } } @@ -450,7 +473,7 @@ where db: DBPool, table: &str, k: &K, - epoch: Epoch, + epoch: IncrementalEpoch, ) -> Result, RyhopeError> { let connection = db.get().await.unwrap(); connection @@ -496,7 +519,7 @@ where db_tx: &tokio_postgres::Transaction<'_>, table: &str, k: &K, - birth_epoch: Epoch, + birth_epoch: IncrementalEpoch, n: &Self::Node, ) -> Result<(), RyhopeError> { db_tx @@ -524,11 +547,11 @@ where async fn wide_lineage_between>( &self, - _: &S, + s: &S, db: DBPool, table: &str, keys_query: &str, - bounds: (Epoch, Epoch), + bounds: (UserEpoch, UserEpoch), ) -> Result, RyhopeError> { ensure( !keys_query.contains('$'), @@ -547,8 +570,11 @@ where LEFT_CHILD = LEFT_CHILD, RIGHT_CHILD = RIGHT_CHILD, SUBTREE_SIZE = SUBTREE_SIZE, + INCREMENTAL_EPOCH = INCREMENTAL_EPOCH, + USER_EPOCH = USER_EPOCH, max_depth = 2, zk_table = table, + mapper_table_name = mapper_table_name(table), core_keys_query = keys_query, ); let connection = db.get().await.unwrap(); @@ -569,7 +595,7 @@ where let mut core_keys = Vec::new(); #[allow(clippy::type_complexity)] let mut epoch_lineages: HashMap< - Epoch, + UserEpoch, (HashMap>, HashMap), > = HashMap::new(); @@ -580,6 +606,14 @@ where let epoch = row.try_get::<_, i64>(EPOCH).map_err(|err| { RyhopeError::invalid_format(format!("fetching `epoch` from {row:?}"), err) })?; + // convert incremental epoch to user epoch + let epoch = s + .epoch_mapper() + .try_to_user_epoch(epoch as IncrementalEpoch) + .await + .ok_or(RyhopeError::epoch_error(format!( + "UserEpoch corresponding to epoch {epoch} not found" + )))?; let node = >::node_from_row(row); let payload = Self::payload_from_row(row)?; if is_core { @@ -607,21 +641,20 @@ where async fn fetch_many_at< S: TreeStorage, - I: IntoIterator + Send, + I: IntoIterator + Send, >( &self, _s: &S, db: DBPool, table: &str, data: I, - ) -> Result, V)>, RyhopeError> { - let data = data.into_iter().collect::>(); + ) -> Result, V)>, RyhopeError> { let connection = db.get().await.unwrap(); let immediate_table = data - .iter() - .map(|(epoch, key)| { + .into_iter() + .map(|(user_epoch, incremental_epoch, key)| { format!( - "({epoch}::BIGINT, '\\x{}'::BYTEA)", + "({user_epoch}::BIGINT, {incremental_epoch}::BIGINT, '\\x{}'::BYTEA)", hex::encode(key.to_bytea()) ) }) @@ -632,12 +665,13 @@ where .query( &format!( "SELECT - batch.key, batch.epoch, {table}.{PAYLOAD}, + batch.key, batch.user_epoch, {table}.{PAYLOAD}, {table}.{PARENT}, {table}.{LEFT_CHILD}, {table}.{RIGHT_CHILD} FROM - (VALUES {}) AS batch (epoch, key) + (VALUES {}) AS batch (user_epoch, incremental_epoch, key) LEFT JOIN {table} ON - batch.key = {table}.{KEY} AND {table}.{VALID_FROM} <= batch.epoch AND batch.epoch <= {table}.{VALID_UNTIL}", + batch.key = {table}.{KEY} AND {table}.{VALID_FROM} <= batch.incremental_epoch + AND batch.incremental_epoch <= {table}.{VALID_UNTIL}", immediate_table ), &[], @@ -647,7 +681,7 @@ where .iter() { let k = Self::Key::from_bytea(row.get::<_, Vec>(0)); - let epoch = row.get::<_, Epoch>(1); + let epoch = row.get::<_, UserEpoch>(1); let v = row.get::<_, 
Option>>(2).map(|x| x.0); if let Some(v) = v { r.push(( @@ -670,27 +704,32 @@ where pub struct CachedDbStore Deserialize<'a>> { /// A pointer to the DB client db: DBPool, - /// The first valid epoch - initial_epoch: Epoch, /// Whether a transaction is in process in_tx: bool, /// True if the wrapped state has been modified dirty: bool, /// The current epoch - epoch: Epoch, + epoch: IncrementalEpoch, /// The table in which the data must be persisted table: String, + // epoch mapper + epoch_mapper: RoSharedEpochMapper, pub(super) cache: RwLock>, } impl Deserialize<'a>> CachedDbStore { - pub fn new(initial_epoch: Epoch, current_epoch: Epoch, table: String, db: DBPool) -> Self { + pub fn new( + current_epoch: UserEpoch, + table: String, + db: DBPool, + mapper: RoSharedEpochMapper, + ) -> Self { Self { - initial_epoch, db, in_tx: false, dirty: false, epoch: current_epoch, table, + epoch_mapper: mapper, cache: RwLock::new(None), } } @@ -699,19 +738,20 @@ impl Deserialize<'a>> Cache /// immediately persisted, as the DB representation of the payload must be /// valid even if it is never modified further by the user. pub async fn with_value( - initial_epoch: Epoch, table: String, db: DBPool, t: T, + mapper: RoSharedEpochMapper, ) -> Result { + let initial_epoch = INITIAL_INCREMENTAL_EPOCH; { let connection = db.get().await.unwrap(); connection .query( &format!( - "INSERT INTO {}_meta ({VALID_FROM}, {VALID_UNTIL}, {PAYLOAD}) - VALUES ($1, $1, $2)", - table + "INSERT INTO {} ({VALID_FROM}, {VALID_UNTIL}, {PAYLOAD}) + VALUES ($1, $1, $2)", + metadata_table_name(table.as_str()) ), &[&initial_epoch, &Json(t.clone())], ) @@ -723,11 +763,11 @@ impl Deserialize<'a>> Cache Ok(Self { db, - initial_epoch, in_tx: false, dirty: true, epoch: initial_epoch, table, + epoch_mapper: mapper, cache: RwLock::new(Some(t)), }) } @@ -739,14 +779,15 @@ impl Deserialize<'a>> Cache ensure(self.in_tx, "not in a transaction")?; trace!("[{self}] commiting in transaction"); + let meta_table = metadata_table_name(&self.table); + if self.dirty { let state = self.cache.read().await.clone(); db_tx .query( &format!( - "INSERT INTO {}_meta ({VALID_FROM}, {VALID_UNTIL}, {PAYLOAD}) - VALUES ($1, $1, $2)", - self.table + "INSERT INTO {meta_table} ({VALID_FROM}, {VALID_UNTIL}, {PAYLOAD}) + VALUES ($1, $1, $2)" ), &[&(self.epoch + 1), &Json(state)], ) @@ -758,8 +799,7 @@ impl Deserialize<'a>> Cache db_tx .query( &format!( - "UPDATE {}_meta SET {VALID_UNTIL} = $1 + 1 WHERE {VALID_UNTIL} = $1", - self.table + "UPDATE {meta_table} SET {VALID_UNTIL} = $1 + 1 WHERE {VALID_UNTIL} = $1" ), &[&(self.epoch)], ) @@ -787,13 +827,99 @@ impl Deserialize<'a>> Cache self.dirty = false; self.in_tx = false; } + + async fn fetch_at_inner(&self, epoch: IncrementalEpoch) -> Result { + trace!("[{self}] fetching payload at {}", epoch); + let meta_table = metadata_table_name(&self.table); + let connection = self + .db + .get() + .await + .expect("Failed to get DB connection from pool"); + connection + .query_one( + &format!( + "SELECT {PAYLOAD} FROM {meta_table} WHERE {VALID_FROM} <= $1 AND $1 <= {VALID_UNTIL}" + ), + &[&epoch], + ) + .await + .and_then(|row| row.try_get::<_, Json>(0)) + .map(|x| x.0) + .map_err(|err| RyhopeError::from_db( + format!( + "Fetching state from `{meta_table}` at epoch `{epoch}`" + ), err + )) + } + + async fn rollback_to_incremental_epoch( + &mut self, + new_epoch: IncrementalEpoch, + ) -> Result<(), RyhopeError> { + ensure( + new_epoch < self.epoch, + format!( + "unable to rollback into the future: requested epoch ({}) > current 
epoch ({})", + new_epoch, self.epoch + ), + )?; + ensure( + new_epoch >= INITIAL_INCREMENTAL_EPOCH, + format!( + "unable to rollback to {} before initial epoch {}", + new_epoch, INITIAL_INCREMENTAL_EPOCH + ), + )?; + + let _ = self.cache.get_mut().take(); + let meta_table = metadata_table_name(&self.table); + let mut connection = self.db.get().await.unwrap(); + let db_tx = connection + .transaction() + .await + .expect("unable to create DB transaction"); + // Roll back all the nodes that would still have been alive + db_tx + .query( + &format!("UPDATE {meta_table} SET {VALID_UNTIL} = $1 WHERE {VALID_UNTIL} > $1"), + &[&new_epoch], + ) + .await + .map_err(|err| { + RyhopeError::from_db( + format!("Rolling back alive nodes to epoch {new_epoch} in table {meta_table}"), + err, + ) + })?; + // Delete nodes that would not have been born yet + db_tx + .query( + &format!("DELETE FROM {meta_table} WHERE {VALID_FROM} > $1"), + &[&new_epoch], + ) + .await + .map_err(|err| { + RyhopeError::from_db( + format!("Deleting nodes born after epoch {new_epoch} from table {meta_table}"), + err, + ) + })?; + db_tx + .commit() + .await + .map_err(|err| RyhopeError::from_db("committing", err))?; + self.epoch = new_epoch; + + Ok(()) + } } impl TransactionalStorage for CachedDbStore where T: Debug + Clone + Serialize + for<'a> Deserialize<'a> + Send + Sync, { - fn start_transaction(&mut self) -> Result<(), RyhopeError> { + async fn start_transaction(&mut self) -> Result<(), RyhopeError> { trace!("[{self}] starting transaction"); if self.in_tx { return Err(RyhopeError::AlreadyInTransaction); @@ -847,12 +973,12 @@ where self.commit_in_transaction(tx).await } - fn commit_success(&mut self) { + async fn commit_success(&mut self) { trace!("[{self}] commit_success"); self.on_commit_success() } - fn commit_failed(&mut self) { + async fn commit_failed(&mut self) { trace!("[{self}] commit_failed"); self.on_commit_failed() } @@ -865,7 +991,7 @@ where async fn fetch(&self) -> Result { trace!("[{self}] fetching payload"); if self.cache.read().await.is_none() { - let state = self.fetch_at(self.epoch).await?; + let state = self.fetch_at_inner(self.epoch).await?; let _ = self.cache.write().await.replace(state.clone()); Ok(state) } else { @@ -873,29 +999,15 @@ where } } - async fn fetch_at(&self, epoch: Epoch) -> Result { - trace!("[{self}] fetching payload at {}", epoch); - let connection = self.db.get().await.unwrap(); - connection - .query_one( - &format!( - "SELECT {PAYLOAD} FROM {}_meta WHERE {VALID_FROM} <= $1 AND $1 <= {VALID_UNTIL}", - self.table, - ), - &[&epoch], - ) + async fn fetch_at(&self, epoch: UserEpoch) -> Result { + let epoch = self + .epoch_mapper + .try_to_incremental_epoch(epoch) .await - .and_then(|row| row.try_get::<_, Json>(0)) - .map(|x| x.0) - .map_err(|err| { - RyhopeError::from_db( - format!( - "failed to fetch state from `{}_meta` at epoch `{}`", - self.table, - epoch - ), - err) - }) + .ok_or(RyhopeError::epoch_error(format!( + "IncrementalEpoch not found for epoch {epoch}" + )))?; + self.fetch_at_inner(epoch).await } async fn store(&mut self, t: T) -> Result<(), RyhopeError> { @@ -905,64 +1017,30 @@ where Ok(()) } - fn current_epoch(&self) -> Epoch { - self.epoch - } - - async fn rollback_to(&mut self, new_epoch: Epoch) -> Result<(), RyhopeError> { - ensure( - new_epoch >= self.initial_epoch, - format!( - "unable to rollback to {} before initial epoch {}", - new_epoch, self.initial_epoch - ), - )?; - ensure( - new_epoch < self.current_epoch(), - format!( - "unable to rollback into the future: 
requested epoch ({}) > current epoch ({})", - new_epoch, - self.current_epoch() - ), - )?; - - let _ = self.cache.get_mut().take(); - let mut connection = self.db.get().await.unwrap(); - let db_tx = connection - .transaction() + async fn current_epoch(&self) -> Result { + self.epoch_mapper + .try_to_user_epoch(self.epoch) .await - .expect("unable to create DB transaction"); - // Roll back all the nodes that would still have been alive - db_tx - .query( - &format!( - "UPDATE {}_meta SET {VALID_UNTIL} = $1 WHERE {VALID_UNTIL} > $1", - self.table - ), - &[&new_epoch], - ) - .await - .map_err(|err| { - RyhopeError::from_db(format!("time-stamping `{}_meta`", self.table), err) - })?; - // Delete nodes that would not have been born yet - db_tx - .query( - &format!("DELETE FROM {}_meta WHERE {VALID_FROM} > $1", self.table), - &[&new_epoch], - ) - .await - .map_err(|err| { - RyhopeError::from_db(format!("reaping nodes `{}_meta`", self.table), err) - })?; + .ok_or(RyhopeError::CurrenEpochUndefined(self.epoch)) + } - db_tx - .commit() + async fn rollback_to(&mut self, new_epoch: UserEpoch) -> Result<(), RyhopeError> { + let inner_epoch = self + .epoch_mapper + .try_to_incremental_epoch(new_epoch) .await - .map_err(|err| RyhopeError::from_db("committing transaction", err))?; - self.epoch = new_epoch; + .ok_or(RyhopeError::epoch_error(format!( + "IncrementalEpoch not found for epoch {new_epoch}" + )))?; + self.rollback_to_incremental_epoch(inner_epoch).await + } - Ok(()) + async fn rollback(&mut self) -> Result<(), RyhopeError> { + ensure( + self.epoch > INITIAL_INCREMENTAL_EPOCH, + "cannot rollback before initial epoch", + )?; + self.rollback_to_incremental_epoch(self.epoch - 1).await } } @@ -975,14 +1053,14 @@ where T::Key: ToFromBytea, V: Debug + Clone + Send + Sync + Serialize + for<'a> Deserialize<'a>, { - /// The initial epoch - initial_epoch: Epoch, /// The latest *commited* epoch - epoch: Epoch, + epoch: UserEpoch, /// A pointer to the DB client db: DBPool, /// DB backing this cache table: String, + // Epoch mapper + epoch_mapper: RoSharedEpochMapper, /// Operations pertaining to the in-process transaction. 
pub(super) nodes_cache: HashMap>>, pub(super) payload_cache: HashMap>>, @@ -1004,13 +1082,18 @@ where T::Key: ToFromBytea, V: Debug + Clone + Send + Sync + Serialize + for<'a> Deserialize<'a>, { - pub fn new(initial_epoch: Epoch, current_epoch: Epoch, table: String, db: DBPool) -> Self { + pub fn new( + current_epoch: IncrementalEpoch, + table: String, + db: DBPool, + mapper: RoSharedEpochMapper, + ) -> Self { trace!("[{}] initializing CachedDbTreeStore", table); CachedDbTreeStore { - initial_epoch, epoch: current_epoch, table, db: db.clone(), + epoch_mapper: mapper, nodes_cache: Default::default(), payload_cache: Default::default(), _p: PhantomData, @@ -1022,24 +1105,20 @@ where self.payload_cache.clear(); } - pub fn new_epoch(&mut self) { + pub(crate) fn new_epoch(&mut self) { self.clear(); self.epoch += 1; } - pub fn initial_epoch(&self) -> Epoch { - self.initial_epoch - } - - pub fn current_epoch(&self) -> Epoch { + pub(crate) fn current_epoch(&self) -> IncrementalEpoch { self.epoch } - pub async fn size(&self) -> usize { - self.size_at(self.epoch).await + async fn size(&self) -> usize { + self.size_at(self.current_epoch()).await } - pub async fn size_at(&self, epoch: Epoch) -> usize { + async fn size_at(&self, epoch: IncrementalEpoch) -> usize { let connection = self.db.get().await.unwrap(); connection .query_one( @@ -1057,13 +1136,16 @@ where .unwrap() } - pub(super) async fn rollback_to(&mut self, new_epoch: Epoch) -> Result<(), RyhopeError> { + pub(super) async fn rollback_to( + &mut self, + new_epoch: IncrementalEpoch, + ) -> Result<(), RyhopeError> { trace!("[{self}] rolling back to {new_epoch}"); ensure( - new_epoch >= self.initial_epoch, + new_epoch >= INITIAL_INCREMENTAL_EPOCH, format!( "unable to rollback to {} before initial epoch {}", - new_epoch, self.initial_epoch + new_epoch, INITIAL_INCREMENTAL_EPOCH ), )?; ensure( @@ -1124,7 +1206,7 @@ where T::Key: ToFromBytea, V: Debug + Clone + Send + Sync + Serialize + for<'a> Deserialize<'a>, { - pub(super) wrapped: Arc>>, + pub(super) wrapped: Arc>>, } impl std::fmt::Display for NodeProjection where @@ -1133,81 +1215,150 @@ where V: PayloadInDb, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}/Nodes", self.wrapped.lock().unwrap()) + write!(f, "{}/Nodes", self.wrapped.as_ref().blocking_read()) + } +} + +impl NodeProjection +where + T: TreeTopology + DbConnector, + T::Key: ToFromBytea, + V: Debug + Clone + Send + Sync + Serialize + for<'a> Deserialize<'a>, +{ + async fn try_fetch_at_incremental_epoch( + &self, + k: &T::Key, + epoch: IncrementalEpoch, + ) -> Result, RyhopeError> { + let db = self.wrapped.read().await.db.clone(); + let table = self.wrapped.read().await.table.to_owned(); + Ok(if epoch == self.wrapped.read().await.current_epoch() { + // Directly returns the value if it is already in cache, fetch it from + // the DB otherwise. + let value = self.wrapped.read().await.nodes_cache.get(k).cloned(); + if let Some(Some(cached_value)) = value { + Some(cached_value.into_value()) + } else if let Some(value) = T::fetch_node_at(db, &table, k, epoch).await? { + self.wrapped + .write() + .await + .nodes_cache + .insert(k.clone(), Some(CachedValue::Read(value.clone()))); + Some(value) + } else { + None + } + } else { + T::fetch_node_at(db, &table, k, epoch).await? + }) } } + impl RoEpochKvStorage for NodeProjection where T: TreeTopology + DbConnector, T::Key: ToFromBytea, V: PayloadInDb, { - delegate::delegate! 
{ - to self.wrapped.lock().unwrap() { - fn initial_epoch(&self) -> Epoch ; - fn current_epoch(&self) -> Epoch ; - async fn size(&self) -> usize; - async fn size_at(&self, epoch: Epoch) -> usize; - } + async fn initial_epoch(&self) -> UserEpoch { + self.wrapped + .read() + .await + .epoch_mapper + .to_user_epoch(INITIAL_INCREMENTAL_EPOCH) + .await as UserEpoch + } + + async fn current_epoch(&self) -> Result { + let inner_epoch = self.wrapped.read().await.current_epoch(); + self.wrapped + .read() + .await + .epoch_mapper + .try_to_user_epoch(inner_epoch) + .await + .ok_or(RyhopeError::CurrenEpochUndefined(inner_epoch)) } - fn try_fetch_at( + async fn size(&self) -> usize { + self.wrapped.read().await.size().await + } + + async fn size_at(&self, epoch: UserEpoch) -> usize { + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .to_incremental_epoch(epoch) + .await as UserEpoch; + self.wrapped.read().await.size_at(inner_epoch).await + } + + async fn try_fetch_at( &self, k: &T::Key, - epoch: Epoch, - ) -> impl Future, RyhopeError>> + Send { + epoch: UserEpoch, + ) -> Result, RyhopeError> { trace!("[{self}] fetching {k:?}@{epoch}",); - let db = self.wrapped.lock().unwrap().db.clone(); - let table = self.wrapped.lock().unwrap().table.to_owned(); - async move { - if epoch == self.current_epoch() { - // Directly returns the value if it is already in cache, fetch it from - // the DB otherwise. - let value = self.wrapped.lock().unwrap().nodes_cache.get(k).cloned(); - Ok(if let Some(Some(cached_value)) = value { - Some(cached_value.into_value()) - } else if let Some(value) = T::fetch_node_at(db, &table, k, epoch).await.unwrap() { - let mut guard = self.wrapped.lock().unwrap(); - guard - .nodes_cache - .insert(k.clone(), Some(CachedValue::Read(value.clone()))); - Some(value) - } else { - None - }) - } else { - T::fetch_node_at(db, &table, k, epoch).await - } + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .try_to_incremental_epoch(epoch) + .await; + if let Some(epoch) = inner_epoch { + self.try_fetch_at_incremental_epoch(k, epoch).await + } else { + Ok(None) } } - async fn keys_at(&self, epoch: Epoch) -> Vec { - let db = self.wrapped.lock().unwrap().db.clone(); - let table = self.wrapped.lock().unwrap().table.to_owned(); + async fn keys_at(&self, epoch: UserEpoch) -> Vec { + let db = self.wrapped.read().await.db.clone(); + let table = self.wrapped.read().await.table.to_owned(); - T::fetch_all_keys(db, &table, epoch).await.unwrap() + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .to_incremental_epoch(epoch) + .await; + + T::fetch_all_keys(db, &table, inner_epoch).await.unwrap() } - async fn random_key_at(&self, epoch: Epoch) -> Option { - let db = self.wrapped.lock().unwrap().db.clone(); - let table = self.wrapped.lock().unwrap().table.to_owned(); + async fn random_key_at(&self, epoch: UserEpoch) -> Option { + let db = self.wrapped.read().await.db.clone(); + let table = self.wrapped.read().await.table.to_owned(); - T::fetch_a_key(db, &table, epoch).await.unwrap() + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .to_incremental_epoch(epoch) + .await; + + T::fetch_a_key(db, &table, inner_epoch).await.unwrap() } - async fn pairs_at(&self, _epoch: Epoch) -> Result, RyhopeError> { + async fn pairs_at(&self, _epoch: UserEpoch) -> Result, RyhopeError> { unimplemented!("should never be used"); } async fn try_fetch(&self, k: &T::Key) -> Result, RyhopeError> { - self.try_fetch_at(k, self.current_epoch()).await + let 
current_epoch = self.wrapped.read().await.current_epoch(); + self.try_fetch_at_incremental_epoch(k, current_epoch).await } async fn contains(&self, k: &T::Key) -> Result { self.try_fetch(k).await.map(|x| x.is_some()) } - async fn contains_at(&self, k: &T::Key, epoch: Epoch) -> Result { + async fn contains_at(&self, k: &T::Key, epoch: UserEpoch) -> Result { self.try_fetch_at(k, epoch).await.map(|x| x.is_some()) } } @@ -1218,48 +1369,34 @@ where T::Node: Sync + Clone, V: PayloadInDb, { - delegate::delegate! { - to self.wrapped.lock().unwrap() { - async fn rollback_to(&mut self, epoch: Epoch) -> Result<(), RyhopeError>; - } - } - - fn remove(&mut self, k: T::Key) -> impl Future> + Send { + async fn remove(&mut self, k: T::Key) -> Result<(), RyhopeError> { trace!("[{self}] removing {k:?} from cache",); - self.wrapped.lock().unwrap().nodes_cache.insert(k, None); - async { Ok(()) } + self.wrapped.write().await.nodes_cache.insert(k, None); + Ok(()) } - fn update( - &mut self, - k: T::Key, - new_value: T::Node, - ) -> impl Future> + Send { + async fn update(&mut self, k: T::Key, new_value: T::Node) -> Result<(), RyhopeError> { trace!("[{self}] updating cache {k:?} -> {new_value:?}"); // If the operation is already present from a read, replace it with the // new value. self.wrapped - .lock() - .unwrap() + .write() + .await .nodes_cache .insert(k, Some(CachedValue::Written(new_value))); - async { Ok(()) } + Ok(()) } - fn store( - &mut self, - k: T::Key, - value: T::Node, - ) -> impl Future> + Send { + async fn store(&mut self, k: T::Key, value: T::Node) -> Result<(), RyhopeError> { trace!("[{self}] storing {k:?} -> {value:?} in cache"); // If the operation is already present from a read, replace it with the // new value. self.wrapped - .lock() - .unwrap() + .write() + .await .nodes_cache .insert(k, Some(CachedValue::Written(value))); - async { Ok(()) } + Ok(()) } async fn update_with( @@ -1277,6 +1414,23 @@ Ok(()) } } + + async fn rollback_to(&mut self, epoch: UserEpoch) -> Result<(), RyhopeError> { + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .to_incremental_epoch(epoch) + .await; + self.wrapped.write().await.rollback_to(inner_epoch).await + } + + async fn rollback(&mut self) -> Result<(), RyhopeError> { + let inner_epoch = self.wrapped.read().await.current_epoch(); + ensure(inner_epoch > 0, "cannot rollback past the initial epoch")?; + self.wrapped.write().await.rollback_to(inner_epoch - 1).await + } } /// A wrapper around a [`CachedDbTreeStore`] to make it appear as a KV store for @@ -1291,7 +1445,7 @@ where T::Key: ToFromBytea, V: Debug + Clone + Send + Sync + Serialize + for<'a> Deserialize<'a>, { - pub(super) wrapped: Arc>>, + pub(super) wrapped: Arc>>, } impl std::fmt::Display for PayloadProjection where T: TreeTopology + DbConnector, T::Key: ToFromBytea, V: PayloadInDb, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}/Payload", self.wrapped.lock().unwrap()) + write!(f, "{}/Payload", self.wrapped.blocking_read()) } } -impl RoEpochKvStorage for PayloadProjection +impl PayloadProjection where T: TreeTopology + DbConnector, T::Key: ToFromBytea, V: PayloadInDb, { - delegate::delegate! 
{ - to self.wrapped.lock().unwrap() { - fn initial_epoch(&self) -> Epoch ; - fn current_epoch(&self) -> Epoch ; - async fn size(&self) -> usize ; - async fn size_at(&self, epoch: Epoch) -> usize ; - } - } - - fn try_fetch_at( + async fn try_fetch_at_incremental_epoch( &self, k: &T::Key, - epoch: Epoch, - ) -> impl Future, RyhopeError>> + Send { - trace!("[{self}] attempting to fetch payload for {k:?}@{epoch}"); - let db = self.wrapped.lock().unwrap().db.clone(); - let table = self.wrapped.lock().unwrap().table.to_owned(); - async move { - if epoch == self.current_epoch() { - // Directly returns the value if it is already in cache, fetch it from - // the DB otherwise. - let value = self.wrapped.lock().unwrap().payload_cache.get(k).cloned(); - if let Some(Some(cached_value)) = value { - Ok(Some(cached_value.into_value())) - } else if let Some(value) = T::fetch_payload_at(db, &table, k, epoch).await.unwrap() - { - let mut guard = self.wrapped.lock().unwrap(); - guard - .payload_cache - .insert(k.clone(), Some(CachedValue::Read(value.clone()))); - Ok(Some(value)) - } else { - Ok(None) - } + epoch: IncrementalEpoch, + ) -> Result, RyhopeError> { + let db = self.wrapped.read().await.db.clone(); + let table = self.wrapped.read().await.table.to_owned(); + Ok(if epoch == self.wrapped.read().await.current_epoch() { + // Directly returns the value if it is already in cache, fetch it from + // the DB otherwise. + let value = self.wrapped.read().await.payload_cache.get(k).cloned(); + if let Some(Some(cached_value)) = value { + Some(cached_value.into_value()) + } else if let Some(value) = T::fetch_payload_at(db, &table, k, epoch).await? { + self.wrapped + .write() + .await + .payload_cache + .insert(k.clone(), Some(CachedValue::Read(value.clone()))); + Some(value) } else { - T::fetch_payload_at(db, &table, k, epoch).await + None } + } else { + T::fetch_payload_at(db, &table, k, epoch).await? 
+ }) + } +} + +impl RoEpochKvStorage for PayloadProjection +where + T: TreeTopology + DbConnector, + T::Key: ToFromBytea, + V: PayloadInDb, +{ + async fn initial_epoch(&self) -> UserEpoch { + self.wrapped + .read() + .await + .epoch_mapper + .to_user_epoch(INITIAL_INCREMENTAL_EPOCH) + .await as UserEpoch + } + + async fn current_epoch(&self) -> Result { + let inner_epoch = self.wrapped.read().await.current_epoch(); + self.wrapped + .read() + .await + .epoch_mapper + .try_to_user_epoch(inner_epoch as IncrementalEpoch) + .await + .ok_or(RyhopeError::CurrenEpochUndefined(inner_epoch)) + } + + async fn size(&self) -> usize { + self.wrapped.read().await.size().await + } + + async fn size_at(&self, epoch: UserEpoch) -> usize { + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .to_incremental_epoch(epoch) + .await as UserEpoch; + self.wrapped.read().await.size_at(inner_epoch).await + } + + async fn try_fetch_at(&self, k: &T::Key, epoch: UserEpoch) -> Result, RyhopeError> { + trace!("[{self}] attempting to fetch payload for {k:?}@{epoch}"); + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .try_to_incremental_epoch(epoch) + .await; + if let Some(epoch) = inner_epoch { + self.try_fetch_at_incremental_epoch(k, epoch).await + } else { + Ok(None) } } - async fn keys_at(&self, epoch: Epoch) -> Vec { - let db = self.wrapped.lock().unwrap().db.clone(); - let table = self.wrapped.lock().unwrap().table.to_owned(); + async fn try_fetch(&self, k: &T::Key) -> Result, RyhopeError> { + let current_epoch = self.wrapped.read().await.current_epoch(); + self.try_fetch_at_incremental_epoch(k, current_epoch).await + } + + async fn keys_at(&self, epoch: UserEpoch) -> Vec { + let db = self.wrapped.read().await.db.clone(); + let table = self.wrapped.read().await.table.to_owned(); + + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .to_incremental_epoch(epoch) + .await; - T::fetch_all_keys(db, &table, epoch).await.unwrap() + T::fetch_all_keys(db, &table, inner_epoch).await.unwrap() } - async fn random_key_at(&self, epoch: Epoch) -> Option { - let db = self.wrapped.lock().unwrap().db.clone(); - let table = self.wrapped.lock().unwrap().table.to_owned(); + async fn random_key_at(&self, epoch: UserEpoch) -> Option { + let db = self.wrapped.read().await.db.clone(); + let table = self.wrapped.read().await.table.to_owned(); + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .to_incremental_epoch(epoch) + .await; - T::fetch_a_key(db, &table, epoch).await.unwrap() + T::fetch_a_key(db, &table, inner_epoch).await.unwrap() } - async fn pairs_at(&self, epoch: Epoch) -> Result, RyhopeError> { - let db = self.wrapped.lock().unwrap().db.clone(); - let table = self.wrapped.lock().unwrap().table.to_owned(); + async fn pairs_at(&self, epoch: UserEpoch) -> Result, RyhopeError> { + let db = self.wrapped.read().await.db.clone(); + let table = self.wrapped.read().await.table.to_owned(); + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .to_incremental_epoch(epoch) + .await; - T::fetch_all_pairs(db, &table, epoch).await + T::fetch_all_pairs(db, &table, inner_epoch).await } } impl EpochKvStorage for PayloadProjection @@ -1378,47 +1605,50 @@ where T::Node: Sync + Clone, V: PayloadInDb, { - delegate::delegate! 
{ - to self.wrapped.lock().unwrap() { - async fn rollback_to(&mut self, epoch: Epoch) -> Result<(), RyhopeError>; - } - } - - fn remove(&mut self, k: T::Key) -> impl Future> + Send { + async fn remove(&mut self, k: T::Key) -> Result<(), RyhopeError> { trace!("[{self}] removing {k:?} from cache"); - self.wrapped.lock().unwrap().nodes_cache.insert(k, None); - async { Ok(()) } + self.wrapped.write().await.nodes_cache.insert(k, None); + Ok(()) } - fn update( - &mut self, - k: T::Key, - new_value: V, - ) -> impl Future> + Send { + async fn update(&mut self, k: T::Key, new_value: V) -> Result<(), RyhopeError> { trace!("[{self}] updating cache {k:?} -> {new_value:?}"); // If the operation is already present from a read, replace it with the // new value. self.wrapped - .lock() - .unwrap() + .write() + .await .payload_cache .insert(k, Some(CachedValue::Written(new_value))); - async { Ok(()) } + Ok(()) } - fn store( - &mut self, - k: T::Key, - value: V, - ) -> impl Future> + Send { + async fn store(&mut self, k: T::Key, value: V) -> Result<(), RyhopeError> { trace!("[{self}] storing {k:?} -> {value:?} in cache",); // If the operation is already present from a read, replace it with the // new value. self.wrapped - .lock() - .unwrap() + .write() + .await .payload_cache .insert(k, Some(CachedValue::Written(value))); - async { Ok(()) } + Ok(()) + } + + async fn rollback_to(&mut self, epoch: UserEpoch) -> Result<(), RyhopeError> { + let inner_epoch = self + .wrapped + .read() + .await + .epoch_mapper + .to_incremental_epoch(epoch) + .await; + self.wrapped.write().await.rollback_to(inner_epoch).await + } + + async fn rollback(&mut self) -> Result<(), RyhopeError> { + let inner_epoch = self.wrapped.read().await.current_epoch(); + ensure(inner_epoch > 0, "cannot rollback past the initial epoch")?; + self.wrapped.write().await.rollback_to(inner_epoch - 1).await } } diff --git a/ryhope/src/storage/pgsql/wide_lineage.sql index 699d0093d..091478224 100644 --- a/ryhope/src/storage/pgsql/wide_lineage.sql +++ b/ryhope/src/storage/pgsql/wide_lineage.sql @@ -80,11 +80,16 @@ WITH RECURSIVE descendance (is_core, {KEY}, {PARENT}, {LEFT_CHILD}, {RIGHT_CHILD MAX(is_core) AS is_core, -- Expand the epoch ranges [[{VALID_FROM}, {VALID_UNTIL}]] into -- ({VALID_UNTIL} - {VALID_FROM}) individual rows, clamped within - -- [[min_block, max_block]] - generate_series(GREATEST({VALID_FROM}, $1), LEAST({VALID_UNTIL}, $2)) AS {EPOCH}, + -- [[min_epoch, max_epoch]] + generate_series(GREATEST({VALID_FROM}, mapper_range.min_epoch), LEAST({VALID_UNTIL}, mapper_range.max_epoch)) AS {EPOCH}, -- Normal columns {KEY}, {PARENT}, {LEFT_CHILD}, {RIGHT_CHILD}, {SUBTREE_SIZE}, {PAYLOAD} FROM - descendance + descendance JOIN ( + SELECT MIN({INCREMENTAL_EPOCH}) as min_epoch, MAX({INCREMENTAL_EPOCH}) as max_epoch + FROM {mapper_table_name} + WHERE {USER_EPOCH} >= $1 AND {USER_EPOCH} <= $2 + ) AS mapper_range + ON {VALID_FROM} <= mapper_range.max_epoch AND {VALID_UNTIL} >= mapper_range.min_epoch -- Results must be deduplicated according to this tuple of attributes - GROUP BY (is_core, {KEY}, {PARENT}, {LEFT_CHILD}, {RIGHT_CHILD}, {SUBTREE_SIZE}, {VALID_FROM}, {VALID_UNTIL}, {PAYLOAD}) + GROUP BY (is_core, {KEY}, {PARENT}, {LEFT_CHILD}, {RIGHT_CHILD}, {SUBTREE_SIZE}, {VALID_FROM}, {VALID_UNTIL}, min_epoch, max_epoch, {PAYLOAD}) diff --git a/ryhope/src/storage/tests.rs index 6ed519e2d..005b476b4 100644 --- a/ryhope/src/storage/tests.rs +++ b/ryhope/src/storage/tests.rs @@ -13,14 
+13,16 @@ use crate::{ storage::{ memory::InMemory, pgsql::{PgsqlStorage, SqlServerConnection, SqlStorageSettings}, - EpochKvStorage, PayloadStorage, RoEpochKvStorage, SqlTreeTransactionalStorage, TreeStorage, + EpochKvStorage, EpochMapper, PayloadStorage, RoEpochKvStorage, SqlTreeTransactionalStorage, + TreeStorage, }, tree::{ - sbbst::{self, Tree}, + sbbst, scapegoat::{self, Alpha}, PrintableTree, TreeTopology, }, - Epoch, InitSettings, MerkleTreeKvDb, NodePayload, EPOCH, KEY, VALID_FROM, VALID_UNTIL, + IncrementalEpoch, InitSettings, MerkleTreeKvDb, NodePayload, UserEpoch, EPOCH, KEY, VALID_FROM, + VALID_UNTIL, }; use super::TreeTransactionalStorage; @@ -33,12 +35,12 @@ impl NodePayload for usize {} impl NodePayload for String {} impl NodePayload for i64 {} -async fn _storage_in_memory(initial_epoch: Epoch) -> Result<()> { +async fn _storage_in_memory(initial_epoch: UserEpoch) -> Result<()> { type K = String; type V = usize; type TestTree = scapegoat::Tree; - type Storage = InMemory; + type Storage = InMemory; let mut s = MerkleTreeKvDb::::new( InitSettings::ResetAt(scapegoat::Tree::empty(Alpha::new(0.8)), initial_epoch), @@ -96,12 +98,12 @@ async fn shifted_storage_in_memory() -> Result<()> { _storage_in_memory(388).await } -async fn _storage_in_pgsql(initial_epoch: Epoch) -> Result<()> { +async fn _storage_in_pgsql(initial_epoch: UserEpoch) -> Result<()> { type K = String; type V = usize; type TestTree = scapegoat::Tree; - type Storage = PgsqlStorage; + type Storage = PgsqlStorage; let table = format!("simple_{}", initial_epoch); let mut s = MerkleTreeKvDb::::new( @@ -109,6 +111,7 @@ async fn _storage_in_pgsql(initial_epoch: Epoch) -> Result<()> { SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: table.clone(), + external_mapper: None, }, ) .await?; @@ -121,6 +124,7 @@ async fn _storage_in_pgsql(initial_epoch: Epoch) -> Result<()> { SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table, + external_mapper: None, }, ) .await?; @@ -255,15 +259,16 @@ impl From<&str> for ShaizedString { #[tokio::test] async fn sbbst_storage_in_pgsql() -> Result<()> { type V = ShaizedString; - type TestTree = sbbst::Tree; - type SqlStorage = PgsqlStorage; - type RamStorage = InMemory; + type TestTree = sbbst::IncrementalTree; + type SqlStorage = PgsqlStorage; + type RamStorage = InMemory; let mut s_psql = MerkleTreeKvDb::::new( - InitSettings::Reset(sbbst::Tree::empty()), + InitSettings::Reset(TestTree::empty()), SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "simple_sbbst".to_string(), + external_mapper: None, }, ) .await?; @@ -299,6 +304,7 @@ async fn sbbst_storage_in_pgsql() -> Result<()> { SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "simple_sbbst".to_string(), + external_mapper: None, }, ) .await?; @@ -316,11 +322,9 @@ async fn sbbst_storage_in_pgsql() -> Result<()> { s_psql.diff_at(i).await?.unwrap().print(); } - let mut s_ram = MerkleTreeKvDb::::new( - InitSettings::Reset(sbbst::Tree::empty()), - (), - ) - .await?; + let mut s_ram = + MerkleTreeKvDb::::new(InitSettings::Reset(TestTree::empty()), ()) + .await?; s_ram .in_transaction(|t| { Box::pin(async { @@ -418,7 +422,7 @@ async fn hashes() -> Result<()> { type V = ShaizedString; type Tree = scapegoat::Tree; - type Storage = InMemory; + type Storage = InMemory; let mut s = MerkleTreeKvDb::::new( InitSettings::ResetAt(Tree::empty(Alpha::fully_balanced()), 392), @@ -458,7 +462,7 @@ async fn hashes_pgsql() -> Result<()> { 
type V = ShaizedString; type Tree = scapegoat::Tree; - type Storage = PgsqlStorage; + type Storage = PgsqlStorage; { let mut s = MerkleTreeKvDb::::new( @@ -466,6 +470,7 @@ SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "test_hashes".into(), + external_mapper: None, }, ) .await?; @@ -501,6 +506,7 @@ SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "test_hashes".into(), + external_mapper: None, }, ) .await?; @@ -528,11 +534,11 @@ } #[tokio::test] -async fn sbbst_requires_sequential_keys() -> Result<()> { - type Tree = sbbst::Tree; +async fn incremental_sbbst_requires_sequential_keys() -> Result<()> { + type Tree = sbbst::IncrementalTree; type V = i64; - type Storage = InMemory; + type Storage = InMemory; let mut s = MerkleTreeKvDb::::new( InitSettings::Reset(Tree::with_shift_and_capacity(10, 0)), @@ -550,18 +556,176 @@ Ok(()) } +#[tokio::test] +async fn epoch_sbbst_can_use_non_sequential_keys() -> Result<()> { + type Tree = sbbst::EpochTree; + type V = i64; + + type Storage = InMemory; + + let mut s = MerkleTreeKvDb::::new( + InitSettings::Reset(Tree::with_shift_and_capacity(10, 0)), + (), + ) + .await?; + + s.start_transaction().await?; + assert!(s.store(2, 2).await.is_err()); // try insert key smaller than initial shift + assert!(s.store(12, 2).await.is_ok()); + assert!(s.store(11, 2).await.is_err()); // try insert key smaller than previous one + assert!(s.store(14, 2).await.is_ok()); + assert!(s.store(15, 2).await.is_ok()); + s.commit_transaction().await?; + + // check that values have been inserted + assert_eq!(s.try_fetch(&12).await?.unwrap(), 2); + assert_eq!(s.try_fetch(&14).await?.unwrap(), 2); + assert_eq!(s.try_fetch(&15).await?.unwrap(), 2); + + // check that 11 has not been inserted + assert!(s.try_fetch(&11).await?.is_none()); + Ok(()) +} + +#[tokio::test] +async fn epoch_sbbst_over_pgsql_with_non_sequential_keys() -> Result<()> { + type Tree = sbbst::EpochTree; + type V = i64; + + type Storage = PgsqlStorage; + + let mut s = MerkleTreeKvDb::::new( + InitSettings::Reset(Tree::with_shift_and_capacity(10, 0)), + SqlStorageSettings { + table: "epoch_sbbst".to_string(), + source: SqlServerConnection::NewConnection(db_url()), + external_mapper: None, + }, + ) + .await?; + + s.start_transaction().await?; + assert!(s.store(2, 2).await.is_err()); // try insert key smaller than initial shift + assert!(s.store(12, 2).await.is_ok()); + assert!(s.store(11, 2).await.is_err()); // try insert key smaller than previous one + s.commit_transaction().await?; + + // start a new transaction + s.start_transaction().await?; + assert!(s.store(14, 2).await.is_ok()); + s.commit_transaction().await?; + + // check that values have been inserted + assert_eq!(s.try_fetch(&12).await?.unwrap(), 2); + assert_eq!(s.try_fetch(&14).await?.unwrap(), 2); + + // check that 11 has not been inserted + assert!(s.try_fetch(&11).await?.is_none()); + + assert_eq!(s.storage.epoch_mapper().to_incremental_epoch(12).await, 1); + assert_eq!(s.storage.epoch_mapper().to_incremental_epoch(14).await, 2); + assert!(s + .storage + .epoch_mapper() + .try_to_incremental_epoch(11) + .await + .is_none()); + + Ok(()) +} + +#[tokio::test] +async fn test_caching_mechanism() -> Result<()> { + const MAX_ENTRIES: usize = 10; + const INITIAL_EPOCH: UserEpoch = 4; + + // test that we never erase from an 
`InMemoryEpochMapper` + let mut epoch_mapper = crate::storage::memory::InMemoryEpochMapper::new_at(INITIAL_EPOCH); + + for i in 0..2 * MAX_ENTRIES { + epoch_mapper + .add_epoch_map(INITIAL_EPOCH + i as UserEpoch, i as IncrementalEpoch) + .await?; + } + + assert_eq!(epoch_mapper.initial_epoch(), INITIAL_EPOCH); + for i in 0..2 * MAX_ENTRIES { + // check that no epoch has been erased from the storage + assert!(epoch_mapper + .try_to_incremental_epoch(INITIAL_EPOCH + i as UserEpoch) + .await + .is_some()) + } + + // test that epochs are not erased from cache if we insert them sequentially + let mut epoch_cache = + crate::storage::memory::EpochMapperCache::::new_at(INITIAL_EPOCH); + + for i in 0..2 * MAX_ENTRIES { + epoch_cache + .add_epoch_map(INITIAL_EPOCH + i as UserEpoch, i as IncrementalEpoch) + .await?; + } + + assert_eq!(epoch_cache.initial_epoch(), INITIAL_EPOCH); + println!("{}", epoch_cache.last_epoch()); + for i in 0..2 * MAX_ENTRIES { + // check that no epoch has been erased from the storage + assert!( + epoch_cache + .try_to_incremental_epoch(INITIAL_EPOCH + i as UserEpoch) + .await + .is_some(), + "failed for epoch {i}" + ); + } + + // now, insert epochs not sequentially, and test that epochs start to be erased + for i in 0..MAX_ENTRIES { + epoch_cache + .add_epoch_map( + (3 * MAX_ENTRIES as UserEpoch + INITIAL_EPOCH) * (i + 1) as UserEpoch, + (2 * MAX_ENTRIES + i) as IncrementalEpoch, + ) + .await?; + } + + assert_eq!(epoch_cache.initial_epoch(), INITIAL_EPOCH); + // count number of epochs still in the storage + let mut num_epochs = 0; + for i in 0..2 * MAX_ENTRIES { + num_epochs += epoch_cache + .try_to_incremental_epoch(INITIAL_EPOCH + i as UserEpoch) + .await + .is_some() as usize; + } + for i in 0..MAX_ENTRIES { + num_epochs += epoch_cache + .try_to_incremental_epoch( + (3 * MAX_ENTRIES as UserEpoch + INITIAL_EPOCH) * (i + 1) as UserEpoch, + ) + .await + .is_some() as usize; + } + + assert_eq!(num_epochs, MAX_ENTRIES); + + Ok(()) +} + #[tokio::test] async fn thousand_rows() -> Result<()> { type K = i64; type V = usize; type Tree = scapegoat::Tree; - type Storage = PgsqlStorage; + type Storage = PgsqlStorage; let mut s = MerkleTreeKvDb::::new( InitSettings::Reset(Tree::empty(Alpha::fully_balanced())), SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "thousand".to_string(), + external_mapper: None, }, ) .await?; @@ -612,14 +776,13 @@ #[tokio::test] async fn aggregation_memory() -> Result<()> { - type Tree = sbbst::Tree; + type Tree = sbbst::IncrementalTree; type V = MinMaxi64; - type Storage = InMemory; + type Storage = InMemory; let mut s = - MerkleTreeKvDb::::new(InitSettings::Reset(sbbst::Tree::empty()), ()) - .await?; + MerkleTreeKvDb::::new(InitSettings::Reset(Tree::empty()), ()).await?; s.in_transaction(|s| { Box::pin(async { @@ -645,15 +808,16 @@ #[tokio::test] async fn aggregation_pgsql() -> Result<()> { - type Tree = sbbst::Tree; + type Tree = sbbst::IncrementalTree; type V = MinMaxi64; - type Storage = PgsqlStorage; + type Storage = PgsqlStorage; let mut s = MerkleTreeKvDb::::new( InitSettings::ResetAt(Tree::empty(), 32), SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "agg".to_string(), + external_mapper: None, }, ) .await?; @@ -685,7 +849,7 @@ async fn test_rollback< S: EpochKvStorage + TreeTransactionalStorage + Send + Sync, >( s: &mut S, - initial_epoch: Epoch, + initial_epoch: UserEpoch, ) { for i in 0..3 { 
s.in_transaction(|s| { @@ -699,7 +863,7 @@ .unwrap(); } - assert_eq!(s.current_epoch(), 3 + initial_epoch); + assert_eq!(s.current_epoch().await.unwrap(), 3 + initial_epoch); assert_eq!(s.size().await, 6); for i in 0..=5 { assert!(s.contains(&i.into()).await.unwrap()); } @@ -709,7 +873,7 @@ s.rollback_to(1 + initial_epoch) .await .unwrap_or_else(|_| panic!("failed to rollback to {}", 1 + initial_epoch)); - assert_eq!(s.current_epoch(), 1 + initial_epoch); + assert_eq!(s.current_epoch().await.unwrap(), 1 + initial_epoch); assert_eq!(s.size().await, 2); for i in 0..=5 { if i <= 1 { @@ -721,13 +885,15 @@ // rollback once to reach epoch 0 s.rollback().await.unwrap(); - assert_eq!(s.current_epoch(), initial_epoch); + println!("Rolled back to initial epoch"); + assert_eq!(s.current_epoch().await.unwrap(), initial_epoch); assert_eq!(s.size().await, 0); for i in 0..=5 { assert!(!s.contains(&i.into()).await.unwrap()); } // Cannot rollback before epoch 0 + println!("Rolling back before initial epoch"); assert!(s.rollback().await.is_err()); } @@ -737,7 +903,7 @@ async fn rollback_memory() { type V = MinMaxi64; type Tree = scapegoat::Tree; - type Storage = InMemory; + type Storage = InMemory; let mut s = MerkleTreeKvDb::::new( InitSettings::Reset(Tree::empty(Alpha::new(0.7))), (), @@ -754,9 +920,9 @@ async fn rollback_memory_at() { type V = MinMaxi64; type Tree = scapegoat::Tree; - type Storage = InMemory; + type Storage = InMemory; - const INITIAL_EPOCH: Epoch = 4875; + const INITIAL_EPOCH: UserEpoch = 4875; let mut s = MerkleTreeKvDb::::new( InitSettings::ResetAt(Tree::empty(Alpha::new(0.7)), INITIAL_EPOCH), (), @@ -773,12 +939,13 @@ async fn rollback_psql() { type V = MinMaxi64; type Tree = scapegoat::Tree; - type Storage = PgsqlStorage; + type Storage = PgsqlStorage; let mut s = MerkleTreeKvDb::::new( InitSettings::Reset(Tree::empty(Alpha::new(0.7))), SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "rollback".to_string(), + external_mapper: None, }, ) .await .unwrap(); @@ -793,13 +960,14 @@ async fn rollback_psql_at() { type V = MinMaxi64; type Tree = scapegoat::Tree; - const INITIAL_EPOCH: Epoch = 4875; - type Storage = PgsqlStorage; + const INITIAL_EPOCH: UserEpoch = 4875; + type Storage = PgsqlStorage; let mut s = MerkleTreeKvDb::::new( InitSettings::ResetAt(Tree::empty(Alpha::new(0.7)), INITIAL_EPOCH), SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "rollback_at".to_string(), + external_mapper: None, }, ) .await @@ -810,9 +978,9 @@ #[tokio::test] async fn context_at() { - type Tree = sbbst::Tree; + type Tree = sbbst::IncrementalTree; type V = MinMaxi64; - type Storage = InMemory; + type Storage = InMemory; let mut s = MerkleTreeKvDb::::new(InitSettings::Reset(Tree::empty()), ()) .await .unwrap(); @@ -862,7 +1030,7 @@ async fn initial_state() { type K = i64; type V = MinMaxi64; type Tree = scapegoat::Tree; - type Storage = PgsqlStorage; + type Storage = PgsqlStorage; // Create an empty tree { let mut s = MerkleTreeKvDb::::new( @@ -871,6 +1039,7 @@ SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "empty_tree".to_string(), + external_mapper: None, }, ) .await @@ -883,6 +1052,7 @@ SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "empty_tree".to_string(), + external_mapper: None, }, ) .await @@ -897,9 +1067,9 @@ #[tokio::test] 
 async fn dirties() {
-    type Tree = sbbst::Tree;
+    type Tree = sbbst::IncrementalTree;
     type V = MinMaxi64;
-    type Storage = InMemory<Tree, V>;
+    type Storage = InMemory<Tree, V, false>;
     let mut s = MerkleTreeKvDb::<Tree, V, Storage>::new(InitSettings::Reset(Tree::empty()), ())
         .await
         .unwrap();
@@ -947,16 +1117,17 @@ async fn grouped_txs() -> Result<()> {
     type K = i64;
     type V = MinMaxi64;
 
-    type SbbstTree = sbbst::Tree;
-    type SbbstStorage = PgsqlStorage<SbbstTree, V>;
+    type SbbstTree = sbbst::EpochTree;
+    type SbbstStorage = PgsqlStorage<SbbstTree, V, false>;
     type ScapeTree = scapegoat::Tree<K>;
-    type ScapeStorage = PgsqlStorage<ScapeTree, V>;
+    type ScapeStorage = PgsqlStorage<ScapeTree, V, true>;
 
     let mut t1 = MerkleTreeKvDb::<SbbstTree, V, SbbstStorage>::new(
-        InitSettings::Reset(Tree::empty()),
+        InitSettings::Reset(SbbstTree::empty()),
         SqlStorageSettings {
             table: "nested_sbbst".into(),
             source: SqlServerConnection::Pool(db_pool.clone()),
+            external_mapper: None,
         },
     )
     .await
@@ -967,6 +1138,7 @@
         SqlStorageSettings {
             table: "nested_scape".into(),
             source: SqlServerConnection::Pool(db_pool.clone()),
+            external_mapper: Some("nested_sbbst".into()),
         },
     )
     .await
@@ -983,7 +1155,6 @@
     t2.start_transaction().await?;
 
     t1.store(1, 456.into()).await?;
-    t1.store(2, 789.into()).await?;
     t2.store(8786384, 456.into()).await?;
     t2.store(4, 329.into()).await?;
@@ -997,14 +1168,14 @@
 
     tx.commit().await?;
 
-    t1.commit_success();
-    t2.commit_success();
+    t1.commit_success().await;
+    t2.commit_success().await;
 
     // The committed root must be equal to its in-flight snapshot
     let commited_root = t1.root().await.unwrap().unwrap();
     assert_eq!(commited_root, in_flight_root);
 
     // Sizes must have been committed correctly
-    assert_eq!(t1.size().await, 2);
+    assert_eq!(t1.size().await, 1);
     assert_eq!(t2.size().await, 3);
 
     assert!(t2.try_fetch(&4).await.unwrap().is_some());
@@ -1016,7 +1187,6 @@
     t2.start_transaction().await?;
 
     t1.store(3, 456.into()).await?;
-    t1.store(4, 789.into()).await?;
     t2.store(578943, 542.into()).await?;
     t2.store(943, commited_root.into()).await?;
 
@@ -1025,11 +1195,11 @@
     t2.commit_in(&mut tx).await?;
     tx.rollback().await?;
 
-    t1.commit_failed();
-    t2.commit_failed();
+    t1.commit_failed().await;
+    t2.commit_failed().await;
 
     // Size should not have changed
-    assert_eq!(t1.size().await, 2);
+    assert_eq!(t1.size().await, 1);
     assert_eq!(t2.size().await, 3);
 
     // Old data must still be there
@@ -1048,13 +1218,14 @@ async fn fetch_many() {
     type K = String;
     type V = usize;
     type Tree = scapegoat::Tree<K>;
-    type Storage = PgsqlStorage<Tree, V>;
+    type Storage = PgsqlStorage<Tree, V, false>;
 
     let mut s = MerkleTreeKvDb::<Tree, V, Storage>::new(
         InitSettings::Reset(Tree::empty(Alpha::never_balanced())),
         SqlStorageSettings {
             source: SqlServerConnection::NewConnection(db_url()),
             table: "many".to_string(),
+            external_mapper: None,
         },
     )
     .await
@@ -1089,17 +1260,17 @@
     let many = s
         .try_fetch_many_at([
             // OK
-            (1i64, "restera".to_string()),
+            (1i64 as UserEpoch, "restera".to_string()),
             // OK
-            (2i64, "restera".to_string()),
+            (2i64 as UserEpoch, "restera".to_string()),
             // non-existing epoch
-            (4i64, "restera".to_string()),
+            (4i64 as UserEpoch, "restera".to_string()),
             // does not exist yet
-            (1i64, "car".to_string()),
+            (1i64 as UserEpoch, "car".to_string()),
             // OK
-            (2i64, "car".to_string()),
+            (2i64 as UserEpoch, "car".to_string()),
             // non-existing key
-            (1i64, "meumeu".to_string()),
+            (1i64 as UserEpoch, "meumeu".to_string()),
         ])
         .await
         .unwrap()
@@ -1111,12 +1282,9 @@
     assert_eq!(
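        // Only live (epoch, key) pairs are expected back: with the epoch
        // mapper in place, the never-committed epoch 4 no longer resolves.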
assert_eq!( many, [ - (1i64, "restera".to_string(), 12), - (2i64, "restera".to_string(), 12), - (2i64, "car".to_string(), 0), - // This should not exist, but as we use infinity to mark alive - // nodes, it will still appear - (4i64, "restera".to_string(), 12), + (1i64 as UserEpoch, "restera".to_string(), 12), + (2i64 as UserEpoch, "restera".to_string(), 12), + (2i64 as UserEpoch, "car".to_string(), 0), ] .into_iter() .collect::>() @@ -1128,13 +1296,14 @@ async fn wide_update_trees() { type K = String; type V = usize; type Tree = scapegoat::Tree; - type Storage = PgsqlStorage; + type Storage = PgsqlStorage; let mut s = MerkleTreeKvDb::::new( InitSettings::Reset(Tree::empty(Alpha::never_balanced())), SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "wide".to_string(), + external_mapper: None, }, ) .await @@ -1189,13 +1358,14 @@ async fn all_pgsql() { type K = String; type V = usize; type Tree = scapegoat::Tree; - type Storage = PgsqlStorage; + type Storage = PgsqlStorage; let mut s = MerkleTreeKvDb::::new( InitSettings::Reset(Tree::empty(Alpha::never_balanced())), SqlStorageSettings { source: SqlServerConnection::NewConnection(db_url()), table: "fetch_all".to_string(), + external_mapper: None, }, ) .await @@ -1261,7 +1431,7 @@ async fn all_memory() { type K = String; type V = usize; type Tree = scapegoat::Tree; - type Storage = InMemory; + type Storage = InMemory; let mut s = MerkleTreeKvDb::::new( InitSettings::Reset(Tree::empty(Alpha::never_balanced())), diff --git a/ryhope/src/storage/updatetree.rs b/ryhope/src/storage/updatetree.rs index 72051e148..6a9228cb6 100644 --- a/ryhope/src/storage/updatetree.rs +++ b/ryhope/src/storage/updatetree.rs @@ -1,7 +1,7 @@ use crate::{ error::RyhopeError, tree::{NodeContext, TreeTopology}, - Epoch, + UserEpoch, }; use futures::{future::BoxFuture, FutureExt}; use serde::{Deserialize, Serialize}; @@ -18,7 +18,7 @@ use super::TreeStorage; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct UpdateTree { /// The epoch stemming from the application of this update tree - epoch: Epoch, + epoch: UserEpoch, /// An arena-like storage of all the nodes in the tree nodes: Vec>, /// key -> arena index mapping @@ -68,7 +68,7 @@ impl UpdateTree { impl UpdateTree { /// Create an empty `UpdateTree`. - fn empty(epoch: Epoch) -> Self { + fn empty(epoch: UserEpoch) -> Self { Self { epoch, nodes: Vec::new(), @@ -77,7 +77,7 @@ impl UpdateTree { } /// Instantiate a new `UpdateTree` containing all the provided paths. - pub fn from_paths>>(paths: I, epoch: Epoch) -> Self { + pub fn from_paths>>(paths: I, epoch: UserEpoch) -> Self { let mut paths = paths.into_iter(); if let Some(path) = paths.next() { let mut r = Self::from_path(path, epoch); @@ -92,7 +92,7 @@ impl UpdateTree { /// Instantiate a new `UpdateTree` from a seminal path from the root to a /// node. - pub fn from_path(mut path: Vec, epoch: Epoch) -> Self { + pub fn from_path(mut path: Vec, epoch: UserEpoch) -> Self { path.reverse(); if let Some(root_k) = path.pop() { let mut tree = UpdateTree { @@ -162,7 +162,7 @@ impl UpdateTree { } /// Return the epoch generated by this tree. 
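 /// For instance, an update tree built from the transaction that committed at
 /// user epoch `e` will return `e` here.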
- pub fn epoch(&self) -> Epoch { + pub fn epoch(&self) -> UserEpoch { self.epoch } @@ -277,7 +277,7 @@ impl UpdateTree { pub async fn from_tree + Sync, S: TreeStorage>( t: &T, s: &S, - epoch: Epoch, + epoch: UserEpoch, ) -> Self { let mut r = Self::empty(epoch); if let Some(root) = t.root(s).await.unwrap() { @@ -293,7 +293,7 @@ impl UpdateTree { /// /// This method assumes that the given map correctly encodes a binary tree /// and will not perform any check. - pub fn from_map(epoch: Epoch, root: &K, nodes: &HashMap>) -> Self { + pub fn from_map(epoch: UserEpoch, root: &K, nodes: &HashMap>) -> Self { let mut r = Self::empty(epoch); r.rec_from_map(root, nodes, None); r @@ -621,7 +621,10 @@ mod tests { #[tokio::test] async fn from_tree() { let t = sbbst::Tree; - let storage = InMemory::::new(sbbst::Tree::with_capacity(10)); + let storage = InMemory::::new_with_epoch( + sbbst::IncrementalTree::with_capacity(10), + 0, + ); let ut = UpdateTree::from_tree(&t, &storage, 1).await; ut.print(); } diff --git a/ryhope/src/storage/view.rs b/ryhope/src/storage/view.rs index f5395679f..5b9950333 100644 --- a/ryhope/src/storage/view.rs +++ b/ryhope/src/storage/view.rs @@ -1,10 +1,10 @@ //! This module offers facilities to “time-travel”, i.e. access the successive //! states of a tree at given epochs. -use std::{collections::HashMap, fmt::Debug, marker::PhantomData}; +use std::{collections::HashMap, fmt::Debug, future::Future, marker::PhantomData}; use serde::{Deserialize, Serialize}; -use crate::{error::RyhopeError, tree::TreeTopology, Epoch}; +use crate::{error::RyhopeError, tree::TreeTopology, UserEpoch}; use super::{EpochKvStorage, EpochStorage, RoEpochKvStorage, TransactionalStorage, TreeStorage}; @@ -17,7 +17,7 @@ pub struct StorageView< /// The wrapped [`EpochStorage`] &'s S, /// The target epoch - Epoch, + UserEpoch, /// [ignore] PhantomData, ); @@ -29,7 +29,7 @@ impl< where T: Send, { - fn start_transaction(&mut self) -> Result<(), RyhopeError> { + async fn start_transaction(&mut self) -> Result<(), RyhopeError> { unimplemented!("storage views are read only") } @@ -45,11 +45,11 @@ impl< where T: Send, { - fn current_epoch(&self) -> Epoch { - self.1 + async fn current_epoch(&self) -> Result { + Ok(self.1) } - async fn fetch_at(&self, epoch: Epoch) -> Result { + async fn fetch_at(&self, epoch: UserEpoch) -> Result { if epoch != self.1 { unimplemented!( "this storage view is locked at {}; {epoch} unreachable", @@ -68,7 +68,11 @@ where unimplemented!("storage views are read only") } - async fn rollback_to(&mut self, _epoch: Epoch) -> Result<(), RyhopeError> { + async fn rollback_to(&mut self, _epoch: UserEpoch) -> Result<(), RyhopeError> { + unimplemented!("storage views are read only") + } + + async fn rollback(&mut self) -> Result<(), RyhopeError> { unimplemented!("storage views are read only") } } @@ -78,7 +82,7 @@ pub struct KvStorageAt<'a, T: TreeTopology, S: RoEpochKvStorage /// The wrapped [`RoEpochKvStorage`] wrapped: &'a S, /// The epoch at which the wrapped storage is being looked at - current_epoch: Epoch, + current_epoch: UserEpoch, /// [ignore] _p: PhantomData, } @@ -86,15 +90,19 @@ pub struct KvStorageAt<'a, T: TreeTopology, S: RoEpochKvStorage impl + Sync> RoEpochKvStorage for KvStorageAt<'_, T, S> { - fn initial_epoch(&self) -> Epoch { + fn initial_epoch(&self) -> impl Future + Send { self.wrapped.initial_epoch() } - fn current_epoch(&self) -> Epoch { - self.current_epoch + async fn current_epoch(&self) -> Result { + Ok(self.current_epoch) } - async fn try_fetch_at(&self, k: &T::Key, 
epoch: Epoch) -> Result, RyhopeError> { + async fn try_fetch_at( + &self, + k: &T::Key, + epoch: UserEpoch, + ) -> Result, RyhopeError> { if epoch > self.current_epoch { unimplemented!( "this storage view is locked at {}; {epoch} unreachable", @@ -105,19 +113,27 @@ impl + Sync> RoEpochKvStor } } - async fn size_at(&self, epoch: Epoch) -> usize { + async fn try_fetch(&self, k: &T::Key) -> Result, RyhopeError> { + self.wrapped.try_fetch_at(k, self.current_epoch).await + } + + async fn size(&self) -> usize { + self.wrapped.size_at(self.current_epoch).await + } + + async fn size_at(&self, epoch: UserEpoch) -> usize { self.wrapped.size_at(epoch).await } - async fn keys_at(&self, epoch: Epoch) -> Vec { + async fn keys_at(&self, epoch: UserEpoch) -> Vec { self.wrapped.keys_at(epoch).await } - async fn random_key_at(&self, epoch: Epoch) -> Option { + async fn random_key_at(&self, epoch: UserEpoch) -> Option { self.wrapped.random_key_at(epoch).await } - async fn pairs_at(&self, epoch: Epoch) -> Result, RyhopeError> { + async fn pairs_at(&self, epoch: UserEpoch) -> Result, RyhopeError> { if epoch > self.current_epoch { unimplemented!( "this storage view is locked at {}; {epoch} unreachable", @@ -144,7 +160,11 @@ impl + Sync> EpochKvStorag unimplemented!("storage views are read only") } - async fn rollback_to(&mut self, _epoch: Epoch) -> Result<(), RyhopeError> { + async fn rollback_to(&mut self, _epoch: UserEpoch) -> Result<(), RyhopeError> { + unimplemented!("storage views are read only") + } + + async fn rollback(&mut self) -> Result<(), RyhopeError> { unimplemented!("storage views are read only") } } @@ -154,17 +174,18 @@ pub struct TreeStorageView<'a, T: TreeTopology, S: TreeStorage> { /// The wrapped [`TreeStorage`] pub wrapped: &'a S, /// The target epoch - pub epoch: Epoch, + pub epoch: UserEpoch, /// A wrapper over the state storage of `wrapped` pub state: StorageView<'a, T::State, S::StateStorage>, /// A wrapper over the node storage of `wrapped` pub nodes: KvStorageAt<'a, T, S::NodeStorage>, + epoch_mapper: &'a S::EpochMapper, /// [ignore] pub _t: PhantomData, } impl<'a, T: TreeTopology + 'a, S: TreeStorage + 'a> TreeStorageView<'a, T, S> { /// Create a new view on `s` locked at `epoch`. 
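 /// For example (hypothetical usage), `TreeStorageView::new(&storage, 17)`
 /// exposes the tree exactly as it stood at user epoch 17; all mutating
 /// methods on the resulting view are unimplemented, as views are read-only.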
- pub fn new(s: &'a S, epoch: Epoch) -> Self { + pub fn new(s: &'a S, epoch: UserEpoch) -> Self { Self { wrapped: s, epoch, @@ -174,6 +195,7 @@ impl<'a, T: TreeTopology + 'a, S: TreeStorage + 'a> TreeStorageView<'a, T, S> current_epoch: epoch, _p: PhantomData, }, + epoch_mapper: s.epoch_mapper(), _t: PhantomData, } } @@ -186,6 +208,7 @@ where { type StateStorage = StorageView<'a, T::State, S::StateStorage>; type NodeStorage = KvStorageAt<'a, T, S::NodeStorage>; + type EpochMapper = S::EpochMapper; fn state(&self) -> &Self::StateStorage { &self.state @@ -203,11 +226,19 @@ where unimplemented!("storage views are read only") } - async fn born_at(&self, epoch: Epoch) -> Vec { + async fn born_at(&self, epoch: UserEpoch) -> Vec { self.wrapped.born_at(epoch).await } - async fn rollback_to(&mut self, _epoch: Epoch) -> Result<(), RyhopeError> { + async fn rollback_to(&mut self, _epoch: UserEpoch) -> Result<(), RyhopeError> { + unimplemented!("storage views are read only") + } + + fn epoch_mapper(&self) -> &Self::EpochMapper { + self.epoch_mapper + } + + fn epoch_mapper_mut(&mut self) -> &mut Self::EpochMapper { unimplemented!("storage views are read only") } } diff --git a/ryhope/src/tests/example.rs b/ryhope/src/tests/example.rs index e38705216..1015fe6a1 100644 --- a/ryhope/src/tests/example.rs +++ b/ryhope/src/tests/example.rs @@ -13,7 +13,7 @@ async fn run() -> Result<()> { type V = usize; type RowTree = scapegoat::Tree; - type Storage = InMemory; + type Storage = InMemory; let mut tree = MerkleTreeKvDb::::new( InitSettings::Reset(scapegoat::Tree::empty(Alpha::new(0.5))), (), @@ -23,7 +23,7 @@ async fn run() -> Result<()> { println!("Insertion of some (key,value) pairs"); println!( "Current version of the tree before insertion: {}", - tree.current_epoch() + tree.current_epoch().await.unwrap() ); let res = tree @@ -35,10 +35,10 @@ async fn run() -> Result<()> { .await .expect("this should work"); - let first_stamp = tree.current_epoch(); + let first_stamp = tree.current_epoch().await?; println!( "Current version of the tree after insertion: {}", - tree.current_epoch() + first_stamp ); println!("Tree of keys to update:"); @@ -77,7 +77,7 @@ async fn run() -> Result<()> { } // Printing the tree at its previous versions - println!("tree at {} is now:", tree.current_epoch()); + println!("tree at {} is now:", tree.current_epoch().await?); tree.tree().print(&tree.storage).await; println!("tree at epoch {first_stamp} was:"); @@ -88,14 +88,10 @@ async fn run() -> Result<()> { "The update tree from {first_stamp} to {} was:", first_stamp + 1 ); - tree.diff_at(first_stamp + 1) - .await - .unwrap() - .unwrap() - .print(); + tree.diff_at(first_stamp + 1).await?.unwrap().print(); println!("The update tree from 0 to 1 was:",); - tree.diff_at(1).await.unwrap().unwrap().print(); + tree.diff_at(1).await?.unwrap().print(); Ok(()) } diff --git a/ryhope/src/tests/trees.rs b/ryhope/src/tests/trees.rs index c2a62bd42..642eb2fca 100644 --- a/ryhope/src/tests/trees.rs +++ b/ryhope/src/tests/trees.rs @@ -5,10 +5,16 @@ mod sbbst { tree::{sbbst, MutableTree, TreeTopology}, }; - fn sbbst_in_memory(shift: usize, n: usize) -> (sbbst::Tree, InMemory) { + fn sbbst_in_memory( + shift: usize, + n: usize, + ) -> ( + sbbst::IncrementalTree, + InMemory, + ) { ( - sbbst::Tree, - InMemory::new(sbbst::Tree::with_shift_and_capacity(shift, n)), + sbbst::IncrementalTree::default(), + InMemory::new_with_epoch(sbbst::IncrementalTree::with_shift_and_capacity(shift, n), 0), ) } @@ -47,7 +53,7 @@ mod sbbst { let (mut t, mut s) = 
sbbst_in_memory(1000, 6); assert_eq!(t.size(&s).await.unwrap(), 6); - s.start_transaction().unwrap(); + s.start_transaction().await.unwrap(); t.insert(1007, &mut s).await.unwrap(); s.commit_transaction().await.unwrap(); assert_eq!(t.size(&s).await.unwrap(), 7); @@ -89,8 +95,11 @@ mod scapegoat { + Send, >( a: Alpha, - ) -> (scapegoat::Tree, InMemory, ()>) { - (Default::default(), InMemory::new(scapegoat::Tree::empty(a))) + ) -> (scapegoat::Tree, InMemory, (), false>) { + ( + Default::default(), + InMemory::new_with_epoch(scapegoat::Tree::empty(a), 0), + ) } #[tokio::test] @@ -101,7 +110,7 @@ mod scapegoat { assert_eq!(t.size(&s).await.unwrap(), 0); - s.start_transaction()?; + s.start_transaction().await?; t.insert("adsfda".into(), &mut s).await?; assert_eq!(t.size(&s).await.unwrap(), 1); @@ -129,8 +138,8 @@ mod scapegoat { let (mut bbst, mut bs) = scapegaot_in_memory::(Alpha::fully_balanced()); let (mut list, mut ls) = scapegaot_in_memory::(Alpha::never_balanced()); - bs.start_transaction().unwrap(); - ls.start_transaction().unwrap(); + bs.start_transaction().await.unwrap(); + ls.start_transaction().await.unwrap(); for i in 0..128 { bbst.insert(i, &mut bs).await.unwrap(); list.insert(i, &mut ls).await.unwrap(); @@ -149,7 +158,7 @@ mod scapegoat { let (mut t, mut s) = scapegaot_in_memory::(Alpha::new(0.5)); - s.start_transaction()?; + s.start_transaction().await?; for i in 0..20 { t.insert("A".repeat(i), &mut s).await.unwrap(); t.print(&s).await; diff --git a/ryhope/src/tree/sbbst.rs b/ryhope/src/tree/sbbst.rs index 881414e79..c68747538 100644 --- a/ryhope/src/tree/sbbst.rs +++ b/ryhope/src/tree/sbbst.rs @@ -51,12 +51,13 @@ //! parent = parent(s_tree, parent) use super::{MutableTree, NodeContext, NodePath, TreeTopology}; use crate::{ - error::RyhopeError, - storage::{EpochKvStorage, EpochStorage, TreeStorage}, + error::{ensure, RyhopeError}, + storage::{EpochKvStorage, EpochMapper, EpochStorage, TreeStorage}, tree::PrintableTree, + IncrementalEpoch, UserEpoch, }; use serde::{Deserialize, Serialize}; -use std::collections::HashSet; +use std::{collections::HashSet, future::Future}; /// Represents a user-facing index, in the shift+1..max range. 
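 /// For instance, with `shift = 1000`, the first stored node is exposed as
 /// user-facing index 1001, which maps back to inner index 1.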
pub type NodeIdx = usize; @@ -111,19 +112,31 @@ pub struct State { impl State { pub fn root(&self) -> NodeIdx { - self.outer_root() + self.outer_idx(self.inner_root()).0 } - pub fn ascendance>(&self, ns: I) -> HashSet { + async fn root_with_mapper(&self, mapper: &M) -> NodeIdx { + self.outer_root(mapper).await + } + + pub async fn ascendance>(&self, ns: I) -> HashSet { + self.ascendance_with_mapper(ns, self).await + } + + async fn ascendance_with_mapper, M: IndexMapper>( + &self, + ns: I, + mapper: &M, + ) -> HashSet { let mut ascendance = HashSet::new(); let inner_max = self.inner_max(); for n in ns.into_iter() { - let inner_idx = self.inner_idx(n); + let inner_idx = mapper.to_inner_idx(OuterIdx(n)).await; if inner_idx <= inner_max { if let Some(lineage) = self.lineage_inner(&inner_idx) { for n in lineage.into_full_path() { if n <= inner_max { - ascendance.insert(self.outer_idx(n)); + ascendance.insert(mapper.to_outer_idx(n).await.0); } } } @@ -133,8 +146,12 @@ impl State { ascendance } - pub fn parent(&self, n: NodeIdx) -> Option { - let n = self.inner_idx(n); + pub async fn parent(&self, n: NodeIdx) -> Option { + self.parent_with_mapper(n, self).await + } + + async fn parent_with_mapper(&self, n: NodeIdx, mapper: &M) -> Option { + let n = mapper.to_inner_idx(OuterIdx(n)).await; if n > self.inner_max() { panic!("{n:?} not in tree"); } @@ -148,18 +165,26 @@ impl State { parent = parent_in_saturated(parent); } - Some(self.outer_idx(parent)) + Some(mapper.to_outer_idx(parent).await.0) + } + + pub async fn lineage(&self, n: &NodeIdx) -> Option> { + self.lineage_with_mapper(n, self).await } - pub fn lineage(&self, n: &NodeIdx) -> Option> { - if let Some(lineage_inner) = self.lineage_inner(&self.inner_idx(*n)) { + async fn lineage_with_mapper( + &self, + n: &NodeIdx, + mapper: &M, + ) -> Option> { + if let Some(lineage_inner) = self.lineage_inner(&mapper.to_inner_idx(OuterIdx(*n)).await) { let mut ascendance = vec![]; for n in lineage_inner.ascendance { - ascendance.push(self.outer_idx(n)); + ascendance.push(mapper.to_outer_idx(n).await.0); } Some(NodePath { ascendance, - target: self.outer_idx(lineage_inner.target), + target: mapper.to_outer_idx(lineage_inner.target).await.0, }) } else { None @@ -167,14 +192,29 @@ impl State { } pub fn node_context(&self, k: &NodeIdx) -> Option> { - if let Some(inner) = self.node_context_inner(&self.inner_idx(*k)) { - let parent_outer = inner.parent.map(|parent| self.outer_idx(parent)); + // Not a simple call to `node_context_with_mapper` since we need a non-async version + // to be employed in circuits + self.node_context_inner(&self.inner_idx(OuterIdx(*k))) + .map(|inner| NodeContext { + node_id: self.outer_idx(inner.node_id).0, + parent: inner.parent.map(|idx| self.outer_idx(idx).0), + left: inner.left.map(|idx| self.outer_idx(idx).0), + right: inner.right.map(|idx| self.outer_idx(idx).0), + }) + } - let left_outer = inner.left.map(|left| self.outer_idx(left)); - let right_outer = inner.right.map(|right| self.outer_idx(right)); + async fn node_context_with_mapper( + &self, + k: &NodeIdx, + mapper: &M, + ) -> Option> { + if let Some(inner) = self.node_context_inner(&mapper.to_inner_idx(OuterIdx(*k)).await) { + let parent_outer = mapper.to_outer_idx_map(inner.parent).await.map(|idx| idx.0); + let left_outer = mapper.to_outer_idx_map(inner.left).await.map(|idx| idx.0); + let right_outer = mapper.to_outer_idx_map(inner.right).await.map(|idx| idx.0); Some(NodeContext { - node_id: self.outer_idx(inner.node_id), + node_id: 
mapper.to_outer_idx(inner.node_id).await.0, parent: parent_outer, left: left_outer, right: right_outer, @@ -184,9 +224,20 @@ impl State { } } - pub fn children(&self, n: &NodeIdx) -> Option<(Option, Option)> { - if let Some((l, r)) = self.children_inner(&self.inner_idx(*n)) { - Some((l.map(|l| self.outer_idx(l)), r.map(|r| self.outer_idx(r)))) + pub async fn children(&self, n: &NodeIdx) -> Option<(Option, Option)> { + self.children_with_mapper(n, self).await + } + + async fn children_with_mapper( + &self, + n: &NodeIdx, + mapper: &M, + ) -> Option<(Option, Option)> { + if let Some((l, r)) = self.children_inner(&mapper.to_inner_idx(OuterIdx(*n)).await) { + Some(( + mapper.to_outer_idx_map(l).await.map(|idx| idx.0), + mapper.to_outer_idx_map(r).await.map(|idx| idx.0), + )) } else { None } @@ -204,17 +255,10 @@ impl State { 0 }) } - /// Re-shift an index from the canonical range to the actual one - fn outer_idx(&self, n: InnerIdx) -> NodeIdx { - (n + self.shift).0 - } + /// Return the root of the tree, as a shifted node index. - fn outer_root(&self) -> NodeIdx { - self.outer_idx(self.inner_root()) - } - /// Un-shift an index into the canonical range - fn inner_idx(&self, n: NodeIdx) -> InnerIdx { - InnerIdx(n - self.shift) + async fn outer_root(&self, mapper: &M) -> NodeIdx { + mapper.to_outer_idx(self.inner_root()).await.0 } fn parent_inner(&self, n: InnerIdx) -> Option { @@ -300,11 +344,70 @@ impl State { None } } + + fn inner_idx(&self, outer_idx: OuterIdx) -> InnerIdx { + InnerIdx(outer_idx.0 - self.shift) + } + + fn outer_idx(&self, inner_idx: InnerIdx) -> OuterIdx { + OuterIdx((inner_idx + self.shift).0) + } +} + +trait IndexMapper: Sized + Send + Sync + Clone { + fn to_inner_idx(&self, outer_idx: OuterIdx) -> impl Future + Send; + + fn to_outer_idx(&self, inner_idx: InnerIdx) -> impl Future + Send; + + /// Apply `to_outer_idx` to `inner_idx` if it is `Some`, otherwise return `None`. 
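+    /// In other words, an async-friendly `Option::map`; `Option::map` itself
+    /// cannot be used here because `to_outer_idx` must be awaited.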
+ fn to_outer_idx_map( + &self, + inner_idx: Option, + ) -> impl Future> + Send { + async move { + match inner_idx { + Some(inner_idx) => Some(self.to_outer_idx(inner_idx).await), + None => None, + } + } + } +} + +impl IndexMapper for T { + async fn to_inner_idx(&self, outer_idx: OuterIdx) -> InnerIdx { + InnerIdx( + self.to_incremental_epoch(outer_idx.0 as UserEpoch) + .await + .try_into() + .unwrap(), + ) + } + + async fn to_outer_idx(&self, inner_idx: InnerIdx) -> OuterIdx { + OuterIdx(self.to_user_epoch(inner_idx.0 as IncrementalEpoch).await as usize) + } +} + +impl IndexMapper for State { + async fn to_inner_idx(&self, outer_idx: OuterIdx) -> InnerIdx { + self.inner_idx(outer_idx) + } + + async fn to_outer_idx(&self, inner_idx: InnerIdx) -> OuterIdx { + self.outer_idx(inner_idx) + } } #[derive(Default)] -pub struct Tree; -impl Tree { +pub struct Tree; + +/// Type alias to represent a generic sbbst with incremental keys +pub type IncrementalTree = Tree; +/// Type alias to represent a generic sbbst with monotonically increasing keys being +/// used as epochs of the storage +pub type EpochTree = Tree; + +impl Tree { pub fn empty() -> State { State { max: InnerIdx(0), @@ -332,9 +435,37 @@ impl Tree { shift: 0, } } + + async fn to_inner_idx>( + &self, + s: &S, + state: &State, + n: OuterIdx, + ) -> InnerIdx { + if IS_EPOCH_TREE { + s.epoch_mapper().to_inner_idx(n).await + } else { + state.to_inner_idx(n).await + } + } + + async fn to_outer_idx>( + &self, + s: &S, + state: &State, + n: InnerIdx, + ) -> OuterIdx { + if IS_EPOCH_TREE { + s.epoch_mapper().to_outer_idx(n).await + } else { + state.to_outer_idx(n).await + } + } } -async fn shift>(s: &S) -> Result { +async fn shift>>( + s: &S, +) -> Result { s.state().fetch().await.map(|s| s.shift) } @@ -371,50 +502,75 @@ fn children_inner_in_saturated(n: &InnerIdx) -> Option<(InnerIdx, InnerIdx)> { Some((maybe_left, maybe_right)) } -impl TreeTopology for Tree { +impl TreeTopology for Tree { /// Max, shift type State = State; type Key = NodeIdx; type Node = (); - async fn size>(&self, s: &S) -> Result { + async fn size>(&self, s: &S) -> Result { s.state().fetch().await.map(|s| s.inner_max().0) } - async fn ascendance, I: IntoIterator>( + async fn ascendance, I: IntoIterator>( &self, ns: I, s: &S, ) -> Result, RyhopeError> { - s.state().fetch().await.map(|s| s.ascendance(ns)) + let state = s.state().fetch().await?; + Ok(if IS_EPOCH_TREE { + state.ascendance_with_mapper(ns, s.epoch_mapper()).await + } else { + state.ascendance(ns).await + }) } - async fn root>(&self, s: &S) -> Result, RyhopeError> { - s.state().fetch().await.map(|s| Some(s.root())) + async fn root>(&self, s: &S) -> Result, RyhopeError> { + let state = s.state().fetch().await?; + Ok(Some(if IS_EPOCH_TREE { + state.root_with_mapper(s.epoch_mapper()).await + } else { + state.root() + })) } - async fn parent>( + async fn parent>( &self, n: NodeIdx, s: &S, ) -> Result, RyhopeError> { - s.state().fetch().await.map(|s| s.parent(n)) + let state = s.state().fetch().await?; + Ok(if IS_EPOCH_TREE { + state.parent_with_mapper(n, s.epoch_mapper()).await + } else { + state.parent(n).await + }) } - async fn lineage>( + async fn lineage>( &self, n: &NodeIdx, s: &S, ) -> Result>, RyhopeError> { - s.state().fetch().await.map(|s| s.lineage(n)) + let state = s.state().fetch().await?; + Ok(if IS_EPOCH_TREE { + state.lineage_with_mapper(n, s.epoch_mapper()).await + } else { + state.lineage(n).await + }) } - async fn children>( + async fn children>( &self, n: &NodeIdx, s: &S, ) -> Result, Option)>, 
 RyhopeError> {
-        s.state().fetch().await.map(|s| s.children(n))
+        let state = s.state().fetch().await?;
+        Ok(if IS_EPOCH_TREE {
+            state.children_with_mapper(n, s.epoch_mapper()).await
+        } else {
+            state.children(n).await
+        })
     }
 
     async fn node_context<S: TreeStorage<Tree>>(
@@ -422,24 +578,27 @@ impl TreeTopology for Tree {
         k: &NodeIdx,
         s: &S,
     ) -> Result<Option<NodeContext<NodeIdx>>, RyhopeError> {
-        s.state().fetch().await.map(|s| s.node_context(k))
+        let state = s.state().fetch().await?;
+        Ok(if IS_EPOCH_TREE {
+            state.node_context_with_mapper(k, s.epoch_mapper()).await
+        } else {
+            state.node_context(k)
+        })
     }
 
-    async fn contains<S: TreeStorage<Tree>>(
+    async fn contains<S: TreeStorage<Tree<IS_EPOCH_TREE>>>(
        &self,
         k: &NodeIdx,
         s: &S,
     ) -> Result<bool, RyhopeError> {
-        s.state()
-            .fetch()
-            .await
-            .map(|s| s.inner_idx(*k) <= s.inner_max())
+        let state = s.state().fetch().await?;
+        Ok(self.to_inner_idx(s, &state, OuterIdx(*k)).await <= state.inner_max())
     }
 }
 
-impl MutableTree for Tree {
+impl<const IS_EPOCH_TREE: bool> MutableTree for Tree<IS_EPOCH_TREE> {
     // The SBBST only supports appending exactly after the current largest key.
-    async fn insert<S: TreeStorage<Tree>>(
+    async fn insert<S: TreeStorage<Tree<IS_EPOCH_TREE>>>(
         &mut self,
         k: NodeIdx,
         s: &mut S,
         )?;
 
         let state = s.state().fetch().await?;
-        if state.inner_idx(k) != state.inner_max() + 1 {
-            return Err(RyhopeError::fatal(format!(
-                "invalid insert in SBBST: trying to insert {}, but next insert should be {} (shift = {})",
-                k,
-                state.outer_idx(state.inner_max() + 1),
-                state.shift,
-            )));
+        // compute the inner key of the next item to be inserted
+        let expected_inner_k = state.inner_max() + 1;
+        if IS_EPOCH_TREE {
+            // we need to check that k >= the last inserted epoch
+            let max_outer = s.epoch_mapper().to_outer_idx(state.inner_max()).await;
+            ensure(
+                max_outer <= OuterIdx(k),
+                format!(
+                    "Trying to insert an epoch {k} smaller than a previously inserted epoch {}",
+                    max_outer.0
+                ),
+            )?;
+            // in this case, k must be mapped to `expected_inner_k` in the epoch mapper
+            s.epoch_mapper_mut()
+                .add_epoch_map(k as UserEpoch, expected_inner_k.0 as IncrementalEpoch)
+                .await?;
         } else {
-            s.state_mut().update(|state| state.max += 1).await?;
+            // in this case, we need to check that the inner key corresponding to k
+            // is equal to `expected_inner_k`
+            let inner_k = state.to_inner_idx(OuterIdx(k)).await;
+            ensure(
+                inner_k == expected_inner_k,
+                format!(
+                    "invalid insert in SBBST: trying to insert {}, but next insert should be {} (shift = {})",
+                    k,
+                    state.to_outer_idx(expected_inner_k).await.0,
+                    state.shift,
+                ),
+            )?;
         }
+        s.state_mut().update(|state| state.max += 1).await?;
         s.nodes_mut().store(k, ()).await?;
 
         Ok(self.lineage(&k, s).await?.unwrap())
     }
 
-    async fn delete<S: TreeStorage<Tree>>(
+    async fn delete<S: TreeStorage<Tree<IS_EPOCH_TREE>>>(
         &mut self,
         _k: &NodeIdx,
         _: &mut S,
@@ -478,8 +657,8 @@
     }
 }
 
-impl PrintableTree for Tree {
-    async fn tree_to_string<S: TreeStorage<Tree>>(&self, s: &S) -> String {
+impl<const IS_EPOCH_TREE: bool> PrintableTree for Tree<IS_EPOCH_TREE> {
+    async fn tree_to_string<S: TreeStorage<Tree<IS_EPOCH_TREE>>>(&self, s: &S) -> String {
         let mut r = String::new();
         let state = s.state().fetch().await.unwrap();
@@ -490,7 +669,11 @@ impl PrintableTree for Tree {
                 let maybe_left = rank * (1 << (layer + 1)) + (1 << layer);
                 if maybe_left <= state.inner_max().0 {
                     let n = InnerIdx(maybe_left);
-                    r.push_str(&format!("{}{}", state.outer_idx(n), spacing))
+                    r.push_str(&format!(
+                        "{}{}",
+                        self.to_outer_idx(s, &state, n).await.0,
+                        spacing
+                    ))
                 }
             }
             r.push('\n');
diff --git a/verifiable-db/src/query/universal_circuit/cells.rs b/verifiable-db/src/query/universal_circuit/cells.rs
index f57e5b04f..9439b7cd3 100644
--- a/verifiable-db/src/query/universal_circuit/cells.rs
+++ 
b/verifiable-db/src/query/universal_circuit/cells.rs
@@ -16,7 +16,7 @@ use ryhope::tree::{
     TreeTopology,
 };
 use std::iter::once;
-type CellTree = sbbst::Tree;
+type CellTree = sbbst::IncrementalTree;
 type CellTreeKey = <CellTree as TreeTopology>::Key;
 
 /// Re-compute the root hash of the cells tree from the column identifiers and values
@@ -47,7 +47,7 @@ pub(crate) fn build_cells_tree(
     assert_eq!(input_len, input_values.len());
     assert_eq!(input_len, is_real_value.len());
 
-    let sbbst_state = sbbst::Tree::with_capacity(input_len);
+    let sbbst_state = sbbst::IncrementalTree::with_capacity(input_len);
     let root_key = sbbst_state.root();
 
     build_cells_subtree_at_key(