diff --git a/.github/actions-rs/grcov.yml b/.github/actions-rs/grcov.yml new file mode 100644 index 0000000..dbe2786 --- /dev/null +++ b/.github/actions-rs/grcov.yml @@ -0,0 +1,2 @@ +output-type: lcov +output-file: ./lcov.info \ No newline at end of file diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml new file mode 100644 index 0000000..4b93f4c --- /dev/null +++ b/.github/workflows/codecov.yml @@ -0,0 +1,30 @@ +on: + push: + branches: [main] + pull_request: +name: coverage +env: + CARGO_NET_GIT_FETCH_WITH_CLI: true +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: arduino/setup-protoc@v1 + - uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + override: true + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions-rs/cargo@v1 + with: + command: test + args: --all-features --no-fail-fast -- --include-ignored --test-threads=1 + env: + CARGO_INCREMENTAL: '0' + RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + - uses: actions-rs/grcov@v0.1.5 + - name: Upload to codecov.io + uses: codecov/codecov-action@v3 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..40620c6 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,36 @@ +on: + push: + branches: [main, dev] + pull_request: +name: lint +env: + CARGO_NET_GIT_FETCH_WITH_CLI: true +jobs: + style: + runs-on: ubuntu-latest + steps: + - uses: arduino/setup-protoc@v1 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + components: rustfmt, clippy + - uses: actions/checkout@v2 + with: + submodules: recursive + - name: cargo fmt --check + uses: actions-rs/cargo@v1 + with: + command: fmt + args: --check + - name: cargo doc + uses: actions-rs/cargo@v1 + 
if: always() + with: + command: doc + args: --no-deps --all-features + - name: cargo clippy + uses: actions-rs/clippy-check@v1 + if: always() + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..16f95b9 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,24 @@ +on: + - push + - pull_request +name: test +jobs: + test: + runs-on: ubuntu-latest + name: test + env: + CARGO_NET_GIT_FETCH_WITH_CLI: true + steps: + - uses: arduino/setup-protoc@v1 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + - uses: actions/checkout@v2 + with: + submodules: recursive + - name: cargo test + uses: actions-rs/cargo@v1 + with: + command: test + args: --all-features -- --include-ignored diff --git a/README.md b/README.md index 7878131..7c3dce2 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +[![Rust](https://img.shields.io/badge/built_with-Rust-dca282.svg)](https://www.rust-lang.org/) +[![License](https://img.shields.io/badge/License-Apache_2.0-brightgreen.svg)](https://github.com/ArunaStorage/synevi/blob/main/LICENSE-APACHE) +[![License](https://img.shields.io/badge/License-MIT-brightgreen.svg)](https://github.com/ArunaStorage/synevi/blob/main/LICENSE-MIT) +![CI](https://github.com/ArunaStorage/synevi/actions/workflows/test.yml/badge.svg) +[![Codecov](https://codecov.io/github/ArunaStorage/synevi/coverage.svg?branch=main)](https://codecov.io/gh/ArunaStorage/synevi) + # Synevi Synevi (greek: συνέβει - "it happened") is a leaderless, strict serializable, embeddable event store for event sourcing. @@ -16,11 +22,71 @@ For a more detailed explanation of the underlying architecture please visit our ### Embedded -TBD +## Getting Started with Synevi + +Synevi provides a distributed event storage system that ensures consistent event ordering across your network. 
You can get started with either gRPC networking (the default) and your choice of persistent LMDB storage or in-memory storage to match your needs. + +At its core, Synevi works by distributing events through coordinator nodes. When an event is sent to any coordinator, Synevi ensures all nodes execute it in the same order, maintaining consistency across your system. To work with Synevi, you'll need to implement two key traits for your events. + +First, define your events by implementing the `Transaction` trait: + +```rust +pub trait Transaction: Debug + Clone + Send { + type TxErr: Send + Serialize; + type TxOk: Send + Serialize; + + fn as_bytes(&self) -> Vec; + fn from_bytes(bytes: Vec) -> Result + where + Self: Sized; +} +``` + +Then, implement the `Executor` trait to handle how your store processes these events: + +```rust +#[async_trait::async_trait] +pub trait Executor: Send + Sync + 'static { + type Tx: Transaction + Serialize; + + async fn execute(&self, transaction: Self::Tx) -> SyneviResult; +} +``` + +Here's a complete example that shows how to set up a Synevi node: + +```rust +// Set up the network address +let socket_addr = SocketAddr::from_str(&format!("0.0.0.0:{}", 13000 + i)).unwrap(); +let network = synevi::network::GrpcNetwork::new( + socket_addr, + format!("http://0.0.0.0:{}", 13000 + i), + Ulid::new(), + 0u16, +); + +// Choose your storage implementation +// For persistent storage using LMDB: +let test_path = format!("/dev/shm/test/"); +fs::create_dir(&test_path).await.unwrap(); +let store = LmdbStore::new(test_path, 0u16).unwrap(); + +// Or for in-memory storage: +// let store = MemStore::new(0u16).unwrap(); + +// Create your node using your executor (or the DummyExecutor that does nothing) +let node = Node::new(m, i as u16, network, DummyExecutor, store) + .await + .unwrap(); +``` + +Synevi will automatically handle event distribution and ensure proper execution order across all functioning nodes in your network. 
The system is designed to be resilient, continuing to operate even if some nodes become unavailable. Whether you choose LMDB for persistence or in-memory storage for speed, the same consistency guarantees apply - all events are executed in the same order across every node in your network. + +For production deployments, make sure your `Transaction` implementations include proper error handling and that your `Executor` implementation is thread-safe. Consider your network topology when setting up nodes, as it can impact system performance and reliability. -### As standalone Application +Note: Currently each node can handle only one type of executor, this can make it complicated when having many different types of events, for this you can use the excellent [typetag](https://github.com/dtolnay/typetag) crate that enables serialization of many types that implement shared behavior. -TBD +### As standalone Application (Coming soon) ## Feedback & Contributions diff --git a/benches/performance.rs b/benches/performance.rs index dc4992f..ac798f8 100644 --- a/benches/performance.rs +++ b/benches/performance.rs @@ -57,14 +57,7 @@ async fn parallel_execution(coordinator: Arc { - res.unwrap(); - } - synevi_types::types::ExecutorResult::Internal(res) => { - res.unwrap(); - } - } + res.unwrap().unwrap().unwrap(); } } @@ -82,14 +75,7 @@ async fn contention_execution(coordinators: Vec { - res.unwrap(); - } - synevi_types::types::ExecutorResult::Internal(res) => { - res.unwrap(); - } - } + res.unwrap().unwrap().unwrap(); } } @@ -105,14 +91,7 @@ async fn _bigger_payloads_execution( joinset.spawn(async move { coordinator.transaction(i, payload).await }); } while let Some(res) = joinset.join_next().await { - match res.unwrap().unwrap() { - synevi_types::types::ExecutorResult::External(res) => { - res.unwrap(); - } - synevi_types::types::ExecutorResult::Internal(res) => { - res.unwrap(); - } - }; + res.unwrap().unwrap().unwrap(); } } diff --git a/docs/reconfiguration.md 
b/docs/reconfiguration.md index 0a6caac..505d62d 100644 --- a/docs/reconfiguration.md +++ b/docs/reconfiguration.md @@ -1,134 +1,3 @@ # Reconfiguration Protocol -Each node has an epoch u32. - -# Initialisation (0 Nodes) - -The first node starts with `epoch = 0`. It adds itself to the config (address etc.). And initializes with serial 0; - -# Join cluster as new node - -TODO: Electorate Networkinterface pre-accept -Electorate @ pre-accept = Set of all members from current epoch -If pre-accept returns a new epoch... Fetch configuration. -Accept to all members of old epoch and all members of new epoch - -In our impl: -When the coordinator receives info about a new epoch in PA responses. --> Fetch config of epoch from appropriate node -Await fetch && PA to all (old + new) - -1. Update Config request (UpdateConfigRequest) on node N1 -2. Wait for the current epoch to be ready ? (ReadyEpoch = e) -3. If node was part of e-1 - --------------------------------- - -### Onboarding revision 2 - -ArunaCtx { - 1. New node starts NewNodeCtx - 2. New node requests config + all_applieds from ExistingNodeCtx - 3. Existing node 1 sends special transaction (NewConfig + Epoch) to other nodes with new config -} - -NewNodeCtx { - 1. Init ReplicaBuffer and collect Commits/Applies - 2. Apply all_applieds - 3. Apply/Commit ReplicaBuffer - 4. Send JoinElectorate && Init FullReplica -} - -ExistingNodeCtx { - 1. Receive UpdateConfig - 2. Send all applieds - 4. Update Config - 5. Send NewConfig to all -} - -ElectorateCtx { - 1. Rcv & Apply NewConfig + Epoch - 2. Broadcast to updated config - 3. Keep old electorate until RcvJoinElectorate -} - -OldElectorateCtx { - 1. if new epoch start ArunaCtx ? 
-} - - - --------------------------------- - -### Onboarding Phase 1 - -- New node gets created -> Send Join request to an existing node -- Wait for the current epoch (e) to be ready - - Responds with current config (e) -- Existing node creates a transaction (to the old electorate) with the new epoch and asynchronously starts the reconfiguration protocol. - - Replicas notice the new epoch and initialize the reconfiguration protocol after fetching from node - -X | | | X -New epoch +1 (ts) - -- Reconfiguration protocol: - -- Wait for the current epoch to be ready -- If node was part of e-1 (previous epoch) - - Wait until the ReadyElectorate is true - - Send join electorate to all new nodes -- If node is a "new" one (Is part of e+1 but not e) - - ReadyElectorate = false -- else: ReadyElectorate = true - -- Increase epoch e = e+1 -- - - - - - - - - -------- -## Old ideas -0. Accept all consensus requests and put them to a buffer. - -1. Ask all node for last applied + hash + current configuration -2. Ask all other nodes if max last applied + hash is correct ? -3. If yes OK else ask another node 1. -4. Download from any node that responded OK all data from your last_applied to the last_applied from 1. (Check if your last applied is correct ?!) - -If your node was existing: - -1. No consensus requests received --> Check if you are - -Existing nodes can check if they are already a member of the current epoch and/or their state is correct (yet outdated). -- If state is correct: Recover the latest states and go ahead - - You are still member of an epoch and will receive Consensus requests. -> Go to buffer - - The first Apply Consensus you will receive is the last that needs to be synced out of consensus. - - Sync ! 
-- If state is incorrect: Create a new epoch that replaces the old node with a new one - -### Onboarding Phase 2 - -Fetch all applied of node-x based on my last applied + last applied hash; - -Node sends a `FetchConfiguration` request to a running node (possibly the initial one). I receives the current member configuration and the current epoch e. - -Reponse: Current config from the node (possible outdated) - -Node sends a `JoinCluster` request to all current members with e == e+1 and its own configuration and optionally its last applied t0. - -Response: -Majority(Old) -> OK (Last applied / Hash) - -> Replica start - - -NoMajority because someone else already got majority for this epoch. - - -# Re-join cluster as existing node - diff --git a/docs/recovery.md b/docs/recovery.md new file mode 100644 index 0000000..366d2e7 --- /dev/null +++ b/docs/recovery.md @@ -0,0 +1,30 @@ +# Recovery + +Transactions are recovered after a specific time has passed. + +If the node has data about the transaction (it is either the coordinator or has witnessed a previous request). +Send recover request to all replicas. This functions as an explicit pre-accept request and all replicas answer with their current state of the transaction (Tx). + +## Cases + +- No other has witnessed Tx -> Recover functions as Pre-Accept, continue with Accept +- If others have only witnessed a Pre-Accept: + - Do superseding transactions exist ? -> Continue with Accept + - Are there "waiting" transactions that should execute before ? -> Wait and restart recovery + - Otherwise continue with Accept + - For Accept: Enforce the fast-path if the original request got a majority (original T0 as T) otherwise use max(T) +- Accepted / Committed / Applied -> Continue with last known state + +## Synevi specific implementation details + +Unlike the original Accord implementation, synevi only collects T0s of dependencies. 
+This was done to mitigate the effects of larger transaction bodies, which would otherwise result in drastically increased message sizes. + +One downside of this is that nodes can witness the existence of transactions via dependencies without the actual transaction body. +If it is necessary to recover that transaction to satisfy the dependencies, this creates a problem of "unknown recoveries". + +To fix this: The waiting node can request a recovery from other nodes. These nodes will respond with status information indicating whether they have witnessed the Tx and can recover it. As long as one node has the ability to recover the Tx and has witnessed the request, the Tx will be recovered. + +If a majority responds with an unable-to-recover status and the rest do not respond, the dependency can be removed: + +The reason for this is that because the original transaction has not yet received a majority, it cannot have taken the fast path and must be recovered using a slow path. This also implies that a node that responds with a false state cannot allow the transaction to take the fast path. 
\ No newline at end of file diff --git a/docs/waithandler_refactor.md b/docs/waithandler_refactor.md new file mode 100644 index 0000000..e69de29 diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..7d27fca --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "1.82.0" +components = ["clippy"] diff --git a/synevi/src/lib.rs b/synevi/src/lib.rs index 3d0e280..b233fe4 100644 --- a/synevi/src/lib.rs +++ b/synevi/src/lib.rs @@ -16,3 +16,8 @@ pub mod network { pub use synevi_network::network::BroadcastResponse; } } + +pub mod storage { + pub use synevi_persistence::lmdb_store::LmdbStore; + pub use synevi_persistence::mem_store::MemStore; +} diff --git a/synevi_core/Cargo.toml b/synevi_core/Cargo.toml index 8367b62..548b2cb 100644 --- a/synevi_core/Cargo.toml +++ b/synevi_core/Cargo.toml @@ -30,6 +30,7 @@ async-channel = "2.3.1" ahash = { workspace = true } sha3 = "0.10.8" postcard = {version = "1.0.10", features = ["use-std"]} +bincode = "1.3.3" [dev-dependencies] monotime = { path = "../monotime", features = ["unsafe_time"] } diff --git a/synevi_core/src/coordinator.rs b/synevi_core/src/coordinator.rs index 95f9b4b..eab77ed 100644 --- a/synevi_core/src/coordinator.rs +++ b/synevi_core/src/coordinator.rs @@ -1,9 +1,7 @@ use crate::node::Node; use crate::utils::{from_dependency, into_dependency}; -use crate::wait_handler::WaitAction; use ahash::RandomState; use serde::Serialize; -use sha3::{Digest, Sha3_256}; use std::collections::HashSet; use std::sync::atomic::Ordering; use std::sync::Arc; @@ -15,12 +13,12 @@ use synevi_network::network::{BroadcastRequest, Network, NetworkInterface}; use synevi_network::utils::IntoInner; use synevi_types::traits::Store; use synevi_types::types::{ - ExecutorResult, Hashes, InternalExecution, RecoveryState, SyneviResult, TransactionPayload, + ExecutorResult, Hashes, InternalSyneviResult, RecoverEvent, RecoveryState, TransactionPayload, }; use synevi_types::{Ballot, Executor, 
State, SyneviError, Transaction, T, T0}; -use tracing::{instrument, trace}; +use tracing::{error, instrument, trace}; -type RecoverySyneviResult = +type RecoveryInternalSyneviResult = Result::Tx>>, SyneviError>; pub struct Coordinator @@ -30,10 +28,9 @@ where S: Store, { pub node: Arc>, - pub transaction: TransactionStateMachine, } -#[derive(Clone, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default)] //, PartialEq, Eq)] pub struct TransactionStateMachine { pub id: u128, pub state: State, @@ -42,6 +39,7 @@ pub struct TransactionStateMachine { pub t: T, pub dependencies: HashSet, pub ballot: Ballot, + pub hashes: Option, } impl TransactionStateMachine @@ -59,60 +57,57 @@ where E: Executor, S: Store, { - #[instrument(level = "trace", skip(node, transaction))] - pub async fn new( - node: Arc>, - transaction: TransactionPayload, + #[instrument(level = "trace", skip(node))] + pub fn new(node: Arc>) -> Self { + trace!("Coordinator: New"); + Coordinator { node } + } + + #[instrument(level = "trace", skip(self))] + pub async fn pre_accept( + &self, id: u128, - ) -> Self { - trace!(?id, "Coordinator: New"); - let t0 = node.event_store.init_t_zero(node.info.serial).await; - Coordinator { - node, - transaction: TransactionStateMachine { + transaction: TransactionPayload, + ) -> InternalSyneviResult { + trace!(id = ?id, "Coordinator: Preaccept"); + + let node_clone = self.node.clone(); + let (state_machine, last_applied) = tokio::task::spawn_blocking(move || { + let t_zero = node_clone.event_store.init_t_zero(node_clone.get_serial()); + let state_machine = TransactionStateMachine { id, - state: State::Undefined, + state: State::PreAccepted, transaction, - t_zero: t0, - t: T(*t0), + t_zero, + t: T(*t_zero), dependencies: HashSet::default(), ballot: Ballot::default(), - }, - } - } - - #[instrument(level = "trace", skip(self))] - pub async fn run(&mut self) -> SyneviResult { - self.pre_accept().await - } - - #[instrument(level = "trace", skip(self))] - async fn 
pre_accept(&mut self) -> SyneviResult { - trace!(id = ?self.transaction.id, "Coordinator: Preaccept"); + hashes: None, + }; + let (t, _) = node_clone.event_store.last_applied(); + (state_machine, t.into()) + }) + .await?; self.node .stats .total_requests .fetch_add(1, Ordering::Relaxed); - let last_applied = { - let (t, _) = self.node.event_store.last_applied().await; - t.into() - }; - // Create the PreAccepted msg let pre_accepted_request = PreAcceptRequest { - id: self.transaction.id.to_be_bytes().into(), - event: self.transaction.get_transaction_bytes(), - timestamp_zero: (*self.transaction.t_zero).into(), + id: state_machine.id.to_be_bytes().into(), + event: state_machine.get_transaction_bytes(), + timestamp_zero: (*state_machine.t_zero).into(), last_applied, }; let network_interface = self.node.network.get_interface().await; + let pre_accepted_responses = network_interface .broadcast(BroadcastRequest::PreAccept( pre_accepted_request, - self.node.info.serial, + self.node.get_serial(), )) .await?; @@ -128,63 +123,74 @@ where return Err(SyneviError::CompetingCoordinator); } - self.pre_accept_consensus(&pa_responses).await?; + let state_machine = self + .pre_accept_consensus(pa_responses, state_machine) + .await?; - self.accept().await + self.accept(state_machine).await } #[instrument(level = "trace", skip(self))] async fn pre_accept_consensus( - &mut self, - responses: &[PreAcceptResponse], - ) -> Result<(), SyneviError> { + &self, + responses: Vec, + mut state_machine: TransactionStateMachine, + ) -> Result, SyneviError> { // Collect deps by t_zero and only keep the max t - let (_, last_applied_t0) = self.node.event_store.last_applied().await; - if last_applied_t0 != T0::default() { - self.transaction.dependencies.insert(last_applied_t0); - } - for response in responses { - let t_response = T::try_from(response.timestamp.as_slice())?; - if t_response > self.transaction.t { - self.transaction.t = t_response; - } - self.transaction - .dependencies - 
.extend(from_dependency(response.dependencies.clone())?); - } - // Upsert store - self.node - .event_store - .upsert_tx((&self.transaction).into()) - .await?; + let node_clone = self.node.clone(); + tokio::task::spawn_blocking(move || { + let (_, last_applied_t0) = node_clone.event_store.last_applied(); + if last_applied_t0 != T0::default() { + state_machine.dependencies.insert(last_applied_t0); + } + for response in responses { + let t_response = T::try_from(response.timestamp.as_slice())?; + if t_response > state_machine.t { + state_machine.t = t_response; + } + state_machine + .dependencies + .extend(from_dependency(response.dependencies.clone())?); + } - Ok(()) + // Upsert store + node_clone.event_store.upsert_tx((&state_machine).into())?; + Ok::<_, SyneviError>(state_machine) + }) + .await? } #[instrument(level = "trace", skip(self))] - async fn accept(&mut self) -> SyneviResult { - trace!(id = ?self.transaction.id, "Coordinator: Accept"); + async fn accept( + &self, + mut state_machine: TransactionStateMachine, + ) -> InternalSyneviResult { + trace!(id = ?state_machine.id, "Coordinator: Accept"); // Safeguard: T0 <= T - assert!(*self.transaction.t_zero <= *self.transaction.t); + assert!(*state_machine.t_zero <= *state_machine.t); - if *self.transaction.t_zero != *self.transaction.t { + if *state_machine.t_zero != *state_machine.t { self.node .stats .total_accepts .fetch_add(1, Ordering::Relaxed); - let last_applied = { - let (t, _) = self.node.event_store.last_applied().await; + + let node_clone = self.node.clone(); + + let last_applied = tokio::task::spawn_blocking(move || { + let (t, _) = node_clone.event_store.last_applied(); t.into() - }; + }) + .await?; let accepted_request = AcceptRequest { - id: self.transaction.id.to_be_bytes().into(), - ballot: self.transaction.ballot.into(), - event: self.transaction.get_transaction_bytes(), - timestamp_zero: (*self.transaction.t_zero).into(), - timestamp: (*self.transaction.t).into(), - dependencies: 
into_dependency(&self.transaction.dependencies), + id: state_machine.id.to_be_bytes().into(), + ballot: state_machine.ballot.into(), + event: state_machine.get_transaction_bytes(), + timestamp_zero: (*state_machine.t_zero).into(), + timestamp: (*state_machine.t).into(), + dependencies: into_dependency(&state_machine.dependencies), last_applied, }; @@ -202,94 +208,89 @@ where return Err(SyneviError::CompetingCoordinator); } - self.accept_consensus(&pa_responses).await?; + state_machine = self.accept_consensus(pa_responses, state_machine).await?; } - self.commit().await + self.commit(state_machine).await } #[instrument(level = "trace", skip(self))] - async fn accept_consensus(&mut self, responses: &[AcceptResponse]) -> Result<(), SyneviError> { + async fn accept_consensus( + &self, + responses: Vec, + mut state_machine: TransactionStateMachine, + ) -> Result, SyneviError> { // A little bit redundant, but I think the alternative to create a common behavior between responses may be even worse // Handle returned dependencies - for response in responses { - for dep in from_dependency(response.dependencies.clone())?.iter() { - if !self.transaction.dependencies.contains(dep) { - self.transaction.dependencies.insert(*dep); + let node_clone = self.node.clone(); + let state_machine = tokio::task::spawn_blocking(move || { + for response in responses { + for dep in from_dependency(response.dependencies.clone())?.iter() { + if !state_machine.dependencies.contains(dep) { + state_machine.dependencies.insert(*dep); + } } } - } - - // Mut state and update entry - self.transaction.state = State::Accepted; - self.node - .event_store - .upsert_tx((&self.transaction).into()) - .await?; - Ok(()) + // Mut state and update entry + state_machine.state = State::Accepted; + node_clone.event_store.upsert_tx((&state_machine).into())?; + Ok::<_, SyneviError>(state_machine) + }) + .await??; + Ok(state_machine) } #[instrument(level = "trace", skip(self))] - async fn commit(&mut self) -> 
SyneviResult { - trace!(id = ?self.transaction.id, "Coordinator: Commit"); + async fn commit( + &self, + mut state_machine: TransactionStateMachine, + ) -> InternalSyneviResult { + trace!(id = ?state_machine.id, "Coordinator: Commit"); let committed_request = CommitRequest { - id: self.transaction.id.to_be_bytes().into(), - event: self.transaction.get_transaction_bytes(), - timestamp_zero: (*self.transaction.t_zero).into(), - timestamp: (*self.transaction.t).into(), - dependencies: into_dependency(&self.transaction.dependencies), + id: state_machine.id.to_be_bytes().into(), + event: state_machine.get_transaction_bytes(), + timestamp_zero: (*state_machine.t_zero).into(), + timestamp: (*state_machine.t).into(), + dependencies: into_dependency(&state_machine.dependencies), }; let network_interface = self.node.network.get_interface().await; let (committed_result, broadcast_result) = tokio::join!( - self.commit_consensus(), + self.commit_consensus(state_machine), network_interface.broadcast(BroadcastRequest::Commit(committed_request)) ); - committed_result.unwrap(); - broadcast_result.unwrap(); // TODO: Recovery + state_machine = committed_result?; + broadcast_result?; - self.apply().await + self.apply(state_machine).await } #[instrument(level = "trace", skip(self))] - async fn commit_consensus(&mut self) -> Result<(), SyneviError> { - self.transaction.state = State::Commited; - - let (sx, rx) = tokio::sync::oneshot::channel(); - self.node - .wait_handler - .read() - .await - .as_ref() - .ok_or_else(|| SyneviError::MissingWaitHandler)? 
- .send_msg( - self.transaction.t_zero, - self.transaction.t, - self.transaction.dependencies.clone(), - self.transaction.get_transaction_bytes(), - WaitAction::CommitBefore, - sx, - self.transaction.id, - ) - .await?; - let _ = rx.await; - - Ok(()) + async fn commit_consensus( + &self, + mut state_machine: TransactionStateMachine, + ) -> Result, SyneviError> { + state_machine.state = State::Committed; + self.node.commit((&state_machine).into()).await?; + Ok(state_machine) } #[instrument(level = "trace", skip(self))] - async fn apply(&mut self) -> SyneviResult { - trace!(id = ?self.transaction.id, "Coordinator: Apply"); + async fn apply( + &self, + state_machine: TransactionStateMachine, + ) -> InternalSyneviResult { + trace!(id = ?state_machine.id, "Coordinator: Apply"); - let (synevi_result, hashes) = self.execute_consensus().await?; + let (synevi_result, hashes, state_machine) = self.execute_consensus(state_machine).await?; let applied_request = ApplyRequest { - id: self.transaction.id.to_be_bytes().into(), - event: self.transaction.get_transaction_bytes(), - timestamp: (*self.transaction.t).into(), - timestamp_zero: (*self.transaction.t_zero).into(), - dependencies: into_dependency(&self.transaction.dependencies), + id: state_machine.id.to_be_bytes().into(), + event: state_machine.get_transaction_bytes(), + timestamp: (*state_machine.t).into(), + timestamp_zero: (*state_machine.t_zero).into(), + dependencies: into_dependency(&state_machine.dependencies), execution_hash: hashes.execution_hash.to_vec(), transaction_hash: hashes.transaction_hash.to_vec(), }; @@ -303,74 +304,32 @@ where } #[instrument(level = "trace", skip(self))] - async fn execute_consensus(&mut self) -> Result<(SyneviResult, Hashes), SyneviError> { - self.transaction.state = State::Applied; - let (sx, rx) = tokio::sync::oneshot::channel(); - self.node - .get_wait_handler() - .await? 
- .send_msg( - self.transaction.t_zero, - self.transaction.t, - self.transaction.dependencies.clone(), - self.transaction.get_transaction_bytes(), - WaitAction::ApplyAfter, - sx, - self.transaction.id, - ) - .await?; - - rx.await - .map_err(|_| SyneviError::ReceiveError("Wait handle sender closed".to_string()))?; - - let result = match &self.transaction.transaction { - TransactionPayload::None => Err(SyneviError::TransactionNotFound), - TransactionPayload::External(tx) => self.node.executor.execute(tx.clone()).await, - TransactionPayload::Internal(request) => { - let result = match request { - InternalExecution::JoinElectorate { id, serial, host } => { - let res = self - .node - .add_member(*id, *serial, host.clone(), false) - .await; - let (t, hash) = self.node.event_store.last_applied_hash().await?; // TODO: Remove ? - self.node - .network - .report_config(t, hash, host.clone()) - .await?; - res - } - InternalExecution::ReadyElectorate { id, serial } => { - self.node.ready_member(*id, *serial).await - } - }; - match result { - Ok(_) => Ok(ExecutorResult::Internal(Ok(request.clone()))), - Err(err) => Ok(ExecutorResult::Internal(Err(err))), - } - } - }; - - let mut hasher = Sha3_256::new(); - postcard::to_io(&result, &mut hasher)?; - let hash = hasher.finalize(); - let hashes = self - .node - .event_store - .get_and_update_hash(self.transaction.t_zero, hash.into()) - .await?; - Ok((result, hashes)) + #[allow(clippy::type_complexity)] + async fn execute_consensus( + &self, + mut state_machine: TransactionStateMachine, + ) -> Result< + ( + InternalSyneviResult, + Hashes, + TransactionStateMachine, + ), + SyneviError, + > { + state_machine.state = State::Applied; + + let (result, hashes) = self.node.apply((&state_machine).into(), None).await?; + + Ok((result, hashes, state_machine)) } #[instrument(level = "trace", skip(node))] - pub async fn recover(node: Arc>, t0_recover: T0) -> SyneviResult { + pub async fn recover( + node: Arc>, + recover_event: RecoverEvent, + ) 
-> InternalSyneviResult { loop { let node = node.clone(); - let recover_event = node - .event_store - .recover_event(&t0_recover, node.get_info().serial) - .await; - let recover_event = recover_event?; let network_interface = node.network.get_interface().await; let recover_responses = network_interface @@ -378,33 +337,32 @@ where id: recover_event.id.to_be_bytes().to_vec(), ballot: recover_event.ballot.into(), event: recover_event.transaction.clone(), - timestamp_zero: t0_recover.into(), + timestamp_zero: recover_event.t_zero.into(), })) .await?; - let mut recover_coordinator = Coordinator:: { - node, - transaction: TransactionStateMachine { - transaction: TransactionPayload::from_bytes(recover_event.transaction)?, - t_zero: recover_event.t_zero, - t: recover_event.t, - ballot: recover_event.ballot, - state: recover_event.state, - id: recover_event.id, - dependencies: recover_event.dependencies, - }, + let state_machine = TransactionStateMachine:: { + transaction: TransactionPayload::from_bytes(recover_event.transaction.clone())?, + t_zero: recover_event.t_zero, + t: recover_event.t, + ballot: recover_event.ballot, + state: recover_event.state, + id: recover_event.id, + dependencies: recover_event.dependencies.clone(), + hashes: None, }; - let recover_result = recover_coordinator + let recover_result = Coordinator::::new(node) .recover_consensus( recover_responses .into_iter() .map(|res| res.into_inner()) .collect::, SyneviError>>()?, + state_machine, ) .await; if let Err(err) = &recover_result { - dbg!(&err); + error!(?err); } match recover_result? 
{ RecoveryState::Recovered(result) => return Ok(result), @@ -420,9 +378,10 @@ where #[instrument(level = "trace", skip(self))] async fn recover_consensus( - &mut self, + &self, mut responses: Vec, - ) -> RecoverySyneviResult { + mut state_machine: TransactionStateMachine, + ) -> RecoveryInternalSyneviResult { // Keep track of values to replace let mut highest_ballot: Option = None; let mut superseding = false; @@ -457,11 +416,11 @@ where let replica_state = State::from(response.local_state() as i32); match replica_state { - State::PreAccepted if self.transaction.state <= State::PreAccepted => { - if replica_t > self.transaction.t { + State::PreAccepted if state_machine.state <= State::PreAccepted => { + if replica_t > state_machine.t { // Slow path - self.transaction.t = replica_t; - self.transaction + state_machine.t = replica_t; + state_machine .dependencies .extend(from_dependency(response.dependencies.clone())?); } else { @@ -469,25 +428,24 @@ where fast_path_counter += 1; fast_path_deps.extend(from_dependency(response.dependencies.clone())?); } - self.transaction.state = State::PreAccepted; + state_machine.state = State::PreAccepted; } - State::Accepted if self.transaction.state < State::Accepted => { - self.transaction.t = replica_t; - self.transaction.state = State::Accepted; - self.transaction.dependencies = from_dependency(response.dependencies.clone())?; + State::Accepted if state_machine.state < State::Accepted => { + state_machine.t = replica_t; + state_machine.state = State::Accepted; + state_machine.dependencies = from_dependency(response.dependencies.clone())?; } State::Accepted - if self.transaction.state == State::Accepted - && replica_t > self.transaction.t => + if state_machine.state == State::Accepted && replica_t > state_machine.t => { - self.transaction.t = replica_t; - self.transaction.dependencies = from_dependency(response.dependencies.clone())?; + state_machine.t = replica_t; + state_machine.dependencies = 
from_dependency(response.dependencies.clone())?; } - any_state if any_state > self.transaction.state => { - self.transaction.state = any_state; - self.transaction.t = replica_t; - if self.transaction.state >= State::Accepted { - self.transaction.dependencies = + any_state if any_state > state_machine.state => { + state_machine.state = any_state; + state_machine.t = replica_t; + if state_machine.state >= State::Accepted { + state_machine.dependencies = from_dependency(response.dependencies.clone())?; } } @@ -497,33 +455,34 @@ where if fast_path_counter > (responses.len() / 2) { // Enforce the fast path -> Slow path was minority - self.transaction.t = T(*self.transaction.t_zero); - self.transaction.dependencies = fast_path_deps; + state_machine.t = T(*state_machine.t_zero); + state_machine.dependencies = fast_path_deps; } if let Some(ballot) = highest_ballot { - self.node - .event_store - .accept_tx_ballot(&self.transaction.t_zero, ballot) - .await; + let event_store = self.node.event_store.clone(); + tokio::task::spawn_blocking(move || { + event_store.accept_tx_ballot(&state_machine.t_zero, ballot); + }) + .await?; return Ok(RecoveryState::CompetingCoordinator); } // Wait for deps - Ok(match self.transaction.state { - State::Applied => RecoveryState::Recovered(self.apply().await?), - State::Commited => RecoveryState::Recovered(self.commit().await?), - State::Accepted => RecoveryState::Recovered(self.accept().await?), + Ok(match state_machine.state { + State::Applied => RecoveryState::Recovered(self.apply(state_machine).await?), + State::Committed => RecoveryState::Recovered(self.commit(state_machine).await?), + State::Accepted => RecoveryState::Recovered(self.accept(state_machine).await?), State::PreAccepted => { if superseding { - RecoveryState::Recovered(self.accept().await?) + RecoveryState::Recovered(self.accept(state_machine).await?) 
} else if !waiting.is_empty() { // We will wait anyway if RestartRecovery is returned return Ok(RecoveryState::RestartRecovery); } else { - self.transaction.t = T(*self.transaction.t_zero); - RecoveryState::Recovered(self.accept().await?) + state_machine.t = T(*state_machine.t_zero); + RecoveryState::Recovered(self.accept(state_machine).await?) } } _ => { @@ -537,19 +496,21 @@ where #[cfg(test)] pub mod tests { use super::Coordinator; - use crate::node::Node; - use crate::tests::DummyExecutor; - use crate::tests::NetworkMock; + use super::TransactionStateMachine; + use std::collections::HashSet; use std::sync::atomic::Ordering; use synevi_network::consensus_transport::PreAcceptRequest; use synevi_network::network::Network; use synevi_network::network::{BroadcastRequest, NetworkInterface}; use synevi_network::utils::IntoInner; - use synevi_persistence::mem_store::MemStore; use synevi_types::traits::Store; + use synevi_types::types::TransactionPayload; + use synevi_types::Ballot; + use synevi_types::State; use synevi_types::SyneviError; - use synevi_types::{Executor, State, Transaction}; - use ulid::Ulid; + use synevi_types::T; + use synevi_types::{Executor, Transaction}; + use tracing::trace; #[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] #[allow(dead_code)] @@ -572,22 +533,40 @@ pub mod tests { E: Executor, S: Store, { - pub async fn failing_pre_accept(&mut self) -> Result<(), SyneviError> { + pub async fn failing_pre_accept( + &self, + id: u128, + transaction: TransactionPayload, + ) -> Result<(), SyneviError> { + trace!(id = ?id, "Coordinator: Failing Preaccept"); + + let node_clone = self.node.clone(); + let (state_machine, last_applied) = tokio::task::spawn_blocking(move || { + let t_zero = node_clone.event_store.init_t_zero(node_clone.get_serial()); + let state_machine = TransactionStateMachine { + id, + state: State::PreAccepted, + transaction, + t_zero, + t: T(*t_zero), + dependencies: HashSet::default(), + ballot: Ballot::default(), + 
hashes: None, + }; + let (t, _) = node_clone.event_store.last_applied(); + (state_machine, t.into()) + }) + .await?; self.node .stats .total_requests .fetch_add(1, Ordering::Relaxed); - let last_applied = { - let (t, _) = self.node.event_store.last_applied().await; - t.into() - }; - // Create the PreAccepted msg let pre_accepted_request = PreAcceptRequest { - id: self.transaction.id.to_be_bytes().into(), - event: self.transaction.get_transaction_bytes(), - timestamp_zero: (*self.transaction.t_zero).into(), + id: state_machine.id.to_be_bytes().into(), + event: state_machine.get_transaction_bytes(), + timestamp_zero: (*state_machine.t_zero).into(), last_applied, }; @@ -595,7 +574,7 @@ pub mod tests { let pre_accepted_responses = network_interface .broadcast(BroadcastRequest::PreAccept( pre_accepted_request, - self.node.info.serial, + self.node.get_serial(), )) .await?; @@ -604,33 +583,9 @@ pub mod tests { .map(|res| res.into_inner()) .collect::, SyneviError>>()?; - self.pre_accept_consensus(&pa_responses).await?; + self.pre_accept_consensus(pa_responses, state_machine) + .await?; Ok(()) } } - - #[tokio::test] - async fn init_test() { - let node = Node::::new_with_network_and_executor( - Ulid::new(), - 0, - NetworkMock::default(), - DummyExecutor, - ) - .await - .unwrap(); - - let coordinator = Coordinator::new( - node, - synevi_types::types::TransactionPayload::External(b"foo".to_vec()), - 0, - ) - .await; - - assert_eq!(coordinator.transaction.state, State::Undefined); - assert_eq!(*coordinator.transaction.t_zero, *coordinator.transaction.t); - assert_eq!(coordinator.transaction.t_zero.0.get_node(), 0); - assert_eq!(coordinator.transaction.t_zero.0.get_seq(), 1); - assert!(coordinator.transaction.dependencies.is_empty()); - } } diff --git a/synevi_core/src/lib.rs b/synevi_core/src/lib.rs index 4baec48..72cc2ee 100644 --- a/synevi_core/src/lib.rs +++ b/synevi_core/src/lib.rs @@ -6,16 +6,21 @@ pub mod utils; mod wait_handler; pub mod tests { + use 
std::sync::atomic::AtomicBool; + use std::sync::atomic::AtomicU32; use std::sync::Arc; use synevi_network::configure_transport::GetEventResponse; use synevi_network::network::BroadcastResponse; + use synevi_network::network::MemberWithLatency; use synevi_network::network::NetworkInterface; + use synevi_network::network::NodeInfo; + use synevi_network::network::NodeStatus; use synevi_network::network::{BroadcastRequest, Network}; use synevi_network::replica::Replica; use synevi_types::types::SyneviResult; use synevi_types::Executor; use synevi_types::SyneviError; - use synevi_types::T; + use synevi_types::T0; use tokio::sync::mpsc::Receiver; use tokio::sync::Mutex; use ulid::Ulid; @@ -40,6 +45,9 @@ pub mod tests { self.got_requests.lock().await.push(request); Ok(vec![]) } + async fn broadcast_recovery(&self, _t0: T0) -> Result { + Ok(true) + } } #[async_trait::async_trait] @@ -70,22 +78,34 @@ pub mod tests { async fn get_waiting_time(&self, _node_serial: u16) -> u64 { 0 } - async fn get_member_len(&self) -> u32 { - 0 + async fn get_members(&self) -> Vec> { + vec![] + } + + fn get_node_status(&self) -> Arc { + Arc::new(NodeStatus { + info: NodeInfo { + id: Ulid::new(), + serial: 0, + host: "localhost".to_string(), + ready: AtomicBool::new(false), + }, + members_responded: AtomicU32::new(0), + has_members: AtomicBool::new(false), + }) } - async fn broadcast_config(&self, _host: String) -> Result<(u32, Vec), SyneviError> { - Ok((0, vec![0])) + async fn join_electorate(&self, _host: String) -> Result { + Ok(0) } async fn get_stream_events( &self, - _self_id: Vec, _last_applied: Vec, ) -> Result, SyneviError> { let (_, rcv) = tokio::sync::mpsc::channel(1); Ok(rcv) } - async fn ready_electorate(&self) -> Result<(), SyneviError> { + async fn ready_electorate(&self, _host: String) -> Result<(), SyneviError> { Ok(()) } @@ -93,12 +113,7 @@ pub mod tests { Ok(()) } - async fn report_config( - &self, - _last_applied: T, - _last_applied_hash: [u8; 32], - _host: String, - ) -> 
Result<(), SyneviError> { + async fn report_config(&self, _host: String) -> Result<(), SyneviError> { Ok(()) } } @@ -109,8 +124,8 @@ pub mod tests { impl Executor for DummyExecutor { type Tx = Vec; - async fn execute(&self, data: Vec) -> SyneviResult { - Ok(synevi_types::types::ExecutorResult::External(Ok(data))) + async fn execute(&self, _id: u128, data: Vec) -> SyneviResult { + Ok(Ok(data)) } } } diff --git a/synevi_core/src/node.rs b/synevi_core/src/node.rs index 59548f7..fe4a509 100644 --- a/synevi_core/src/node.rs +++ b/synevi_core/src/node.rs @@ -1,23 +1,23 @@ use crate::coordinator::Coordinator; use crate::replica::ReplicaConfig; -use crate::wait_handler::WaitHandler; +use crate::wait_handler::{CheckResult, WaitHandler}; +use sha3::{Digest, Sha3_256}; use std::fmt::Debug; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::Ordering; +use std::sync::RwLock; use std::sync::{atomic::AtomicU64, Arc}; -use synevi_network::consensus_transport::{ - ApplyRequest, ApplyResponse, CommitRequest, CommitResponse, -}; -use synevi_network::network::{Network, NodeInfo}; -use synevi_network::reconfiguration::{BufferedMessage, Report}; +use synevi_network::consensus_transport::{ApplyRequest, CommitRequest}; +use synevi_network::network::{Network, NetworkInterface}; use synevi_network::replica::Replica; use synevi_persistence::mem_store::MemStore; use synevi_types::traits::Store; -use synevi_types::types::{SyneviResult, TransactionPayload}; -use synevi_types::{Executor, State, SyneviError, T}; -use tokio::sync::mpsc::Receiver; -use tokio::sync::RwLock; +use synevi_types::types::{ + Event, ExecutorResult, Hashes, InternalExecution, InternalSyneviResult, SyneviResult, + TransactionPayload, UpsertEvent, +}; +use synevi_types::{Executor, State, SyneviError, Transaction, T}; use tokio::task::JoinSet; -use tracing::instrument; +use tracing::{error, instrument, trace}; use ulid::Ulid; #[derive(Debug, Default)] @@ -27,26 +27,19 @@ pub struct Stats { pub 
total_recovers: AtomicU64, } -enum HelperResponse { - Commit(CommitResponse), - Apply(ApplyResponse), -} - pub struct Node where N: Network + Send + Sync, E: Executor + Send + Sync, S: Store + Send + Sync, { - pub info: NodeInfo, pub network: N, pub executor: E, pub event_store: Arc, pub stats: Stats, - pub wait_handler: RwLock>>>, + pub wait_handler: WaitHandler, semaphore: Arc, - has_members: AtomicBool, - is_ready: Arc, + self_clone: RwLock>>, } impl Node @@ -80,8 +73,6 @@ where executor: E, store: S, ) -> Result, SyneviError> { - let node_name = NodeInfo { id, serial }; - let stats = Stats { total_requests: AtomicU64::new(0), total_accepts: AtomicU64::new(0), @@ -94,35 +85,66 @@ where // reorder_clone.run().await.unwrap(); // }); + let arc_store = Arc::new(store); + let wait_handler = + WaitHandler::new(arc_store.clone(), network.get_node_status().info.serial); + let node = Arc::new(Node { - info: node_name, - event_store: Arc::new(store), + event_store: arc_store, + wait_handler, network, stats, semaphore: Arc::new(tokio::sync::Semaphore::new(10)), executor, - wait_handler: RwLock::new(None), - has_members: AtomicBool::new(false), - is_ready: Arc::new(AtomicBool::new(true)), + self_clone: RwLock::new(None), }); - let wait_handler = WaitHandler::new(node.clone()); - let wait_handler_clone = wait_handler.clone(); - tokio::spawn(async move { - wait_handler_clone.run().await.unwrap(); - }); - *node.wait_handler.write().await = Some(wait_handler); + node.self_clone + .write() + .expect("Locking self_clone failed") + .replace(node.clone()); - let ready = Arc::new(AtomicBool::new(true)); - let (replica, _) = ReplicaConfig::new(node.clone(), ready); + node.set_ready(); + + let replica = ReplicaConfig::new(node.clone()); node.network.spawn_server(replica).await?; + let node_clone = node.clone(); + tokio::spawn(async move { node_clone.run_check_recovery().await }); + // If no config / persistence -> default Ok(node) } - pub fn set_ready(&self) -> () { - 
self.is_ready.store(true, Ordering::Relaxed); + pub fn set_ready(&self) { + self.network + .get_node_status() + .info + .ready + .store(true, Ordering::Relaxed); + } + + pub fn is_ready(&self) -> bool { + self.network + .get_node_status() + .info + .ready + .load(Ordering::Relaxed) + } + + pub fn has_members(&self) -> bool { + self.network + .get_node_status() + .has_members + .load(Ordering::Relaxed) + } + + pub fn get_serial(&self) -> u16 { + self.network.get_node_status().info.serial + } + + pub fn get_ulid(&self) -> Ulid { + self.network.get_node_status().info.id } #[instrument(level = "trace", skip(network, executor, store))] @@ -134,38 +156,36 @@ where store: S, member_host: String, ) -> Result, SyneviError> { - let node_name = NodeInfo { id, serial }; - let stats = Stats { total_requests: AtomicU64::new(0), total_accepts: AtomicU64::new(0), total_recovers: AtomicU64::new(0), }; - let ready = Arc::new(AtomicBool::new(false)); + let arc_store = Arc::new(store); + let wait_handler = + WaitHandler::new(arc_store.clone(), network.get_node_status().info.serial); + let node = Arc::new(Node { - info: node_name, - event_store: Arc::new(store), + event_store: arc_store, network, + wait_handler, stats, semaphore: Arc::new(tokio::sync::Semaphore::new(10)), executor, - wait_handler: RwLock::new(None), - has_members: AtomicBool::new(false), - is_ready: ready.clone(), + self_clone: RwLock::new(None), }); - let wait_handler = WaitHandler::new(node.clone()); - let wait_handler_clone = wait_handler.clone(); - tokio::spawn(async move { - wait_handler_clone.run().await.unwrap(); - }); - *node.wait_handler.write().await = Some(wait_handler); + node.self_clone + .write() + .expect("Locking self_clone failed") + .replace(node.clone()); - let (replica, config_receiver) = ReplicaConfig::new(node.clone(), ready.clone()); + let replica = ReplicaConfig::new(node.clone()); node.network.spawn_server(replica.clone()).await?; - node.reconfigure(replica, member_host, config_receiver, ready) 
- .await?; + let node_clone = node.clone(); + tokio::spawn(async move { node_clone.run_check_recovery().await }); + node.reconfigure(replica, member_host).await?; Ok(node) } @@ -179,8 +199,6 @@ where ready: bool, ) -> Result<(), SyneviError> { self.network.add_member(id, serial, host, ready).await?; - self.has_members - .store(true, std::sync::atomic::Ordering::Relaxed); Ok(()) } @@ -190,42 +208,53 @@ where Ok(()) } + #[instrument(level = "trace", skip(self))] + pub fn get_event_by_id(&self, id: u128) -> Option { + self.event_store.get_event_by_id(id).ok().flatten() + } + + #[instrument(level = "trace", skip(self))] + pub fn decode_event(&self, event: Event) -> Result, SyneviError> { + let transaction = TransactionPayload::::from_bytes(event.transaction)?; + match transaction { + TransactionPayload::External(tx) => Ok(Some(tx)), + _ => Ok(None), + } + } + #[instrument(level = "trace", skip(self, transaction))] pub async fn transaction(self: Arc, id: u128, transaction: E::Tx) -> SyneviResult { - if !self.has_members.load(std::sync::atomic::Ordering::Relaxed) { + if !self.has_members() { tracing::warn!("Consensus omitted: No members in the network"); - } else if !self.is_ready.load(Ordering::Relaxed) { + } else if !self.is_ready() { return Err(SyneviError::NotReady); }; let _permit = self.semaphore.acquire().await?; - let mut coordinator = - Coordinator::new(self.clone(), TransactionPayload::External(transaction), id).await; - coordinator.run().await + let tx_result = Coordinator::new(self.clone()) + .pre_accept(id, TransactionPayload::External(transaction)) + .await?; + + match tx_result { + ExecutorResult::External(e) => Ok(e), + ExecutorResult::Internal(e) => { + Err(SyneviError::InternalTransaction(format!("{:?}", e))) + } + } } pub(super) async fn internal_transaction( self: Arc, id: u128, transaction: TransactionPayload, - ) -> SyneviResult { - if !self.has_members.load(std::sync::atomic::Ordering::Relaxed) { - tracing::warn!("Consensus omitted: No members in 
the network"); - } else if !self.is_ready.load(Ordering::Relaxed) { + ) -> InternalSyneviResult { + if !self.is_ready() { return Err(SyneviError::NotReady); }; let _permit = self.semaphore.acquire().await?; - let mut coordinator = - Coordinator::new(self.clone(), transaction, id).await; - coordinator.run().await - } - pub async fn get_wait_handler(&self) -> Result>, SyneviError> { - let lock = self.wait_handler.read().await; - let handler = lock - .as_ref() - .ok_or_else(|| SyneviError::MissingWaitHandler)? - .clone(); - Ok(handler) + Coordinator::new(self.clone()) + .pre_accept(id, transaction) + .await } pub fn get_stats(&self) -> (u64, u64, u64) { @@ -242,102 +271,258 @@ where ) } - pub fn get_info(&self) -> NodeInfo { - self.info.clone() + #[instrument(level = "trace", skip(self))] + pub(crate) async fn commit(&self, event: UpsertEvent) -> Result<(), SyneviError> { + let t0_commit = event.t_zero; + let t_commit = event.t; + let event_store = self.event_store.clone(); + let event_clone = event.clone(); + let prev_event = tokio::task::spawn_blocking(move || { + let prev_event = event_store.get_event(t0_commit)?; + event_store.upsert_tx(event_clone)?; + Ok::<_, SyneviError>(prev_event) + }) + .await??; + self.wait_handler.notify_commit(&t0_commit, &t_commit); + if !prev_event.is_some_and(|e| e.state > State::Committed || e.dependencies.is_empty()) { + if let Some(waiter) = self.wait_handler.get_waiter(&event) { + waiter.await.map_err(|e| { + tracing::error!("Error waiting for commit: {:?}", e); + SyneviError::ReceiveError("Error waiting for commit".to_string()) + })? + }; + } + + Ok(()) } - #[instrument(level = "trace", skip(self, replica))] - async fn reconfigure( + #[instrument(level = "trace", skip(self))] + pub(crate) async fn apply( &self, - replica: ReplicaConfig, - member_host: String, - config_receiver: Receiver, - ready: Arc, - ) -> Result<(), SyneviError> { - // 1. 
Broadcast self_config to other member - let (all_members, self_id) = self.network.broadcast_config(member_host).await?; + mut event: UpsertEvent, + request_hashes: Option, + ) -> Result<(InternalSyneviResult, Hashes), SyneviError> { + let t0_apply = event.t_zero; + let event_store = self.event_store.clone(); + + let needs_wait = if let Some(prev_event) = + tokio::task::spawn_blocking(move || event_store.get_event(t0_apply)).await?? + { + prev_event.state < State::Applied + } else { + let mut commit_event = event.clone(); + commit_event.state = State::Committed; + self.commit(commit_event).await?; + true + }; + if event + .dependencies + .as_ref() + .is_some_and(|deps| !deps.is_empty()) + && needs_wait + { + if let Some(waiter) = self.wait_handler.get_waiter(&event) { + waiter.await.map_err(|e| { + tracing::error!("Error waiting for commit: {:?}", e); + SyneviError::ReceiveError("Error waiting for commit".to_string()) + })?; + } + } - // 2. wait for JoinElectorate responses with expected majority and config from others - self.join_electorate(config_receiver, all_members, self_id, &replica) - .await?; + // - Check transaction hash -> SyneviError::MismatchingTransactionHash + let event_store = self.event_store.clone(); + let event_clone = event.clone(); + let mut node_hashes = tokio::task::spawn_blocking(move || { + event_store.get_or_update_transaction_hash(event_clone) + }) + .await??; + + if let Some(hashes) = &request_hashes { + if hashes.transaction_hash != node_hashes.transaction_hash { + error!(?hashes, ?node_hashes, "Mismatched hashes"); + return Err(SyneviError::MismatchedTransactionHashes); + } + } - // 3. 
Send ReadyJoinElectorate && set myself to ready - ready.store(true, Ordering::Relaxed); - self.network.ready_electorate().await?; - Ok(()) + // - Execute + let transaction = TransactionPayload::from_bytes( + event + .transaction + .clone() + .ok_or_else(|| SyneviError::TransactionNotFound)?, + )?; + let result = self.execute(event.id, transaction).await; + + let event_store = self.event_store.clone(); + + let (result, node_hashes) = tokio::task::spawn_blocking(move || { + // - Check execution hash -> SyneviError::MismatchingExecutionHash + let mut hasher = Sha3_256::new(); + postcard::to_io(&result, &mut hasher)?; + let execution_hash: [u8; 32] = hasher.finalize().into(); + if let Some(hashes) = request_hashes { + if hashes.execution_hash != execution_hash { + return Err(SyneviError::MismatchedExecutionHashes); + } + } + node_hashes.execution_hash = execution_hash; + event.hashes = Some(node_hashes.clone()); + + // - Upsert + event_store.upsert_tx(event)?; + Ok((result, node_hashes)) + }) + .await??; + self.wait_handler.notify_apply(&t0_apply); + Ok((result, node_hashes)) } - async fn join_electorate( + async fn execute( &self, - mut receiver: Receiver, - all_members: u32, - self_id: Vec, - replica: &ReplicaConfig, - ) -> Result<(), SyneviError> { - let mut member_count = 0; - while let Some(report) = receiver.recv().await { - self.add_member(report.node_id, report.node_serial, report.node_host, true) - .await?; - member_count += 1; - if member_count >= all_members { - break; + id: u128, + transaction: TransactionPayload<::Tx>, + ) -> Result::Tx>, SyneviError> { + // TODO: Refactor in execute function + + match transaction { + TransactionPayload::None => Err(SyneviError::TransactionNotFound), + TransactionPayload::External(tx) => self + .executor + .execute(id, tx) + .await + .map(ExecutorResult::<::Tx>::External), + TransactionPayload::Internal(request) => { + // TODO: Build special execution + let result = match &request { + InternalExecution::JoinElectorate { + 
id, + serial, + new_node_host, + } => { + if id != &self.get_ulid() { + let res = self + .add_member(*id, *serial, new_node_host.clone(), false) + .await; + self.network.report_config(new_node_host.clone()).await?; + res + } else { + Ok(()) + } + } + InternalExecution::ReadyElectorate { id, serial } => { + if id != &self.get_ulid() { + self.ready_member(*id, *serial).await + } else { + Ok(()) + } + } + }; + match result { + Ok(_) => Ok(ExecutorResult::Internal(Ok(request.clone()))), + Err(err) => Ok(ExecutorResult::Internal(Err(err))), + } } } + } - let (last_applied, _) = self.event_store.last_applied().await; - - // 2.1 if majority replies with 0 events -> skip to 2.4. - self.sync_events(last_applied, self_id, &replica).await?; + async fn run_check_recovery(&self) { + while !self.is_ready() { + tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; + } - // 2.4 Apply buffered commits & applies - let mut rcv = replica.send_buffered().await?; - let mut join_set = JoinSet::new(); - while let Some((_t0, _, request)) = rcv - .recv() - .await - .ok_or_else(|| SyneviError::ReceiveError("Channel closed".to_string()))? 
- { - match request { - BufferedMessage::Commit(req) => { - let clone = replica.clone(); - join_set.spawn(async move { - let res = HelperResponse::Commit(clone.commit(req, true).await?); - Ok::(res) + let self_clonable = self + .self_clone + .read() + .expect("Locking self_clone failed") + .clone() + .expect("Self clone is None"); + + loop { + match self.wait_handler.check_recovery() { + CheckResult::NoRecovery => (), + CheckResult::RecoverEvent(recover_event) => { + trace!( + "{}, Recovering event: {:?}", + self.get_serial(), + recover_event + ); + let self_clone = self_clonable.clone(); + tokio::spawn(async move { + match Coordinator::recover(self_clone, recover_event).await { + Ok(_) => (), + Err(e) => { + tracing::error!("JoinError recovering event: {:?}", e); + } + } }); } - BufferedMessage::Apply(req) => { - let clone = replica.clone(); - join_set.spawn(async move { - let res = HelperResponse::Apply(clone.apply(req, true).await?); - Ok::(res) - }); + CheckResult::RecoverUnknown(t0_recover) => { + trace!( + "{}, Recovering unknown: {:?}", + self.get_serial(), + t0_recover + ); + let interface = self.network.get_interface().await; + match interface.broadcast_recovery(t0_recover).await { + Ok(true) => (), + Ok(false) => { + error!("Unknown recovery failed"); + self.wait_handler.notify_apply(&t0_recover); + } + Err(err) => { + tracing::error!("Error broadcasting recovery: {:?}", err); + panic!("Error broadcasting recovery: {:?}", err); + } + } } } + tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; } - for task in join_set.join_all().await { - task?; + } + + #[instrument(level = "trace", skip(self, replica))] + async fn reconfigure( + &self, + replica: ReplicaConfig, + member_host: String, + ) -> Result<(), SyneviError> { + let expected = self.network.join_electorate(member_host.clone()).await?; + // 2. 
wait for JoinElectorate responses with expected majority and config from others + while self + .network + .get_node_status() + .members_responded + .load(Ordering::Relaxed) + < (expected / 2) + 1 + { + tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; } + let (last_applied, _) = self.event_store.last_applied(); + self.sync_events(last_applied, &replica).await?; + + // 3. Send ReadyJoinElectorate && set myself to ready + self.set_ready(); + self.network.ready_electorate(member_host).await?; + Ok(()) } async fn sync_events( &self, last_applied: T, - self_id: Vec, replica: &ReplicaConfig, ) -> Result<(), SyneviError> { // 2.2 else Request stream with events until last_applied (highest t of JoinElectorate) - let mut rcv = self - .network - .get_stream_events(last_applied.into(), self_id) - .await?; + let mut rcv = self.network.get_stream_events(last_applied.into()).await?; + let mut join_set = JoinSet::new(); while let Some(event) = rcv.recv().await { let state: State = event.state.into(); match state { State::Applied => { - replica - .apply( - ApplyRequest { + let clone = replica.clone(); + join_set.spawn(async move { + let _response = clone + .apply(ApplyRequest { id: event.id, event: event.transaction, timestamp_zero: event.t_zero, @@ -345,28 +530,34 @@ where dependencies: event.dependencies, execution_hash: event.execution_hash, transaction_hash: event.transaction_hash, - }, - false, - ) - .await?; + }) + .await?; + Ok::<(), SyneviError>(()) + }); } - State::Commited => { - replica - .commit( - CommitRequest { + State::Committed => { + let clone = replica.clone(); + join_set.spawn(async move { + let _response = clone + .commit(CommitRequest { id: event.id, event: event.transaction, timestamp_zero: event.t_zero, timestamp: event.t, dependencies: event.dependencies, - }, - false, - ) - .await?; + }) + .await?; + Ok::<(), SyneviError>(()) + }); } _ => (), } } + + while let Some(response) = join_set.join_next().await { + response.unwrap().unwrap() + } + 
Ok(()) } } @@ -379,10 +570,10 @@ mod tests { use std::net::SocketAddr; use std::str::FromStr; use std::sync::Arc; + use std::time::Duration; use synevi_network::network::GrpcNetwork; use synevi_network::network::Network; use synevi_types::traits::Store; - use synevi_types::types::ExecutorResult; use synevi_types::{Executor, State, SyneviError, T, T0}; use ulid::Ulid; @@ -397,13 +588,12 @@ mod tests { transaction: Vec, ) -> Result<(), SyneviError> { let _permit = self.semaphore.acquire().await?; - let mut coordinator = Coordinator::new( - self.clone(), - synevi_types::types::TransactionPayload::External(transaction), - id, - ) - .await; - coordinator.failing_pre_accept().await?; + Coordinator::new(self.clone()) + .failing_pre_accept( + id, + synevi_types::types::TransactionPayload::External(transaction), + ) + .await?; Ok(()) } } @@ -449,20 +639,20 @@ mod tests { let _result = coordinator .clone() - .transaction( - 2, - Vec::from("F"), - ) + .transaction(2, Vec::from("F")) .await .unwrap(); - let coord = coordinator.event_store.get_event_store().await; + // This sleep accounts for replicas apply step, that is not necessarily completed after + // the coordinator returns its result + tokio::time::sleep(Duration::from_secs(1)).await; + let coord = coordinator.event_store.get_event_store(); for node in nodes { assert_eq!( - node.event_store.get_event_store().await, + node.event_store.get_event_store(), coord, "Node: {:?}", - node.get_info() + node.get_serial() ); } } @@ -510,30 +700,26 @@ mod tests { .await .unwrap(); } - match coordinator + coordinator .clone() - .transaction( - 0, - Vec::from("last transaction"), - ) + .transaction(0, Vec::from("last transaction")) .await .unwrap() - { - ExecutorResult::External(e) => e.unwrap(), - _ => panic!(), - }; + .unwrap(); + + // This sleep accounts for replicas apply step, that is not necessarily completed after + // the coordinator returns its result + tokio::time::sleep(Duration::from_secs(1)).await; let 
coordinator_store: BTreeMap = coordinator .event_store .get_event_store() - .await .into_values() .map(|e| (e.t_zero, e.t)) .collect(); assert!(coordinator .event_store .get_event_store() - .await .iter() .all(|(_, e)| e.state == State::Applied)); @@ -542,20 +728,18 @@ mod tests { let node_store: BTreeMap = node .event_store .get_event_store() - .await .into_values() .map(|e| (e.t_zero, e.t)) .collect(); assert!(node .event_store .get_event_store() - .await .clone() .iter() .all(|(_, e)| e.state == State::Applied)); assert_eq!(coordinator_store.len(), node_store.len()); if coordinator_store != node_store { - println!("Node: {:?}", node.get_info()); + println!("Node: {:?}", node.get_serial()); let mut node_store_iter = node_store.iter(); for (k, v) in coordinator_store.iter() { if let Some(next) = node_store_iter.next() { @@ -580,7 +764,7 @@ mod tests { 0, synevi_network::network::GrpcNetwork::new( SocketAddr::from_str("0.0.0.0:1337").unwrap(), - format!("http://localhost:1337"), + "http://localhost:1337".to_string(), id, 0, ), @@ -589,14 +773,7 @@ mod tests { .await .unwrap(); - let result = match node - .transaction(0, vec![127u8]) - .await - .unwrap() - { - ExecutorResult::External(e) => e.unwrap(), - _ => panic!(), - }; + let result = node.transaction(0, vec![127u8]).await.unwrap().unwrap(); assert_eq!(result, vec![127u8]); } diff --git a/synevi_core/src/reorder_buffer.rs b/synevi_core/src/reorder_buffer.rs index b843426..f794b8a 100644 --- a/synevi_core/src/reorder_buffer.rs +++ b/synevi_core/src/reorder_buffer.rs @@ -7,6 +7,7 @@ use std::{ use synevi_network::network::Network; use synevi_types::{traits::Store, Executor, SyneviError, T, T0}; use tokio::{sync::oneshot, time::timeout}; +use tracing::error; use crate::{node::Node, utils::into_dependency}; @@ -85,7 +86,6 @@ where event, latency, })) => { - //println!("Received message: {:?} latency: {}", t0, latency); let now = Instant::now(); buffer.insert(t0, (notify, event, id)); if current_transaction.1 == 
T0::default() { @@ -101,11 +101,8 @@ where if entry.key() <= ¤t_transaction.1 { let (t0_buffer, (notify, event, id)) = entry.remove_entry(); - let (t, deps) = self - .node - .event_store - .pre_accept_tx(id, t0_buffer, event) - .await?; + let (t, deps) = + self.node.event_store.pre_accept_tx(id, t0_buffer, event)?; let _ = notify.send((t, into_dependency(&deps))); } else { break; @@ -115,18 +112,15 @@ where next_latency = latency; } Ok(Err(e)) => { - println!("Error receiving message {e}") + error!("Error receiving message {e}") } Err(_) => { // Elapsed more than 1.2x average (TODO) latency if (current_transaction.0.elapsed().as_micros() as u64) > next_latency { while let Some(entry) = buffer.first_entry() { let (t0_buffer, (notify, event, id)) = entry.remove_entry(); - let (t, deps) = self - .node - .event_store - .pre_accept_tx(id, t0_buffer, event) - .await?; + let (t, deps) = + self.node.event_store.pre_accept_tx(id, t0_buffer, event)?; let _ = notify.send((t, into_dependency(&deps))); } } diff --git a/synevi_core/src/replica.rs b/synevi_core/src/replica.rs index 24fd15e..5317dfb 100644 --- a/synevi_core/src/replica.rs +++ b/synevi_core/src/replica.rs @@ -1,28 +1,26 @@ +use crate::coordinator::Coordinator; use crate::node::Node; use crate::utils::{from_dependency, into_dependency}; -use crate::wait_handler::WaitAction; -use sha3::{Digest, Sha3_256}; -use std::collections::BTreeMap; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::Ordering; use std::sync::Arc; use synevi_network::configure_transport::{ Config, GetEventRequest, GetEventResponse, JoinElectorateRequest, JoinElectorateResponse, - ReadyElectorateRequest, ReadyElectorateResponse, ReportLastAppliedRequest, - ReportLastAppliedResponse, + ReadyElectorateRequest, ReadyElectorateResponse, ReportElectorateRequest, + ReportElectorateResponse, }; use synevi_network::consensus_transport::{ AcceptRequest, AcceptResponse, ApplyRequest, ApplyResponse, CommitRequest, CommitResponse, - 
PreAcceptRequest, PreAcceptResponse, RecoverRequest, RecoverResponse, + PreAcceptRequest, PreAcceptResponse, RecoverRequest, RecoverResponse, TryRecoveryRequest, + TryRecoveryResponse, }; use synevi_network::network::Network; -use synevi_network::reconfiguration::{BufferedMessage, Reconfiguration, Report}; +use synevi_network::reconfiguration::Reconfiguration; use synevi_network::replica::Replica; use synevi_types::traits::Store; -use synevi_types::types::{ExecutorResult, InternalExecution, TransactionPayload, UpsertEvent}; +use synevi_types::types::{Hashes, InternalExecution, TransactionPayload, UpsertEvent}; +use synevi_types::SyneviError; use synevi_types::{Ballot, Executor, State, T, T0}; -use synevi_types::{SyneviError, Transaction}; -use tokio::sync::mpsc::{channel, Receiver, Sender}; -use tokio::sync::Mutex; +use tokio::sync::mpsc::Receiver; use tracing::{instrument, trace}; use ulid::Ulid; @@ -33,10 +31,6 @@ where S: Store, { node: Arc>, - buffer: Arc>>, - notifier: Sender, - ready: Arc, - configuring: Arc, } impl ReplicaConfig @@ -45,50 +39,8 @@ where E: Executor, S: Store, { - pub fn new(node: Arc>, ready: Arc) -> (Self, Receiver) { - let (notifier, receiver) = channel(10); - ( - Self { - node, - buffer: Arc::new(Mutex::new(BTreeMap::default())), - notifier, - ready, - configuring: Arc::new(AtomicBool::new(false)), - }, - receiver, - ) - } - - pub async fn send_buffered( - &self, - ) -> Result>, SyneviError> { - let (sdx, rcv) = channel(100); - let inner = self.buffer.clone(); - let node = self.node.clone(); - let configure_lock = self.configuring.clone(); - tokio::spawn(async move { - configure_lock.store(true, Ordering::SeqCst); - loop { - let event = inner.lock().await.pop_first(); - if let Some(((t0, state), event)) = event { - sdx.send(Some((t0, state, event))).await.map_err(|_| { - SyneviError::SendError( - "Channel for receiving buffered messages closed".to_string(), - ) - })?; - } else { - node.set_ready(); - sdx.send(None).await.map_err(|_| { - 
SyneviError::SendError( - "Channel for receiving buffered messages closed".to_string(), - ) - })?; - break; - } - } - Ok::<(), SyneviError>(()) - }); - Ok(rcv) + pub fn new(node: Arc>) -> Self { + Self { node } } } @@ -99,87 +51,58 @@ where E: Executor + Send + Sync, S: Store + Send + Sync, { - fn is_ready(&self) -> bool { - self.ready.load(Ordering::SeqCst) - } #[instrument(level = "trace", skip(self, request))] async fn pre_accept( &self, request: PreAcceptRequest, _node_serial: u16, - ready: bool, ) -> Result { let t0 = T0::try_from(request.timestamp_zero.as_slice())?; - let request_id = u128::from_be_bytes(request.id.as_slice().try_into()?); - if !ready { + if !self.node.is_ready() { return Ok(PreAcceptResponse::default()); } - trace!(?request_id, "Replica: PreAccept"); + let request_id = u128::from_be_bytes(request.id.as_slice().try_into()?); - // TODO(performance): Remove the lock here - // Creates contention on the event store - if let Some(ballot) = self - .node - .event_store - .accept_tx_ballot(&t0, Ballot::default()) - .await - { - if ballot != Ballot::default() { - return Ok(PreAcceptResponse { - nack: true, - ..Default::default() - }); + trace!(?request_id, "Replica: PreAccept"); + let event_store = self.node.event_store.clone(); + tokio::task::spawn_blocking(move || { + if let Some(ballot) = event_store.accept_tx_ballot(&t0, Ballot::default()) { + if ballot != Ballot::default() { + return Ok(PreAcceptResponse { + nack: true, + ..Default::default() + }); + } } - } - - // let waiting_time = self.network.get_waiting_time(node_serial).await; - - // let (sx, rx) = oneshot::channel(); - let (t, deps) = self - .node - .event_store - .pre_accept_tx(request_id, t0, request.event) - .await?; - - // self.reorder_buffer - // .send_msg(t0, sx, request.event, waiting_time) - // .await?; - - // let (t, deps) = rx.await?; + let (t, deps) = event_store.pre_accept_tx(request_id, t0, request.event)?; - Ok(PreAcceptResponse { - timestamp: t.into(), - dependencies: 
into_dependency(&deps), - nack: false, + Ok(PreAcceptResponse { + timestamp: t.into(), + dependencies: into_dependency(&deps), + nack: false, + }) }) + .await? } #[instrument(level = "trace", skip(self, request))] - async fn accept( - &self, - request: AcceptRequest, - ready: bool, - ) -> Result { + async fn accept(&self, request: AcceptRequest) -> Result { + if !self.node.is_ready() { + return Ok(AcceptResponse::default()); + } let t_zero = T0::try_from(request.timestamp_zero.as_slice())?; let request_id = u128::from_be_bytes(request.id.as_slice().try_into()?); let t = T::try_from(request.timestamp.as_slice())?; let request_ballot = Ballot::try_from(request.ballot.as_slice())?; - if !ready { - return Ok(AcceptResponse::default()); - } trace!(?request_id, "Replica: Accept"); - let dependencies = { - if let Some(ballot) = self - .node - .event_store - .accept_tx_ballot(&t_zero, request_ballot) - .await - { + let event_store = self.node.event_store.clone(); + tokio::task::spawn_blocking(move || { + if let Some(ballot) = event_store.accept_tx_ballot(&t_zero, request_ballot) { if ballot != request_ballot { return Ok(AcceptResponse { dependencies: Vec::new(), @@ -188,170 +111,93 @@ where } } - self.node - .event_store - .upsert_tx(UpsertEvent { - id: request_id, - t_zero, - t, - state: State::Accepted, - transaction: Some(request.event), - dependencies: Some(from_dependency(request.dependencies)?), - ballot: Some(request_ballot), - execution_hash: None, - }) - .await?; - - self.node.event_store.get_tx_dependencies(&t, &t_zero).await - }; - Ok(AcceptResponse { - dependencies: into_dependency(&dependencies), - nack: false, + event_store.upsert_tx(UpsertEvent { + id: request_id, + t_zero, + t, + state: State::Accepted, + transaction: Some(request.event), + dependencies: Some(from_dependency(request.dependencies)?), + ballot: Some(request_ballot), + hashes: None, + })?; + Ok(AcceptResponse { + dependencies: into_dependency(&event_store.get_tx_dependencies(&t, &t_zero)), + 
nack: false, + }) }) + .await? } #[instrument(level = "trace", skip(self, request))] - async fn commit( - &self, - request: CommitRequest, - ready: bool, - ) -> Result { + async fn commit(&self, request: CommitRequest) -> Result { let t_zero = T0::try_from(request.timestamp_zero.as_slice())?; let t = T::try_from(request.timestamp.as_slice())?; let request_id = u128::from_be_bytes(request.id.as_slice().try_into()?); - if !self.configuring.load(Ordering::SeqCst) && !ready { - self.buffer - .lock() - .await - .insert((t_zero, State::Commited), BufferedMessage::Commit(request)); - return Ok(CommitResponse {}); - } trace!(?request_id, "Replica: Commit"); let deps = from_dependency(request.dependencies)?; - let (sx, rx) = tokio::sync::oneshot::channel(); + self.node - .get_wait_handler() - .await? - .send_msg( + .commit(UpsertEvent { + id: request_id, t_zero, t, - deps, - request.event, - WaitAction::CommitBefore, - sx, - request_id, - ) + state: State::Committed, + transaction: Some(request.event), + dependencies: Some(deps), + ballot: None, + hashes: None, + }) .await?; - let _ = rx.await; + Ok(CommitResponse {}) } #[instrument(level = "trace", skip(self, request))] - async fn apply( - &self, - request: ApplyRequest, - ready: bool, - ) -> Result { + async fn apply(&self, request: ApplyRequest) -> Result { let t_zero = T0::try_from(request.timestamp_zero.as_slice())?; let t = T::try_from(request.timestamp.as_slice())?; + let deps = from_dependency(request.dependencies.clone())?; + let request_id = u128::from_be_bytes(request.id.as_slice().try_into()?); - if !self.configuring.load(Ordering::SeqCst) && !ready { - self.buffer - .lock() - .await - .insert((t_zero, State::Applied), BufferedMessage::Apply(request)); - return Ok(ApplyResponse {}); - } trace!(?request_id, "Replica: Apply"); - let transaction: TransactionPayload<::Tx> = - TransactionPayload::from_bytes(request.event)?; - - let deps = from_dependency(request.dependencies)?; - let (sx, rx) = 
tokio::sync::oneshot::channel(); - - self.node - .get_wait_handler() - .await? - .send_msg( - t_zero, - t, - deps.clone(), - transaction.as_bytes(), - WaitAction::ApplyAfter, - sx, - request_id, + let _ = self + .node + .apply( + UpsertEvent { + id: request_id, + t_zero, + t, + state: State::Applied, + transaction: Some(request.event), + dependencies: Some(deps), + ballot: None, + hashes: None, + }, + Some(Hashes { + transaction_hash: request + .transaction_hash + .try_into() + .map_err(|_e| SyneviError::MissingTransactionHash)?, + execution_hash: request + .execution_hash + .try_into() + .map_err(|_e| SyneviError::MissingExecutionHash)?, + ..Default::default() + }), ) - .await?; - - rx.await - .map_err(|_| SyneviError::ReceiveError("Wait receiver closed".to_string()))?; - - let result = match transaction { - TransactionPayload::None => { - return Err(SyneviError::TransactionNotFound); - } - TransactionPayload::External(tx) => self.node.executor.execute(tx).await, - TransactionPayload::Internal(request) => { - // TODO: Build special execution - let result = match &request { - InternalExecution::JoinElectorate { id, serial, host } => { - if id != &self.node.info.id { - let res = self - .node - .add_member(*id, *serial, host.clone(), false) - .await; - let (t, hash) = self.node.event_store.last_applied_hash().await?; - self.node - .network - .report_config(t, hash, host.clone()) - .await?; - res - } else { - Ok(()) - } - } - InternalExecution::ReadyElectorate { id, serial } => { - if id != &self.node.info.id { - self.node.ready_member(*id, *serial).await - } else { - Ok(()) - } - } - }; - match result { - Ok(_) => Ok(ExecutorResult::Internal(Ok(request.clone()))), - Err(err) => Ok(ExecutorResult::Internal(Err(err))), - } - } - }; + .await + .unwrap(); - let mut hasher = Sha3_256::new(); - postcard::to_io(&result, &mut hasher)?; - let hash = hasher.finalize(); - let hashes = self - .node - .event_store - .get_and_update_hash(t_zero, hash.into()) - .await?; - if 
request.transaction_hash != hashes.transaction_hash - || request.execution_hash != hashes.execution_hash - { - Err(SyneviError::MismatchedHashes) - } else { - Ok(ApplyResponse {}) - } + Ok(ApplyResponse {}) } #[instrument(level = "trace", skip(self))] - async fn recover( - &self, - request: RecoverRequest, - ready: bool, - ) -> Result { - if !ready { + async fn recover(&self, request: RecoverRequest) -> Result { + if !self.node.is_ready() { return Ok(RecoverResponse::default()); } let request_id = u128::from_be_bytes(request.id.as_slice().try_into()?); @@ -361,57 +207,70 @@ where // TODO/WARNING: This was initially in one Mutex lock //let mut event_store = self.node.event_store.lock().await; - if let Some(state) = self.node.event_store.get_event_state(&t_zero).await { - // If another coordinator has started recovery with a higher ballot - // Return NACK with the higher ballot number - let request_ballot = Ballot::try_from(request.ballot.as_slice())?; - if let Some(ballot) = self - .node - .event_store - .accept_tx_ballot(&t_zero, request_ballot) - .await - { - if request_ballot != ballot { - return Ok(RecoverResponse { - nack: ballot.into(), - ..Default::default() - }); - } - } - - if matches!(state, State::Undefined) { - self.node - .event_store - .pre_accept_tx(request_id, t_zero, request.event) - .await?; - }; - } else { - self.node - .event_store - .pre_accept_tx(request_id, t_zero, request.event) - .await?; - } - let recover_deps = self.node.event_store.get_recover_deps(&t_zero).await?; - + let event_store = self.node.event_store.clone(); self.node .stats .total_recovers .fetch_add(1, Ordering::Relaxed); + tokio::task::spawn_blocking(move || { + if let Some(state) = event_store.get_event_state(&t_zero) { + // If another coordinator has started recovery with a higher ballot + // Return NACK with the higher ballot number + let request_ballot = Ballot::try_from(request.ballot.as_slice())?; + if let Some(ballot) = event_store.accept_tx_ballot(&t_zero, 
request_ballot) { + if request_ballot != ballot { + return Ok(RecoverResponse { + nack: ballot.into(), + ..Default::default() + }); + } + } - let local_state = self - .node - .event_store - .get_event_state(&t_zero) - .await - .ok_or_else(|| SyneviError::EventNotFound(t_zero.get_inner()))?; - Ok(RecoverResponse { - local_state: local_state.into(), - wait: into_dependency(&recover_deps.wait), - superseding: recover_deps.superseding, - dependencies: into_dependency(&recover_deps.dependencies), - timestamp: recover_deps.timestamp.into(), - nack: Ballot::default().into(), + if matches!(state, State::Undefined) { + event_store.pre_accept_tx(request_id, t_zero, request.event)?; + }; + } else { + event_store.pre_accept_tx(request_id, t_zero, request.event)?; + } + let recover_deps = event_store.get_recover_deps(&t_zero)?; + + let local_state = event_store + .get_event_state(&t_zero) + .ok_or_else(|| SyneviError::EventNotFound(t_zero.get_inner()))?; + Ok(RecoverResponse { + local_state: local_state.into(), + wait: into_dependency(&recover_deps.wait), + superseding: recover_deps.superseding, + dependencies: into_dependency(&recover_deps.dependencies), + timestamp: recover_deps.timestamp.into(), + nack: Ballot::default().into(), + }) }) + .await? + } + + #[instrument(level = "trace", skip(self))] + async fn try_recover( + &self, + request: TryRecoveryRequest, + ) -> Result { + let t0 = T0::try_from(request.timestamp_zero.as_slice())?; + + if !self.node.is_ready() { + if let Some(recover_event) = self + .node + .event_store + .recover_event(&t0, self.node.get_serial())? 
+ { + tokio::spawn(Coordinator::recover(self.node.clone(), recover_event)); + return Ok(TryRecoveryResponse { accepted: true }); + } + } + + // This ensures that this t0 will not get a fast path in the future + let event_store = self.node.event_store.clone(); + tokio::task::spawn_blocking(move || event_store.inc_time_with_guard(t0)).await??; + Ok(TryRecoveryResponse { accepted: false }) } } @@ -426,13 +285,14 @@ where &self, request: JoinElectorateRequest, ) -> Result { - if !self.ready.load(Ordering::SeqCst) { + if !self.node.is_ready() { return Ok(JoinElectorateResponse::default()); } let Some(Config { node_id, node_serial, host, + .. }) = request.config else { return Err(SyneviError::TonicStatusError( @@ -441,7 +301,7 @@ where }; let node = self.node.clone(); - let majority = self.node.network.get_member_len().await; + let member_count = self.node.network.get_members().await.len() as u32; let self_event = Ulid::new(); let _res = node .internal_transaction( @@ -449,27 +309,23 @@ where TransactionPayload::Internal(InternalExecution::JoinElectorate { id: Ulid::from_bytes(node_id.as_slice().try_into()?), serial: node_serial.try_into()?, - host, + new_node_host: host, }), ) .await?; - Ok(JoinElectorateResponse { - majority, - self_event: self_event.to_bytes().to_vec(), - }) + Ok(JoinElectorateResponse { member_count }) } async fn get_events( &self, request: GetEventRequest, ) -> Result>, SyneviError> { - if !self.ready.load(Ordering::SeqCst) { + if !self.node.is_ready() { return Err(SyneviError::NotReady); } let (sdx, rcv) = tokio::sync::mpsc::channel(200); - let event_id = u128::from_be_bytes(request.self_event.as_slice().try_into()?); let last_applied = T::try_from(request.last_applied.as_slice())?; - let mut store_rcv = self.node.event_store.get_events_after(last_applied, event_id).await?; + let mut store_rcv = self.node.event_store.get_events_after(last_applied)?; tokio::spawn(async move { while let Some(Ok(event)) = store_rcv.recv().await { let response = { @@ 
-506,7 +362,6 @@ where sdx.send(response).await.unwrap(); } }); - println!("Returning streaming receiver"); // Stream all events to member Ok(rcv) } @@ -516,7 +371,7 @@ where &self, request: ReadyElectorateRequest, ) -> Result { - if !self.ready.load(Ordering::SeqCst) { + if !self.node.is_ready() { return Ok(ReadyElectorateResponse::default()); } // Start ready electorate transaction with NewMemberUlid @@ -539,39 +394,29 @@ where } // TODO: Move trait to Joining Node -> Rename to receive_config, Ready checks - async fn report_last_applied( + async fn report_electorate( &self, - request: ReportLastAppliedRequest, - ) -> Result { - if self.ready.load(Ordering::SeqCst) { - return Ok(ReportLastAppliedResponse::default()); + request: ReportElectorateRequest, + ) -> Result { + if self.node.is_ready() { + return Ok(ReportElectorateResponse::default()); } - let Some(Config { - node_serial, - node_id, - host, - }) = request.config - else { - return Err(SyneviError::InvalidConversionRequest( - "Invalid config".to_string(), - )); - }; - let report = Report { - node_id: Ulid::from_bytes(node_id.try_into().map_err(|_| { - SyneviError::InvalidConversionFromBytes("Invalid Ulid conversion".to_string()) - })?), - node_serial: node_serial.try_into()?, - node_host: host, - last_applied: request.last_applied.as_slice().try_into()?, - last_applied_hash: request.last_applied_hash.try_into().map_err(|_| { - SyneviError::InvalidConversionFromBytes("Invalid hash conversion".to_string()) - })?, - }; - //dbg!(&report); - self.notifier.send(report).await.map_err(|_| { - SyneviError::SendError("Sender for reporting last applied closed".to_string()) - })?; - Ok(ReportLastAppliedResponse {}) + for member in request.configs { + self.node + .add_member( + Ulid::from_bytes(member.node_id.as_slice().try_into()?), + member.node_serial as u16, + member.host, + member.ready, + ) + .await?; + } + self.node + .network + .get_node_status() + .members_responded + .fetch_add(1, Ordering::Relaxed); + 
Ok(ReportElectorateResponse {}) } } @@ -584,10 +429,6 @@ where fn clone(&self) -> Self { Self { node: self.node.clone(), - buffer: self.buffer.clone(), - notifier: self.notifier.clone(), - ready: self.ready.clone(), - configuring: self.configuring.clone(), } } } diff --git a/synevi_core/src/utils.rs b/synevi_core/src/utils.rs index 90f296b..978509d 100644 --- a/synevi_core/src/utils.rs +++ b/synevi_core/src/utils.rs @@ -36,7 +36,7 @@ where transaction: Some(value.transaction.as_bytes()), dependencies: Some(value.dependencies.clone()), ballot: Some(value.ballot), - execution_hash: None, + hashes: value.hashes.clone(), } } } diff --git a/synevi_core/src/wait_handler.rs b/synevi_core/src/wait_handler.rs index b8b7a37..5f96fba 100644 --- a/synevi_core/src/wait_handler.rs +++ b/synevi_core/src/wait_handler.rs @@ -1,379 +1,147 @@ -use crate::coordinator::Coordinator; -use crate::node::Node; use ahash::RandomState; -use async_channel::{Receiver, Sender}; -use std::collections::BTreeMap; -use std::sync::atomic::AtomicU8; use std::{ - collections::{BTreeSet, HashMap, HashSet}, - sync::Arc, - time::{Duration, Instant}, + collections::{HashMap, HashSet}, + sync::{Arc, Mutex}, }; -use synevi_network::network::Network; -use synevi_types::traits::Store; -use synevi_types::types::UpsertEvent; -use synevi_types::{Executor, State, SyneviError, T, T0}; -use tokio::{sync::oneshot, time::timeout}; - -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum WaitAction { - CommitBefore, - ApplyAfter, -} +use synevi_types::{ + traits::Store, + types::{RecoverEvent, UpsertEvent}, + State, T, T0, +}; +use tokio::{sync::oneshot, time::Instant}; -#[derive(Debug)] -pub struct WaitMessage { - id: u128, - t_zero: T0, +pub struct Waiter { t: T, - deps: HashSet, - transaction: Vec, - action: WaitAction, - notify: Option>, + waited_since: Instant, + dependencies: HashSet, + sender: Vec>, } -#[derive(Clone)] -pub struct WaitHandler -where - N: Network + Send + Sync, - E: Executor + Send + 
Sync, - S: Store + Send + Sync, -{ - sender: Sender, - receiver: Receiver, - node: Arc>, +pub enum CheckResult { + NoRecovery, + RecoverEvent(RecoverEvent), + RecoverUnknown(T0), } -#[derive(Debug)] -struct WaitDependency { - wait_message: Option, - deps: HashSet, - started_at: Instant, +impl CheckResult { + pub fn replace_if_smaller(&mut self, other: CheckResult) { + match (&self, &other) { + (CheckResult::NoRecovery, _) => *self = other, + ( + CheckResult::RecoverEvent(recover_event_existing), + CheckResult::RecoverEvent(recover_event), + ) => { + if recover_event.t_zero < recover_event_existing.t_zero { + *self = other; + } + } + ( + CheckResult::RecoverEvent(recover_event_existing), + CheckResult::RecoverUnknown(t0), + ) => { + if *t0 < recover_event_existing.t_zero { + *self = other; + } + } + ( + CheckResult::RecoverUnknown(t0_existing), + CheckResult::RecoverEvent(recover_event), + ) => { + if recover_event.t_zero < *t0_existing { + *self = other; + } + } + (CheckResult::RecoverUnknown(t0_existing), CheckResult::RecoverUnknown(t0)) => { + if t0 < t0_existing { + *self = other; + } + } + _ => (), + } + } } -struct WaiterState { - events: HashMap, - committed: HashMap, - applied: HashSet, +pub struct WaitHandler +where + S: Store, +{ + waiters: Mutex>, + store: Arc, } -static _RECOVERY_CYCLE: AtomicU8 = AtomicU8::new(0); - -impl WaitHandler +impl WaitHandler where - N: Network + Send + Sync, - E: Executor + Send + Sync, - S: Store + Send + Sync, + S: Store, { - pub fn new(node: Arc>) -> Arc { - let (sender, receiver) = async_channel::bounded(1000); - Arc::new(Self { - sender, - receiver, - node, - }) - } - - #[allow(clippy::too_many_arguments)] - pub async fn send_msg( - &self, - t_zero: T0, - t: T, - deps: HashSet, - transaction: Vec, - action: WaitAction, - notify: oneshot::Sender<()>, - id: u128, - ) -> Result<(), SyneviError> { - self.sender - .send(WaitMessage { - id, - t_zero, - t, - deps, - transaction, - action, - notify: Some(notify), - }) - .await - 
.map_err(|e| SyneviError::SendError(e.to_string())) - } - - pub async fn run(self: Arc) -> Result<(), SyneviError> { - - let mut waiter_state = WaiterState::new(); - let mut recovering = BTreeSet::new(); - - loop { - match timeout(Duration::from_millis(50), self.receiver.recv()).await { - Ok(Ok(msg)) => match msg.action { - WaitAction::CommitBefore => { - if let Err(err) = self.commit_action(msg, &mut waiter_state).await { - tracing::error!("Error commit event: {:?}", err); - println!("Error commit event: {:?}", err); - continue; - }; - } - WaitAction::ApplyAfter => { - match &self.node.event_store.get_event(msg.t_zero).await? { - Some(event) if event.state < State::Commited => { - if let Err(err) = self - .commit_action( - WaitMessage { - id: msg.id, - t_zero: msg.t_zero, - t: msg.t, - deps: msg.deps.clone(), - transaction: msg.transaction.clone(), - action: WaitAction::CommitBefore, - notify: None, - }, - &mut waiter_state, - ) - .await - { - tracing::error!( - "Error committing event before apply: {:?}", - err - ); - println!("Error committing event bevore apply: {:?}", err); - continue; - }; - } - None => { - if let Err(err) = self - .commit_action( - WaitMessage { - id: msg.id, - t_zero: msg.t_zero, - t: msg.t, - deps: msg.deps.clone(), - transaction: msg.transaction.clone(), - action: WaitAction::CommitBefore, - notify: None, - }, - &mut waiter_state, - ) - .await - { - tracing::error!( - "Error committing event before apply: {:?}", - err - ); - println!("Error committing event before apply: {:?}", err); - continue; - }; - } - _ => (), - } - - if let Some(mut msg) = waiter_state.insert_apply(msg) { - if let Err(e) = self.upsert_event(&msg).await { - tracing::error!("Error upserting event: {:?}", e); - println!("Error upserting event: {:?}", e); - continue; - }; - if let Some(notify) = msg.notify.take() { - let _ = notify.send(()); - } - waiter_state.applied.insert(msg.t_zero); - let mut to_apply = BTreeMap::new(); - 
waiter_state.remove_from_waiter_apply(&msg.t_zero, &mut to_apply); - while let Some(mut apply) = to_apply.pop_first() { - apply.1.action = WaitAction::ApplyAfter; - if let Err(e) = self.upsert_event(&apply.1).await { - tracing::error!("Error upserting event: {:?}", e); - println!("Error upserting event: {:?}", e); - continue; - }; - waiter_state.applied.insert(apply.1.t_zero); - if let Some(notify) = apply.1.notify.take() { - let _ = notify.send(()); - } - waiter_state - .remove_from_waiter_apply(&apply.1.t_zero, &mut to_apply); - } - } - } - }, - _ => { - if let Some(t0_recover) = self.check_recovery(&mut waiter_state) { - recovering.insert(t0_recover); - let recover_t0 = recovering.pop_first().unwrap_or(t0_recover); - let wait_handler = self.clone(); - wait_handler.recover(recover_t0, &mut waiter_state).await; - } - } - } - } - } - - async fn commit_action( - &self, - msg: WaitMessage, - waiter_state: &mut WaiterState, - ) -> Result<(), SyneviError> { - self.upsert_event(&msg).await?; - waiter_state.committed.insert(msg.t_zero, msg.t); - let mut to_apply = waiter_state.remove_from_waiter_commit(&msg.t_zero, &msg.t); - while let Some(mut apply) = to_apply.pop_first() { - apply.1.action = WaitAction::ApplyAfter; - if let Err(e) = self.upsert_event(&apply.1).await { - tracing::error!("Error upserting event: {:?}", e); - println!("Error upserting event: {:?}", e); - continue; - }; - waiter_state.applied.insert(apply.1.t_zero); - if let Some(notify) = apply.1.notify.take() { - let _ = notify.send(()); - } - waiter_state.remove_from_waiter_apply(&apply.1.t_zero, &mut to_apply); + pub fn new(store: Arc, _serial: u16) -> Self { + Self { + waiters: Mutex::new(HashMap::default()), + store, } - waiter_state.insert_commit(msg); - Ok(()) } - async fn upsert_event( - &self, - WaitMessage { - id, - t_zero, - t, - action, - deps, - transaction, - .. 
- }: &WaitMessage, - ) -> Result<(), SyneviError> { - let state = match action { - WaitAction::CommitBefore => State::Commited, - WaitAction::ApplyAfter => State::Applied, - }; - self.node - .event_store - .upsert_tx(UpsertEvent { - id: *id, - t_zero: *t_zero, - t: *t, - state, - transaction: Some(transaction.clone()), - dependencies: Some(deps.clone()), - ..Default::default() - }) - .await?; - - Ok(()) - } + pub fn get_waiter(&self, upsert_event: &UpsertEvent) -> Option> { + let (sdx, rcv) = oneshot::channel(); + let mut waiter_lock = self.waiters.lock().expect("Locking waiters failed"); - async fn recover(self: Arc, t0_recover: T0, waiter_state: &mut WaiterState) { - if let Some(event) = waiter_state.events.get_mut(&t0_recover) { - event.started_at = Instant::now(); - } - let node = self.node.clone(); - tokio::spawn(async move { - if let Err(err) = Coordinator::recover(node, t0_recover).await { - tracing::error!("Error recovering event: {:?}", err); - println!("Error recovering event: {:?}", err); - } + let waiter = waiter_lock.entry(upsert_event.t_zero).or_insert(Waiter { + t: upsert_event.t, + waited_since: Instant::now(), + dependencies: upsert_event.dependencies.clone().unwrap_or_default(), + sender: Vec::new(), }); - } + waiter.waited_since = Instant::now(); - fn check_recovery(&self, waiter_state: &mut WaiterState) -> Option { - for ( - _t0, - WaitDependency { - deps, started_at, .. 
- }, - ) in waiter_state.events.iter_mut() - { - if started_at.elapsed() > Duration::from_secs(1) { - let sorted_deps: BTreeSet = deps.iter().cloned().collect(); + for dep_t0 in upsert_event.dependencies.clone().unwrap_or_default().iter() { + let Some(dep_event) = self.store.get_event(*dep_t0).ok().flatten() else { + continue; + }; - let mut min_dep = None; - for t0_dep in sorted_deps { - if let Some(t_dep) = waiter_state.committed.get(&t0_dep) { - // Check if lowest t0 is committed - // If yes -> Recover dep with lowest T - if let Some((t0_min, t_min)) = min_dep.as_mut() { - if t_dep < t_min { - *t0_min = t0_dep; - *t_min = *t_dep; - } - } else { - min_dep = Some((t0_dep, *t_dep)); - } - } else { - // Lowest T0 is not commited -> Recover lowest t0 to ensure commit - // Recover t0_dep - *started_at = Instant::now(); - return Some(t0_dep); - } + match dep_event.state { + State::Committed if dep_event.t > upsert_event.t => { + waiter.dependencies.remove(dep_t0); } - - // Recover min_dep - if let Some((t0_dep, _)) = min_dep { - *started_at = Instant::now(); - return Some(t0_dep); + State::Applied => { + waiter.dependencies.remove(dep_t0); } + _ => {} } } - None - } -} -impl WaiterState { - fn new() -> Self { - Self { - events: HashMap::default(), - committed: HashMap::default(), - applied: HashSet::default(), + if waiter.dependencies.is_empty() { + return None; } - } - fn remove_from_waiter_commit(&mut self, t0_dep: &T0, t_dep: &T) -> BTreeMap { - let mut apply_deps = BTreeMap::default(); - self.events.retain(|_, event| { - if let Some(msg) = &mut event.wait_message { - if msg.t_zero == *t0_dep { - return true; - } + waiter.sender.push(sdx); + Some(rcv) + } - if t_dep < &msg.t { - // Cannot remove must wait for apply -> retain - return true; - } - event.deps.remove(t0_dep); - if event.deps.is_empty() { - if msg.action != WaitAction::ApplyAfter { - if let Some(sender) = msg.notify.take() { - let _ = sender.send(()); - } - } else if let Some(msg) = 
event.wait_message.take() { - apply_deps.insert(msg.t, msg); + pub fn notify_commit(&self, t0_commit: &T0, t_commit: &T) { + let mut waiter_lock = self.waiters.lock().expect("Locking waiters failed"); + waiter_lock.retain(|_, waiter| { + if waiter.dependencies.contains(t0_commit) && t_commit > &waiter.t { + waiter.dependencies.remove(t0_commit); + waiter.waited_since = Instant::now(); + if waiter.dependencies.is_empty() { + for sdx in waiter.sender.drain(..) { + let _ = sdx.send(()); } return false; } } true }); - apply_deps } - fn remove_from_waiter_apply(&mut self, t0_dep: &T0, to_apply: &mut BTreeMap) { - self.events.retain(|_, event| { - event.deps.remove(t0_dep); - for wait_dep in to_apply.iter() { - event.deps.remove(&wait_dep.1.t_zero); - } - - if let Some(msg) = &mut event.wait_message { - if event.deps.is_empty() { - if msg.action != WaitAction::ApplyAfter { - if let Some(sender) = msg.notify.take() { - let _ = sender.send(()); - } - } else if let Some(msg) = event.wait_message.take() { - to_apply.insert(msg.t, msg); + pub fn notify_apply(&self, t0_commit: &T0) { + let mut waiter_lock = self.waiters.lock().expect("Locking waiters failed"); + waiter_lock.retain(|_, waiter| { + if waiter.dependencies.contains(t0_commit) { + waiter.dependencies.remove(t0_commit); + waiter.waited_since = Instant::now(); + if waiter.dependencies.is_empty() { + for sdx in waiter.sender.drain(..) 
{ + let _ = sdx.send(()); } return false; } @@ -382,208 +150,68 @@ impl WaiterState { }); } - fn insert_commit(&mut self, mut wait_message: WaitMessage) { - if self.applied.contains(&wait_message.t_zero) { - if let Some(sender) = wait_message.notify.take() { - let _ = sender.send(()); - } - return; - } - let mut wait_dep = WaitDependency { - wait_message: Some(wait_message), - deps: HashSet::default(), - started_at: Instant::now(), - }; - if let Some(wait_message) = &mut wait_dep.wait_message { - for dep_t0 in wait_message.deps.iter() { - if !self.applied.contains(dep_t0) { - if let Some(stored_t) = self.committed.get(dep_t0) { - // Your T is lower than the dep commited t -> no wait necessary - if &wait_message.t < stored_t { - continue; - } - } - wait_dep.deps.insert(*dep_t0); - } - } - - if wait_dep.deps.is_empty() { - if let Some(sender) = wait_message.notify.take() { - let _ = sender.send(()); - } - return; - } - - if let Some(existing) = self.events.get_mut(&wait_message.t_zero) { - if let Some(existing_wait_message) = &mut existing.wait_message { - if let Some(sender) = existing_wait_message.notify.take() { - let _ = sender.send(()); - return; + pub fn check_recovery(&self) -> CheckResult { + let mut waiter_lock = self.waiters.lock().expect("Locking waiters failed"); + let len = waiter_lock.len() as u128 + 10; + let mut smallest_hanging_dep = CheckResult::NoRecovery; + for (t0, waiter) in waiter_lock.iter_mut() { + if waiter.waited_since.elapsed().as_millis() > len * 2 { + // Get deps and find smallest dep that is not committed / applied + let Some(event) = self.store.get_event(*t0).ok().flatten() else { + tracing::error!( + "Unexpected state in wait_handler: Event timed out, but not found in store" + ); + continue; + }; + for dep in event.dependencies.iter() { + let Some(event_dep) = self.store.get_event(*dep).ok().flatten() else { + smallest_hanging_dep.replace_if_smaller(CheckResult::RecoverUnknown(*dep)); + continue; + }; + if event_dep.t_zero > 
event.t_zero { + tracing::error!("Error: Dependency is newer than event"); + continue; } - } - } - self.events.insert(wait_message.t_zero, wait_dep); - } - } - - fn insert_apply(&mut self, mut wait_message: WaitMessage) -> Option { - if self.applied.contains(&wait_message.t_zero) { - if let Some(sender) = wait_message.notify.take() { - let _ = sender.send(()); - } - return None; - } - let mut wait_dep = WaitDependency { - wait_message: Some(wait_message), - deps: HashSet::default(), - started_at: Instant::now(), - }; - - if let Some(wait_message) = &wait_dep.wait_message { - for dep_t0 in wait_message.deps.iter() { - if !self.applied.contains(dep_t0) { - if let Some(stored_t) = self.committed.get(dep_t0) { - // Your T is lower than the dep commited t -> no wait necessary - if &wait_message.t < stored_t { + match event_dep.state { + State::Committed => { + if event_dep.t > event.t { + // Dependency is newer than event (and already commited) + continue; + } + smallest_hanging_dep + .replace_if_smaller(CheckResult::RecoverEvent(event_dep.into())); + } + State::Applied => { + // Already applied (no problem) continue; } + _ => { + smallest_hanging_dep + .replace_if_smaller(CheckResult::RecoverEvent(event_dep.into())); + } } - // if not applied and not comitted with lower t - wait_dep.deps.insert(*dep_t0); } - } - - if wait_dep.deps.is_empty() { - if let Some(wait_msg) = wait_dep.wait_message.take() { - return Some(wait_msg); + if !matches!(smallest_hanging_dep, CheckResult::NoRecovery) { + waiter.waited_since = Instant::now(); + return smallest_hanging_dep; } - } else { - self.events.insert(wait_message.t_zero, wait_dep); } } - None + CheckResult::NoRecovery } } -#[cfg(test)] -mod tests { - use monotime::MonoTime; - use ulid::Ulid; - - use crate::{ - tests::{DummyExecutor, NetworkMock}, - wait_handler::*, - }; - - #[tokio::test] - async fn test_wait_handler() { - let (sender, receiver): (Sender, Receiver) = - async_channel::unbounded(); - - let node = 
Node::new_with_network_and_executor( - Ulid::new(), - 1, - NetworkMock::default(), - DummyExecutor, - ) - .await - .unwrap(); - let wait_handler = WaitHandler { - sender, - receiver, - node, - }; - - let (sx11, rx11) = tokio::sync::oneshot::channel(); - let (sx12, rx12) = tokio::sync::oneshot::channel(); - let (sx21, _rx21) = tokio::sync::oneshot::channel(); - - // let notify_2_1_future = notify_2_1.notified(); - // let notify_2_2_future = notify_2_2.notified(); - - let id_1 = u128::from_be_bytes(Ulid::new().to_bytes()); - let _id_2 = u128::from_be_bytes(Ulid::new().to_bytes()); - let t0_1 = T0(MonoTime::new_with_time(1u128, 0, 0)); - let t0_2 = T0(MonoTime::new_with_time(2u128, 0, 0)); - let t_1 = T(MonoTime::new_with_time(1u128, 0, 0)); - let t_2 = T(MonoTime::new_with_time(2u128, 0, 0)); - let deps_2 = HashSet::from_iter([t0_1]); - wait_handler - .send_msg( - t0_2, - t_2, - deps_2.clone(), - Vec::new(), - WaitAction::CommitBefore, - sx11, - id_1, - ) - .await - .unwrap(); - wait_handler - .send_msg( - t0_1, - t_1, - HashSet::default(), - Vec::new(), - WaitAction::CommitBefore, - sx12, - id_1, - ) - .await - .unwrap(); - - wait_handler - .send_msg( - t0_1, - t_1, - HashSet::default(), - Vec::new(), - WaitAction::ApplyAfter, - sx21, - id_1, - ) - .await - .unwrap(); - // wait_handler - // .send_msg( - // t0_2.clone(), - // t_2.clone(), - // deps_2.clone(), - // Bytes::new(), - // WaitAction::CommitBefore, - // notify_2_1.clone(), - // ) - // .await - // .unwrap(); - // wait_handler - // .send_msg( - // t0_1, - // t_1, - // HashMap::new(), - // Bytes::new(), - // WaitAction::ApplyAfter, - // notify_1_2.clone(), - // ) - // .await - // .unwrap(); - - let wait_handler = Arc::new(wait_handler); - - tokio::spawn(async move { wait_handler.run().await.unwrap() }); - timeout(Duration::from_millis(10), rx11) - .await - .unwrap() - .unwrap(); - timeout(Duration::from_millis(10), rx12) - .await - .unwrap() - .unwrap(); - // timeout(Duration::from_millis(10), 
notify_2_1_future) - // .await - // .unwrap(); - // timeout(Duration::from_millis(10), notify_2_2_future) - // .await - // .unwrap(); - } -} +// Tx1 = dep[Tx0] + +// -> Tx0 commit +// -> for each waiter: is tx0 in deps? +// -> if yes! -> is t(tx0) > t(tx1) +// -> y -> do nothing +// -> n -> increase dep_state +1 +// -> if dep_state == dep.len() -> send signal to waiter +// +// +//loop { +// if waiter.waited_since > 10s -> Find inital tx everyone is waiting for -> +// +//} diff --git a/synevi_kv/src/kv_store.rs b/synevi_kv/src/kv_store.rs index b51d483..ca5fbd5 100644 --- a/synevi_kv/src/kv_store.rs +++ b/synevi_kv/src/kv_store.rs @@ -6,7 +6,7 @@ use std::fmt::Debug; use std::sync::{Arc, Mutex}; use synevi_core::node::Node; use synevi_network::network::Network; -use synevi_types::types::{ExecutorResult, SyneviResult}; +use synevi_types::types::SyneviResult; use synevi_types::{error::SyneviError, Executor}; use ulid::Ulid; @@ -58,33 +58,33 @@ impl synevi_types::Transaction for Transaction { #[async_trait::async_trait] impl Executor for KVExecutor { type Tx = Transaction; - async fn execute(&self, transaction: Self::Tx) -> SyneviResult { + async fn execute(&self, _id: u128, transaction: Self::Tx) -> SyneviResult { Ok(match transaction { Transaction::Read { key } => { let Some(key) = self.store.lock().unwrap().get(&key).cloned() else { - return Ok(ExecutorResult::External(Err(KVError::KeyNotFound))); + return Ok(Err(KVError::KeyNotFound)); }; - ExecutorResult::External(Ok(key)) + Ok(key) } Transaction::Write { key, value } => { self.store .lock() .unwrap() .insert(key.clone(), value.clone()); - ExecutorResult::External(Ok(value)) + Ok(value) } Transaction::Cas { key, from, to } => { let mut store = self.store.lock().unwrap(); let Some(entry) = store.get_mut(&key) else { - return Ok(ExecutorResult::External(Err(KVError::KeyNotFound))); + return Ok(Err(KVError::KeyNotFound)); }; if entry == &from { *entry = to.clone(); - ExecutorResult::External(Ok(to)) + Ok(to) } else { 
- return Ok(ExecutorResult::External(Err(KVError::MismatchError))); + return Ok(Err(KVError::MismatchError)); } } }) @@ -116,16 +116,8 @@ where async fn transaction(&self, id: Ulid, transaction: Transaction) -> Result { let node = self.node.clone(); - match node - .transaction( - u128::from_be_bytes(id.to_bytes()), - transaction, - ) + node.transaction(u128::from_be_bytes(id.to_bytes()), transaction) .await? - { - ExecutorResult::External(result) => result, - _ => Err(KVError::MismatchError), // TODO: Make a new error for this case - } } pub async fn read(&self, key: String) -> Result { diff --git a/synevi_network/Cargo.toml b/synevi_network/Cargo.toml index 8ffaebd..2613aa8 100644 --- a/synevi_network/Cargo.toml +++ b/synevi_network/Cargo.toml @@ -8,7 +8,7 @@ license.workspace = true description.workspace = true [dependencies] -prost = "0.13.0" +prost = "0.13.3" tokio = {workspace = true} tokio-stream = {workspace = true} futures-util = "0.3.30" @@ -23,4 +23,4 @@ synevi_types = { path = "../synevi_types", version = "0.1.0"} ahash = {workspace = true} [build-dependencies] -tonic-build = "0.12.0" +tonic-build = "0.12.3" diff --git a/synevi_network/src/latency_service.rs b/synevi_network/src/latency_service.rs index 78cff30..fd21a28 100644 --- a/synevi_network/src/latency_service.rs +++ b/synevi_network/src/latency_service.rs @@ -1,6 +1,5 @@ -use std::collections::HashMap; use bytes::{BufMut, BytesMut}; -use ulid::Ulid; +use std::collections::HashMap; use std::{ sync::Arc, time::{self, Duration, Instant}, @@ -8,6 +7,7 @@ use std::{ use synevi_types::error::SyneviError; use tokio::sync::RwLock; use tonic::{Request, Response}; +use ulid::Ulid; use crate::{ configure_transport::{ @@ -18,9 +18,11 @@ use crate::{ const LATENCY_INTERVAL: u64 = 10; -pub async fn get_latency(members: Arc>>) -> Result<(), SyneviError> { +pub async fn get_latency( + members: Arc, ahash::RandomState>>>, +) -> Result<(), SyneviError> { loop { - for (_ ,member) in members.read().await.iter() { + 
for (_, member) in members.read().await.iter() { let mut client = TimeServiceClient::new(member.member.channel.clone()); let time = time::SystemTime::now() .duration_since(time::UNIX_EPOCH) diff --git a/synevi_network/src/network.rs b/synevi_network/src/network.rs index fdcbbdf..dd1fdfd 100644 --- a/synevi_network/src/network.rs +++ b/synevi_network/src/network.rs @@ -5,9 +5,9 @@ use crate::configure_transport::reconfiguration_service_server::ReconfigurationS use crate::configure_transport::time_service_server::TimeServiceServer; use crate::configure_transport::{ Config, GetEventRequest, GetEventResponse, JoinElectorateRequest, ReadyElectorateRequest, - ReportLastAppliedRequest, + ReportElectorateRequest, }; -use crate::consensus_transport::{RecoverRequest, RecoverResponse}; +use crate::consensus_transport::{RecoverRequest, RecoverResponse, TryRecoveryRequest}; use crate::latency_service::get_latency; use crate::reconfiguration::Reconfiguration; use crate::{ @@ -20,14 +20,15 @@ use crate::{ replica::{Replica, ReplicaBox}, }; use std::collections::HashMap; -use std::sync::atomic::{AtomicI64, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU32, AtomicU64, Ordering}; use std::{net::SocketAddr, sync::Arc}; use synevi_types::error::SyneviError; -use synevi_types::T; +use synevi_types::T0; use tokio::sync::{Mutex, RwLock}; use tokio::task::JoinSet; use tonic::metadata::{AsciiMetadataKey, AsciiMetadataValue}; use tonic::transport::{Channel, Server}; +use tracing::error; use ulid::Ulid; #[async_trait::async_trait] @@ -36,6 +37,7 @@ pub trait NetworkInterface: Send + Sync { &self, request: BroadcastRequest, ) -> Result, SyneviError>; + async fn broadcast_recovery(&self, t0: T0) -> Result; // All members } #[async_trait::async_trait] @@ -55,20 +57,15 @@ pub trait Network: Send + Sync + 'static { ) -> Result<(), SyneviError>; async fn get_interface(&self) -> Arc; async fn get_waiting_time(&self, node_serial: u16) -> u64; - async fn 
get_member_len(&self) -> u32; - async fn broadcast_config(&self, host: String) -> Result<(u32, Vec), SyneviError>; // All members - async fn report_config( - &self, - last_applied: T, - last_applied_hash: [u8; 32], - host: String, - ) -> Result<(), SyneviError>; + async fn get_members(&self) -> Vec>; + fn get_node_status(&self) -> Arc; + async fn join_electorate(&self, host: String) -> Result; // All members + async fn report_config(&self, host: String) -> Result<(), SyneviError>; async fn get_stream_events( &self, last_applied: Vec, - self_event: Vec, ) -> Result, SyneviError>; - async fn ready_electorate(&self) -> Result<(), SyneviError>; + async fn ready_electorate(&self, host: String) -> Result<(), SyneviError>; async fn ready_member(&self, id: Ulid, serial: u16) -> Result<(), SyneviError>; } @@ -109,39 +106,34 @@ where self.as_ref().get_waiting_time(node_serial).await } - async fn get_member_len(&self) -> u32 { - self.as_ref().get_member_len().await + async fn get_members(&self) -> Vec> { + self.as_ref().get_members().await } - async fn broadcast_config(&self, host: String) -> Result<(u32, Vec), SyneviError> { - self.as_ref().broadcast_config(host).await + + fn get_node_status(&self) -> Arc { + self.as_ref().get_node_status() + } + + async fn join_electorate(&self, host: String) -> Result { + self.as_ref().join_electorate(host).await } async fn get_stream_events( &self, last_applied: Vec, - self_event: Vec, ) -> Result, SyneviError> { - self.as_ref() - .get_stream_events(last_applied, self_event) - .await + self.as_ref().get_stream_events(last_applied).await } - async fn ready_electorate(&self) -> Result<(), SyneviError> { - self.as_ref().ready_electorate().await + async fn ready_electorate(&self, host: String) -> Result<(), SyneviError> { + self.as_ref().ready_electorate(host).await } async fn ready_member(&self, id: Ulid, serial: u16) -> Result<(), SyneviError> { self.as_ref().ready_member(id, serial).await } - async fn report_config( - &self, - 
last_applied: T, - last_applied_hash: [u8; 32], - host: String, - ) -> Result<(), SyneviError> { - self.as_ref() - .report_config(last_applied, last_applied_hash, host) - .await + async fn report_config(&self, host: String) -> Result<(), SyneviError> { + self.as_ref().report_config(host).await } } @@ -156,25 +148,35 @@ where ) -> Result, SyneviError> { self.as_ref().broadcast(request).await } + async fn broadcast_recovery(&self, t0: T0) -> Result { + self.as_ref().broadcast_recovery(t0).await + } } -#[derive(Clone, Debug, Default)] +#[derive(Debug, Default)] pub struct NodeInfo { pub id: Ulid, pub serial: u16, + pub host: String, + pub ready: AtomicBool, +} + +#[derive(Debug)] +pub struct NodeStatus { + pub info: NodeInfo, + pub members_responded: AtomicU32, + pub has_members: AtomicBool, } -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct Member { pub info: NodeInfo, - pub host: String, pub channel: Channel, - pub ready_electorate: bool, } #[derive(Debug)] pub struct MemberWithLatency { - pub member: Arc, + pub member: Member, pub latency: AtomicU64, pub skew: AtomicI64, } @@ -200,27 +202,30 @@ pub enum BroadcastResponse { #[derive(Debug)] pub struct GrpcNetwork { pub socket_addr: SocketAddr, - pub self_info: (NodeInfo, String), - pub members: Arc>>, + pub self_status: Arc, + pub members: Arc, ahash::RandomState>>>, join_set: Mutex>>, } #[derive(Debug)] pub struct GrpcNetworkSet { - members: Vec>, + members: Vec>, } impl GrpcNetwork { - pub fn new(socket_addr: SocketAddr, address: String, node_id: Ulid, node_serial: u16) -> Self { + pub fn new(socket_addr: SocketAddr, host: String, node_id: Ulid, node_serial: u16) -> Self { Self { socket_addr, - self_info: ( - NodeInfo { + self_status: Arc::new(NodeStatus { + info: NodeInfo { id: node_id, serial: node_serial, + host, + ready: AtomicBool::new(false), }, - address, - ), + members_responded: AtomicU32::new(0), + has_members: AtomicBool::new(false), + }), members: Arc::new(RwLock::new(HashMap::default())), 
join_set: Mutex::new(JoinSet::new()), } @@ -228,13 +233,7 @@ impl GrpcNetwork { pub async fn create_network_set(&self) -> Arc { Arc::new(GrpcNetworkSet { - members: self - .members - .read() - .await - .iter() - .map(|(_, e)| e.member.clone()) - .collect(), + members: self.members.read().await.values().cloned().collect(), }) } } @@ -256,23 +255,47 @@ impl Network for GrpcNetwork { host: String, ready: bool, ) -> Result<(), SyneviError> { - let channel = Channel::from_shared(host.clone())?.connect().await?; + if self.self_status.info.id == id { + return Ok(()); + } + let endpoint = Channel::from_shared(host.clone())?; + // Retry connecting to member + let mut backoff = 0u64; + let channel = loop { + match endpoint.connect().await { + Ok(channel) => break channel, + Err(e) => { + if backoff < 5 { + backoff += 1; + } else { + tracing::error!("Backoff limit reached, connecting to member"); + return Err(SyneviError::TonicTransportError(e)); + } + tracing::error!("Error connecting to member: {:?}", e); + tokio::time::sleep(tokio::time::Duration::from_secs(backoff)).await; + } + } + }; let mut writer = self.members.write().await; if writer.get(&id).is_none() { writer.insert( id, - MemberWithLatency { - member: Arc::new(Member { - info: NodeInfo { id, serial }, - host, + Arc::new(MemberWithLatency { + member: Member { + info: NodeInfo { + id, + serial, + host, + ready: AtomicBool::new(ready), + }, channel, - ready_electorate: ready, - }), + }, latency: AtomicU64::new(500), skew: AtomicI64::new(0), - }, + }), ); } + self.self_status.has_members.store(true, Ordering::Relaxed); Ok(()) } @@ -320,15 +343,20 @@ impl Network for GrpcNetwork { (max_latency) - (node_latency / 2) } - async fn get_member_len(&self) -> u32 { - (self.members.read().await.len() + 1) as u32 + async fn get_members(&self) -> Vec> { + self.members.read().await.values().cloned().collect() } - async fn broadcast_config(&self, host: String) -> Result<(u32, Vec), SyneviError> { + fn get_node_status(&self) -> 
Arc { + self.self_status.clone() + } + + async fn join_electorate(&self, host: String) -> Result { let config = Config { - node_serial: self.self_info.0.serial as u32, - node_id: self.self_info.0.id.to_bytes().to_vec(), - host: self.self_info.1.clone(), + node_serial: self.self_status.info.serial as u32, + node_id: self.self_status.info.id.to_bytes().to_vec(), + host: self.self_status.info.host.clone(), + ready: self.self_status.info.ready.load(Ordering::Relaxed), }; let channel = Channel::from_shared(host)?.connect().await?; let request = tonic::Request::new(JoinElectorateRequest { @@ -337,46 +365,50 @@ impl Network for GrpcNetwork { let mut client = ReconfigurationServiceClient::new(channel); let response = client.join_electorate(request).await?; let response = response.into_inner(); - Ok((response.majority, response.self_event)) + Ok(response.member_count) } - async fn report_config( - &self, - last_applied: T, - last_applied_hash: [u8; 32], - host: String, - ) -> Result<(), SyneviError> { + async fn report_config(&self, host: String) -> Result<(), SyneviError> { let config = Config { - node_serial: self.self_info.0.serial as u32, - node_id: self.self_info.0.id.to_bytes().to_vec(), - host: self.self_info.1.clone(), + node_serial: self.self_status.info.serial as u32, + node_id: self.self_status.info.id.to_bytes().to_vec(), + host: self.self_status.info.host.clone(), + ready: self.self_status.info.ready.load(Ordering::Relaxed), }; + + let mut configs: Vec<_> = self + .members + .read() + .await + .iter() + .map(|(_, m)| Config { + node_serial: m.member.info.serial as u32, + node_id: m.member.info.id.to_bytes().to_vec(), + host: m.member.info.host.clone(), + ready: m.member.info.ready.load(Ordering::Relaxed), + }) + .collect(); + configs.push(config); + let channel = Channel::from_shared(host)?.connect().await?; - let request = tonic::Request::new(ReportLastAppliedRequest { - config: Some(config), - last_applied: last_applied.into(), - last_applied_hash: 
last_applied_hash.into(), - }); + let request = tonic::Request::new(ReportElectorateRequest { configs }); let mut client = InitServiceClient::new(channel); - let _res = client.report_last_applied(request).await?; + let _res = client.report_electorate(request).await?; Ok(()) } async fn get_stream_events( &self, last_applied: Vec, - self_event: Vec, ) -> Result, SyneviError> { let lock = self.members.read().await; let mut members = lock.iter(); - let Some((_, member)) = members.find(|(_, m)| m.member.ready_electorate) else { + let Some((_, member)) = members.find(|(_, m)| m.member.info.ready.load(Ordering::Relaxed)) + else { return Err(SyneviError::NoMembersFound); }; let channel = member.member.channel.clone(); - let request = GetEventRequest { - last_applied, - self_event, - }; + let request = GetEventRequest { last_applied }; let (sdx, rcv) = tokio::sync::mpsc::channel(200); tokio::spawn(async move { @@ -402,16 +434,11 @@ impl Network for GrpcNetwork { Ok(rcv) } - async fn ready_electorate(&self) -> Result<(), SyneviError> { - let lock = self.members.read().await; - let mut members = lock.iter(); - let Some((_, member)) = members.next() else { - return Err(SyneviError::NoMembersFound); - }; - let channel = member.member.channel.clone(); + async fn ready_electorate(&self, host: String) -> Result<(), SyneviError> { + let channel = Channel::from_shared(host.clone())?.connect().await?; let request = tonic::Request::new(ReadyElectorateRequest { - node_id: self.self_info.0.id.to_bytes().to_vec(), - node_serial: self.self_info.0.serial as u32, + node_id: self.self_status.info.id.to_bytes().to_vec(), + node_serial: self.self_status.info.serial as u32, }); let mut client = ReconfigurationServiceClient::new(channel); let _res = client.ready_electorate(request).await?.into_inner(); @@ -419,15 +446,9 @@ impl Network for GrpcNetwork { } async fn ready_member(&self, id: Ulid, _serial: u16) -> Result<(), SyneviError> { - let mut lock = self.members.write().await; - if let 
Some(member) = lock.get_mut(&id) { - let new_member = Member { - info: member.member.info.clone(), - host: member.member.host.clone(), - channel: member.member.channel.clone(), - ready_electorate: true, - }; - member.member = Arc::new(new_member); + let lock = self.members.read().await; + if let Some(member) = lock.get(&id) { + member.member.info.ready.store(true, Ordering::Relaxed); } else { return Err(SyneviError::NoMembersFound); } @@ -452,8 +473,8 @@ impl NetworkInterface for GrpcNetworkSet { BroadcastRequest::PreAccept(req, serial) => { // ... and then iterate over every member ... for replica in &self.members { - let ready = replica.ready_electorate; - let channel = replica.channel.clone(); + let ready = replica.member.info.ready.load(Ordering::Relaxed); + let channel = replica.member.channel.clone(); let inner = req.clone(); let mut request = tonic::Request::new(inner); request.metadata_mut().append( @@ -474,8 +495,8 @@ impl NetworkInterface for GrpcNetworkSet { } BroadcastRequest::Accept(req) => { for replica in &self.members { - let ready = replica.ready_electorate; - let channel = replica.channel.clone(); + let ready = replica.member.info.ready.load(Ordering::Relaxed); + let channel = replica.member.channel.clone(); let request = req.clone(); responses.spawn(async move { let mut client = ConsensusTransportClient::new(channel); @@ -488,8 +509,8 @@ impl NetworkInterface for GrpcNetworkSet { } BroadcastRequest::Commit(req) => { for replica in &self.members { - let ready = replica.ready_electorate; - let channel = replica.channel.clone(); + let ready = replica.member.info.ready.load(Ordering::Relaxed); + let channel = replica.member.channel.clone(); let request = req.clone(); responses.spawn(async move { let mut client = ConsensusTransportClient::new(channel); @@ -503,8 +524,8 @@ impl NetworkInterface for GrpcNetworkSet { BroadcastRequest::Apply(req) => { await_majority = false; for replica in &self.members { - let ready = replica.ready_electorate; - let 
channel = replica.channel.clone(); + let ready = replica.member.info.ready.load(Ordering::Relaxed); + let channel = replica.member.channel.clone(); let request = req.clone(); responses.spawn(async move { let mut client = ConsensusTransportClient::new(channel); @@ -520,8 +541,8 @@ impl NetworkInterface for GrpcNetworkSet { broadcast_all = true; for replica in &self.members { // TODO: Not sure if neccessary - let ready = replica.ready_electorate; - let channel = replica.channel.clone(); + let ready = replica.member.info.ready.load(Ordering::Relaxed); + let channel = replica.member.channel.clone(); let request = req.clone(); responses.spawn(async move { let mut client = ConsensusTransportClient::new(channel); @@ -537,7 +558,7 @@ impl NetworkInterface for GrpcNetworkSet { let all = self .members .iter() - .filter(|member| member.ready_electorate) + .filter(|member| member.member.info.ready.load(Ordering::Relaxed)) .count(); let majority = if all == 0 { 0 } else { (all / 2) + 1 }; let mut counter = 0_usize; @@ -559,12 +580,10 @@ impl NetworkInterface for GrpcNetworkSet { } Ok(Err(e)) => { tracing::error!("Error in response: {:?}", e); - println!("Error in response: {:?}", e); continue; } Err(_) => { tracing::error!("Join error"); - println!("Join error"); continue; } }; @@ -581,39 +600,75 @@ impl NetworkInterface for GrpcNetworkSet { result.push(res); } _ => { - println!("Recover: Join error"); tracing::error!("Recover: Join error"); continue; } } } } else { - //tokio::spawn(async move { - while let Some(r) = &responses.join_next().await { - match r { - Ok(Err(e)) => { - println!("Apply: Error in response: {:?}", e); - tracing::error!("Apply: Error in response: {:?}", e); - continue; - } - Err(_) => { - println!("Apply: Join error"); - tracing::error!("Apply: Join error"); - continue; - } - _ => {} - }; - } + tokio::spawn(async move { + while let Some(r) = &responses.join_next().await { + match r { + Ok(Err(e)) => { + tracing::error!("Apply: Error in response: {:?}", 
e); + continue; + } + Err(_) => { + tracing::error!("Apply: Join error"); + continue; + } + _ => {} + }; + } + }); + //}); return Ok(result); // No majority needed -> return early } } if result.len() < majority && !self.members.is_empty() { - println!("Majority not reached: {:?}/{}", result, majority); - println!("Members: {:?}", &self.members); + error!("Majority not reached: {:?}/{}", result, majority); + error!("Members: {:?}", &self.members); return Err(SyneviError::MajorityNotReached); } Ok(result) } + + async fn broadcast_recovery(&self, t0: T0) -> Result { + let mut responses: JoinSet> = JoinSet::new(); + let inner_request = TryRecoveryRequest { + timestamp_zero: t0.into(), + }; + for replica in &self.members { + let channel = replica.member.channel.clone(); + let request = tonic::Request::new(inner_request.clone()); + responses.spawn(async move { + let mut client = ConsensusTransportClient::new(channel); + let result = client.try_recovery(request).await?.into_inner().accepted; + Ok(result) + }); + } + + let mut counter = 0; + while let Some(result) = responses.join_next().await { + match result { + Ok(Ok(true)) => return Ok(true), + Ok(Ok(false)) => { + counter += 1; + continue; + } + errors => { + tracing::error!("Error in broadcast try_recovery: {:?}", errors); + continue; + } + } + } + + if counter > (self.members.len() / 2) { + Ok(false) + } else { + Err(SyneviError::UnrecoverableTransaction) + } + } } diff --git a/synevi_network/src/protos/configure_transport.proto b/synevi_network/src/protos/configure_transport.proto index 34307c5..48e41d2 100644 --- a/synevi_network/src/protos/configure_transport.proto +++ b/synevi_network/src/protos/configure_transport.proto @@ -22,28 +22,6 @@ message GetTimeResponse { // Skew ~ LocalTimestamp - (Timestamp + RoundTripLatency / 2) -> Between Node Get and Node Respond -service BroadcastService { - rpc RestartNode(RestartNodeRequest) returns (RestartNodeResponse) {} - rpc CustomBroadcast(CustomBroadcastRequest) returns 
(CustomBroadcastResponse) {} -} - -message RestartNodeRequest { - bytes last_applied = 1; // if new T0::default, else last_applied - bytes last_applied_hash = 2; -} - -message RestartNodeResponse { - bytes event = 1; // encoded events -} - -message CustomBroadcastRequest { - bytes message = 1; -} - -message CustomBroadcastResponse {} - - - service ReconfigurationService { rpc JoinElectorate(JoinElectorateRequest) returns (JoinElectorateResponse) {} rpc GetEvents(GetEventRequest) returns (stream GetEventResponse) {} @@ -58,16 +36,15 @@ message Config { uint32 node_serial = 1; bytes node_id = 2; string host = 3; + bool ready = 4; } message JoinElectorateResponse { - uint32 majority = 1; - bytes self_event = 2; + uint32 member_count = 1; } message GetEventRequest { bytes last_applied = 1; - bytes self_event = 2; } message GetEventResponse { @@ -93,14 +70,12 @@ message ReadyElectorateResponse {} service InitService { - rpc ReportLastApplied(ReportLastAppliedRequest) returns (ReportLastAppliedResponse) {} + rpc ReportElectorate(ReportElectorateRequest) returns (ReportElectorateResponse) {} } -message ReportLastAppliedRequest { - Config config = 1; - bytes last_applied = 2; - bytes last_applied_hash = 3; +message ReportElectorateRequest { + repeated Config configs = 1; } -message ReportLastAppliedResponse {} +message ReportElectorateResponse {} diff --git a/synevi_network/src/protos/consensus_transport.proto b/synevi_network/src/protos/consensus_transport.proto index 5447781..08356ae 100644 --- a/synevi_network/src/protos/consensus_transport.proto +++ b/synevi_network/src/protos/consensus_transport.proto @@ -8,6 +8,7 @@ service ConsensusTransport { rpc Accept(AcceptRequest) returns (AcceptResponse) {} rpc Apply(ApplyRequest) returns (ApplyResponse) {} rpc Recover(RecoverRequest) returns (RecoverResponse) {} + rpc TryRecovery(TryRecoveryRequest) returns (TryRecoveryResponse) {} } message PreAcceptRequest { @@ -86,3 +87,11 @@ message RecoverResponse { bytes timestamp = 5; 
bytes nack = 6; } + +message TryRecoveryRequest { + bytes timestamp_zero = 1; +} + +message TryRecoveryResponse { + bool accepted = 1; // True if recovery can be initiated else false +} diff --git a/synevi_network/src/reconfiguration.rs b/synevi_network/src/reconfiguration.rs index b8776b6..0bc4a38 100644 --- a/synevi_network/src/reconfiguration.rs +++ b/synevi_network/src/reconfiguration.rs @@ -1,63 +1,11 @@ -use crate::{ - configure_transport::{ - GetEventRequest, GetEventResponse, - JoinElectorateRequest, JoinElectorateResponse, ReadyElectorateRequest, - ReadyElectorateResponse, ReportLastAppliedRequest, ReportLastAppliedResponse, - }, - consensus_transport::{ - ApplyRequest, CommitRequest, - }, -}; -use std::{collections::BTreeMap, sync::Arc}; -use synevi_types::{SyneviError, T, T0}; -use tokio::sync::{ - mpsc::{channel, Receiver, Sender}, - Mutex, +use crate::configure_transport::{ + GetEventRequest, GetEventResponse, JoinElectorateRequest, JoinElectorateResponse, + ReadyElectorateRequest, ReadyElectorateResponse, ReportElectorateRequest, + ReportElectorateResponse, }; +use synevi_types::{SyneviError, T}; use ulid::Ulid; -pub struct ReplicaBuffer { - inner: Arc>>, - _notifier: Sender, -} - -impl ReplicaBuffer { - pub fn new(sdx: Sender) -> Self { - ReplicaBuffer { - inner: Arc::new(Mutex::new(BTreeMap::new())), - _notifier: sdx, - } - } - - pub async fn send_buffered( - &self, - ) -> Result>, SyneviError> { - let (sdx, rcv) = channel(100); - let inner = self.inner.clone(); - tokio::spawn(async move { - loop { - let mut lock = inner.lock().await; - if let Some(event) = lock.pop_first() { - sdx.send(Some(event)).await.map_err(|_| { - SyneviError::SendError( - "Channel for receiving buffered messages closed".to_string(), - ) - })?; - } else { - sdx.send(None).await.map_err(|_| { - SyneviError::SendError( - "Channel for receiving buffered messages closed".to_string(), - ) - })?; - break; - } - } - Ok::<(), SyneviError>(()) - }); - Ok(rcv) - } -} - 
#[async_trait::async_trait] pub trait Reconfiguration { // Existing nodes @@ -75,16 +23,10 @@ pub trait Reconfiguration { ) -> Result; // Joining node - async fn report_last_applied( + async fn report_electorate( &self, - request: ReportLastAppliedRequest, - ) -> Result; -} - -#[derive(Debug, Clone)] -pub enum BufferedMessage { - Commit(CommitRequest), - Apply(ApplyRequest), + request: ReportElectorateRequest, + ) -> Result; } #[derive(Debug)] diff --git a/synevi_network/src/replica.rs b/synevi_network/src/replica.rs index 7d33dfd..0743790 100644 --- a/synevi_network/src/replica.rs +++ b/synevi_network/src/replica.rs @@ -3,7 +3,7 @@ use crate::{ init_service_server::InitService, reconfiguration_service_server::ReconfigurationService, time_service_server::TimeService, GetEventRequest, GetEventResponse, GetTimeRequest, GetTimeResponse, JoinElectorateRequest, JoinElectorateResponse, ReadyElectorateRequest, - ReadyElectorateResponse, ReportLastAppliedRequest, ReportLastAppliedResponse, + ReadyElectorateResponse, ReportElectorateRequest, ReportElectorateResponse, }, consensus_transport::*, reconfiguration::Reconfiguration, @@ -22,31 +22,20 @@ pub trait Replica: Send + Sync { &self, request: PreAcceptRequest, node_serial: u16, - ready: bool, ) -> Result; - async fn accept( - &self, - request: AcceptRequest, - ready: bool, - ) -> Result; + async fn accept(&self, request: AcceptRequest) -> Result; - async fn commit( - &self, - request: CommitRequest, - ready: bool, - ) -> Result; + async fn commit(&self, request: CommitRequest) -> Result; - async fn apply(&self, request: ApplyRequest, ready: bool) - -> Result; + async fn apply(&self, request: ApplyRequest) -> Result; - async fn recover( - &self, - request: RecoverRequest, - ready: bool, - ) -> Result; + async fn recover(&self, request: RecoverRequest) -> Result; - fn is_ready(&self) -> bool; + async fn try_recover( + &self, + request: TryRecoveryRequest, + ) -> Result; } pub struct ReplicaBox @@ -136,7 +125,7 @@ where 
Ok(Response::new( self.inner - .pre_accept(request, serial, self.inner.is_ready()) + .pre_accept(request, serial) .await .map_err(|e| Status::internal(e.to_string()))?, )) @@ -148,7 +137,7 @@ where ) -> Result, Status> { Ok(Response::new( self.inner - .accept(request.into_inner(), self.inner.is_ready()) + .accept(request.into_inner()) .await .map_err(|e| tonic::Status::internal(e.to_string()))?, )) @@ -160,7 +149,7 @@ where ) -> Result, Status> { Ok(Response::new( self.inner - .commit(request.into_inner(), self.inner.is_ready()) + .commit(request.into_inner()) .await .map_err(|e| tonic::Status::internal(e.to_string()))?, )) @@ -172,7 +161,7 @@ where ) -> Result, Status> { Ok(Response::new( self.inner - .apply(request.into_inner(), self.inner.is_ready()) + .apply(request.into_inner()) .await .map_err(|e| tonic::Status::internal(e.to_string()))?, )) @@ -184,7 +173,19 @@ where ) -> Result, Status> { Ok(Response::new( self.inner - .recover(request.into_inner(), self.inner.is_ready()) + .recover(request.into_inner()) + .await + .map_err(|e| tonic::Status::internal(e.to_string()))?, + )) + } + + async fn try_recovery( + &self, + request: Request, + ) -> Result, Status> { + Ok(Response::new( + self.inner + .try_recover(request.into_inner()) .await .map_err(|e| tonic::Status::internal(e.to_string()))?, )) @@ -246,13 +247,13 @@ impl ReconfigurationService for ReplicaB } #[async_trait::async_trait] impl InitService for ReplicaBox { - async fn report_last_applied( + async fn report_electorate( &self, - request: tonic::Request, - ) -> Result, tonic::Status> { + request: tonic::Request, + ) -> Result, tonic::Status> { Ok(Response::new( self.inner - .report_last_applied(request.into_inner()) + .report_electorate(request.into_inner()) .await .map_err(|e| tonic::Status::internal(e.to_string()))?, )) diff --git a/synevi_persistence/Cargo.toml b/synevi_persistence/Cargo.toml index 27aba4e..25cc2af 100644 --- a/synevi_persistence/Cargo.toml +++ b/synevi_persistence/Cargo.toml @@ -8,7 
+8,7 @@ license.workspace = true description.workspace = true [dependencies] -monotime = { path = "../monotime", version = "0.1.0"} +monotime = { path = "../monotime", version = "0.1.0" } synevi_types = { path = "../synevi_types", version = "0.1.0" } bytes = { workspace = true } tokio = { workspace = true } @@ -26,3 +26,4 @@ thiserror = { workspace = true } async-channel = "2.3.1" ahash = { workspace = true } bincode = "1.3.3" +redb = { version = "2.1.4" } diff --git a/synevi_persistence/src/lib.rs b/synevi_persistence/src/lib.rs index 3800e03..e287d2e 100644 --- a/synevi_persistence/src/lib.rs +++ b/synevi_persistence/src/lib.rs @@ -1,2 +1,3 @@ -pub mod database; +pub mod lmdb_store; pub mod mem_store; +pub mod redb_store; diff --git a/synevi_persistence/src/database.rs b/synevi_persistence/src/lmdb_store.rs similarity index 60% rename from synevi_persistence/src/database.rs rename to synevi_persistence/src/lmdb_store.rs index c47150e..4e898a1 100644 --- a/synevi_persistence/src/database.rs +++ b/synevi_persistence/src/lmdb_store.rs @@ -2,48 +2,57 @@ use ahash::RandomState; use heed::{ byteorder::BigEndian, types::{SerdeBincode, U128}, - Database, Env, EnvOpenOptions, + Database, Env, EnvFlags, EnvOpenOptions, }; use monotime::MonoTime; -use std::collections::{BTreeMap, HashSet}; +use std::{ + collections::{BTreeMap, HashSet}, + sync::{Arc, Mutex}, +}; use synevi_types::{ error::SyneviError, traits::Store, types::{Event, Hashes, RecoverDependencies, RecoverEvent, UpsertEvent}, Ballot, State, T, T0, }; -use tokio::sync::{mpsc::Receiver, Mutex}; +use tokio::sync::mpsc::Receiver; use tracing::instrument; const EVENT_DB_NAME: &str = "events"; +const ID_MAPPINGS_DB_NAME: &str = "id_mappings"; type EventDb = Database, SerdeBincode>; -#[derive(Debug)] -pub struct PersistentStore { - data: Mutex, +#[derive(Clone, Debug)] +pub struct LmdbStore { + data: Arc>, } #[derive(Clone, Debug)] -struct MutableData { +struct InternalData { db: Env, + events: EventDb, + id_mappings: 
Database, U128>, pub(crate) mappings: BTreeMap, // Key: t, value t0 pub last_applied: T, // t of last applied entry - pub(crate) latest_t0: T0, // last created or recognized t0 + pub(crate) latest_time: MonoTime, // last created or recognized t0 pub node_serial: u16, latest_hash: [u8; 32], } -impl PersistentStore { - pub fn new(path: String, node_serial: u16) -> Result { +impl LmdbStore { + pub fn new(path: String, node_serial: u16) -> Result { let env = unsafe { EnvOpenOptions::new() - .map_size(1024 * 1024 * 1024) + .map_size(10 * 1024 * 1024 * 1024) .max_dbs(16) + .flags(EnvFlags::MAP_ASYNC | EnvFlags::WRITE_MAP) .open(path)? }; let env_clone = env.clone(); let mut write_txn = env.write_txn()?; let events_db: Option = env.open_database(&write_txn, Some(EVENT_DB_NAME))?; + let id_mappings: Database<_, _> = + env.create_database(&mut write_txn, Some(ID_MAPPINGS_DB_NAME))?; match events_db { Some(db) => { let result = db @@ -59,7 +68,7 @@ impl PersistentStore { let mut mappings = BTreeMap::default(); let mut last_applied = T::default(); - let mut latest_t0 = T0::default(); + let mut latest_time = MonoTime::default(); let mut latest_hash: [u8; 32] = [0; 32]; for event in result { mappings.insert(event.t, event.t_zero); @@ -71,50 +80,56 @@ impl PersistentStore { return Err(SyneviError::MissingTransactionHash); }; } - if event.t_zero > latest_t0 { - latest_t0 = event.t_zero; + if *event.t > latest_time { + latest_time = *event.t; } } write_txn.commit()?; - Ok(PersistentStore { + Ok(LmdbStore { //db: env_clone, - data: Mutex::new(MutableData { + data: Arc::new(Mutex::new(InternalData { db: env_clone, + events: db, + id_mappings, mappings, last_applied, - latest_t0, + latest_time, node_serial, latest_hash, - }), + })), }) } None => { - let _: EventDb = env.create_database(&mut write_txn, Some(EVENT_DB_NAME))?; + let events: EventDb = env.create_database(&mut write_txn, Some(EVENT_DB_NAME))?; write_txn.commit()?; - Ok(PersistentStore { - data: Mutex::new(MutableData { + 
Ok(LmdbStore { + data: Arc::new(Mutex::new(InternalData { db: env_clone, + events, + id_mappings, mappings: BTreeMap::default(), last_applied: T::default(), - latest_t0: T0::default(), + latest_time: MonoTime::default(), node_serial, latest_hash: [0; 32], - }), + })), }) } } } } -#[async_trait::async_trait] -impl Store for PersistentStore { +impl Store for LmdbStore { #[instrument(level = "trace")] - async fn init_t_zero(&self, node_serial: u16) -> T0 { - self.data.lock().await.init_t_zero(node_serial).await + fn init_t_zero(&self, node_serial: u16) -> T0 { + self.data + .lock() + .expect("poisoned lock, aborting") + .init_t_zero(node_serial) } #[instrument(level = "trace")] - async fn pre_accept_tx( + fn pre_accept_tx( &self, id: u128, t_zero: T0, @@ -122,127 +137,160 @@ impl Store for PersistentStore { ) -> Result<(T, HashSet), SyneviError> { self.data .lock() - .await + .expect("poisoned lock, aborting") .pre_accept_tx(id, t_zero, transaction) - .await } #[instrument(level = "trace")] - async fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> HashSet { - self.data.lock().await.get_tx_dependencies(t, t_zero).await + fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> HashSet { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_tx_dependencies(t, t_zero) } #[instrument(level = "trace")] - async fn accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option { + fn accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option { self.data .lock() - .await + .expect("poisoned lock, aborting") .accept_tx_ballot(t_zero, ballot) - .await } #[instrument(level = "trace", skip(self))] - async fn upsert_tx(&self, upsert_event: UpsertEvent) -> Result<(), SyneviError> { - self.data.lock().await.upsert_tx(upsert_event).await + fn upsert_tx(&self, upsert_event: UpsertEvent) -> Result<(), SyneviError> { + self.data + .lock() + .expect("poisoned lock, aborting") + .upsert_tx(upsert_event) } #[instrument(level = "trace")] - async fn get_recover_deps(&self, 
t_zero: &T0) -> Result { - self.data.lock().await.get_recover_deps(t_zero).await + fn get_recover_deps(&self, t_zero: &T0) -> Result { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_recover_deps(t_zero) } #[instrument(level = "trace")] - async fn get_event_state(&self, t_zero: &T0) -> Option { - self.data.lock().await.get_event_state(t_zero).await + fn get_event_state(&self, t_zero: &T0) -> Option { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_event_state(t_zero) } #[instrument(level = "trace")] - async fn recover_event( + fn recover_event( &self, t_zero_recover: &T0, node_serial: u16, - ) -> Result { + ) -> Result, SyneviError> { self.data .lock() - .await + .expect("poisoned lock, aborting") .recover_event(t_zero_recover, node_serial) - .await } #[instrument(level = "trace")] - async fn get_event_store(&self) -> BTreeMap { - self.data.lock().await.get_event_store().await + fn get_event_store(&self) -> BTreeMap { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_event_store() } #[instrument(level = "trace")] - async fn last_applied(&self) -> (T, T0) { - self.data.lock().await.last_applied().await + fn last_applied(&self) -> (T, T0) { + self.data + .lock() + .expect("poisoned lock, aborting") + .last_applied() } #[instrument(level = "trace")] - async fn get_events_after( + fn get_events_after( &self, last_applied: T, - self_event: u128, ) -> Result>, SyneviError> { self.data .lock() - .await - .get_events_after(last_applied, self_event) - .await + .expect("poisoned lock, aborting") + .get_events_after(last_applied) } #[instrument(level = "trace", skip(self))] - async fn get_event(&self, t_zero: T0) -> Result, SyneviError> { - self.data.lock().await.get_event(t_zero).await - } - - async fn get_and_update_hash( - &self, - t_zero: T0, - execution_hash: [u8; 32], - ) -> Result { + fn get_event(&self, t_zero: T0) -> Result, SyneviError> { self.data .lock() - .await - .get_and_update_hash(t_zero, execution_hash) - 
.await + .expect("poisoned lock, aborting") + .get_event(t_zero) } - #[instrument(level = "trace", skip(self))] - async fn last_applied_hash(&self) -> Result<(T, [u8; 32]), SyneviError> { - self.data.lock().await.last_applied_hash().await + fn inc_time_with_guard(&self, guard: T0) -> Result<(), SyneviError> { + let mut lock = self.data.lock().expect("poisoned lock, aborting"); + lock.latest_time = lock + .latest_time + .next_with_guard_and_node(&guard, lock.node_serial) + .into_time(); + Ok(()) + } + + fn get_or_update_transaction_hash(&self, event: UpsertEvent) -> Result { + let lock = self.data.lock().expect("poisoned lock, aborting"); + if let Some(event) = lock.get_event(event.t_zero)? { + if event.state == State::Applied { + if let Some(hashes) = event.hashes { + return Ok(hashes); + } + } + } + let mut event = Event::from(event); + event.state = State::Applied; + Ok(event.hash_event(lock.latest_hash)) + } + + fn get_event_by_id(&self, id: u128) -> Result, SyneviError> { + let lock = self.data.lock().expect("poisoned lock, aborting"); + let read_txn = lock.db.read_txn()?; + let Some(mapping) = lock.id_mappings.get(&read_txn, &id)? 
else { + return Ok(None); + }; + let result = lock.events.get(&read_txn, &mapping).map_err(Into::into); + read_txn.commit()?; + result } } -impl MutableData { + +impl InternalData { #[instrument(level = "trace")] - async fn init_t_zero(&mut self, node_serial: u16) -> T0 { - let t0 = T0(self.latest_t0.next_with_node(node_serial).into_time()); - self.latest_t0 = t0; - t0 + fn init_t_zero(&mut self, node_serial: u16) -> T0 { + let next_time = self.latest_time.next_with_node(node_serial).into_time(); + self.latest_time = next_time; + T0(next_time) } #[instrument(level = "trace")] - async fn pre_accept_tx( + fn pre_accept_tx( &mut self, id: u128, t_zero: T0, transaction: Vec, ) -> Result<(T, HashSet), SyneviError> { let (t, deps) = { - let t = T(if let Some((last_t, _)) = self.mappings.last_key_value() { - if **last_t > *t_zero { - t_zero - .next_with_guard_and_node(last_t, self.node_serial) - .into_time() - } else { - *t_zero - } + let t = if self.latest_time > *t_zero { + let new_time_t = t_zero + .next_with_guard_and_node(&self.latest_time, self.node_serial) + .into_time(); + + self.latest_time = new_time_t; + T(new_time_t) } else { - // No entries in the map -> insert the new event - *t_zero - }); - let deps = self.get_tx_dependencies(&t, &t_zero).await; + T(*t_zero) + }; + // This might not be necessary to re-use the write lock here + let deps = self.get_tx_dependencies(&t, &t_zero); (t, deps) }; @@ -255,12 +303,13 @@ impl MutableData { dependencies: Some(deps.clone()), ..Default::default() }; - self.upsert_tx(event).await?; + self.upsert_tx(event)?; + //self.db.force_sync()?; Ok((t, deps)) } #[instrument(level = "trace")] - async fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> HashSet { + fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> HashSet { if self.last_applied == *t { return HashSet::default(); } @@ -286,41 +335,43 @@ impl MutableData { } #[instrument(level = "trace")] - async fn accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option { + fn 
accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option { let mut write_txn = self.db.write_txn().ok()?; - let events_db: EventDb = self - .db - .open_database(&write_txn, Some(EVENT_DB_NAME)) - .ok()??; - let mut event = events_db.get(&write_txn, &t_zero.get_inner()).ok()??; + let mut event = self.events.get(&write_txn, &t_zero.get_inner()).ok()??; if event.ballot < ballot { event.ballot = ballot; - let _ = events_db.put(&mut write_txn, &t_zero.get_inner(), &event); + let _ = self.events.put(&mut write_txn, &t_zero.get_inner(), &event); } write_txn.commit().ok()?; + //self.db.force_sync().ok()?; Some(event.ballot) } #[instrument(level = "trace", skip(self))] - async fn upsert_tx(&mut self, upsert_event: UpsertEvent) -> Result<(), SyneviError> { + fn upsert_tx(&mut self, upsert_event: UpsertEvent) -> Result<(), SyneviError> { //let db = self.db.clone(); + // Update the latest time + if self.latest_time < *upsert_event.t { + self.latest_time = *upsert_event.t; + } + let mut write_txn = self.db.write_txn()?; - let events_db: EventDb = self - .db - .open_database(&write_txn, Some(EVENT_DB_NAME))? 
- .ok_or_else(|| SyneviError::DatabaseNotFound(EVENT_DB_NAME))?; + let events_db: EventDb = self.events; + let event = events_db.get(&write_txn, &upsert_event.t_zero.get_inner())?; let Some(mut event) = event else { let mut event = Event::from(upsert_event.clone()); - // Update the latest t0 - if self.latest_t0 < event.t_zero { - self.latest_t0 = event.t_zero; - } + // Not an update -> Add events id mapping + self.id_mappings.put( + &mut write_txn, + &upsert_event.id, + &upsert_event.t_zero.get_inner(), + )?; if matches!(event.state, State::Applied) { self.mappings.insert(event.t, event.t_zero); @@ -340,11 +391,14 @@ impl MutableData { } let last_t = self.last_applied; + // Safeguard assert!(last_t < event.t); self.last_applied = event.t; - let hashes = event.hash_event(self.latest_hash); + let hashes = upsert_event + .hashes + .ok_or_else(|| SyneviError::MissingExecutionHash)?; self.latest_hash = hashes.transaction_hash; event.hashes = Some(hashes.clone()); @@ -357,11 +411,6 @@ impl MutableData { return Ok(()); }; - // Update the latest t0 - if self.latest_t0 < event.t_zero { - self.latest_t0 = event.t_zero; - } - // Do not update to a "lower" state if upsert_event.state < event.state { write_txn.commit()?; @@ -396,11 +445,16 @@ impl MutableData { if event.state == State::Applied { let last_t = self.last_applied; - // Safeguard + + if last_t > event.t { + println!("last_t: {:?}, event.t: {:?}", last_t, event.t); + } assert!(last_t < event.t); self.last_applied = event.t; - let hashes = event.hash_event(self.latest_hash); + let hashes = upsert_event + .hashes + .ok_or_else(|| SyneviError::MissingExecutionHash)?; self.latest_hash = hashes.transaction_hash; event.hashes = Some(hashes.clone()); }; @@ -414,13 +468,10 @@ impl MutableData { } #[instrument(level = "trace")] - async fn get_recover_deps(&self, t_zero: &T0) -> Result { + fn get_recover_deps(&self, t_zero: &T0) -> Result { let read_txn = self.db.read_txn()?; - let db: EventDb = self - .db - 
.open_database(&read_txn, Some(EVENT_DB_NAME))? - .ok_or_else(|| SyneviError::DatabaseNotFound(EVENT_DB_NAME))?; - let timestamp = db + let timestamp = self + .events .get(&read_txn, &t_zero.get_inner())? .ok_or_else(|| SyneviError::EventNotFound(t_zero.get_inner()))? .t; @@ -430,7 +481,8 @@ impl MutableData { }; for (t_dep, t_zero_dep) in self.mappings.range(self.last_applied..) { - let dep_event = db + let dep_event = self + .events .get(&read_txn, &t_zero_dep.get_inner())? .ok_or_else(|| SyneviError::DependencyNotFound(t_zero_dep.get_inner()))?; match dep_event.state { @@ -450,7 +502,7 @@ impl MutableData { } } } - State::Commited => { + State::Committed => { if dep_event .dependencies .iter() @@ -469,51 +521,47 @@ impl MutableData { recover_deps.dependencies.insert(*t_zero_dep); } } + read_txn.commit()?; Ok(recover_deps) } - async fn get_event_state(&self, t_zero: &T0) -> Option { + fn get_event_state(&self, t_zero: &T0) -> Option { let read_txn = self.db.read_txn().ok()?; - let db: EventDb = self - .db - .open_database(&read_txn, Some(EVENT_DB_NAME)) - .ok()??; - let state = db + let state = self + .events .get(&read_txn, &t_zero.get_inner()) .ok()? .ok_or_else(|| SyneviError::EventNotFound(t_zero.get_inner())) .ok()? .state; + read_txn.commit().ok()?; Some(state) } - async fn recover_event( + fn recover_event( &self, t_zero_recover: &T0, node_serial: u16, - ) -> Result { - let Some(state) = self.get_event_state(t_zero_recover).await else { - return Err(SyneviError::EventNotFound(t_zero_recover.get_inner())); + ) -> Result, SyneviError> { + let Some(state) = self.get_event_state(t_zero_recover) else { + return Ok(None); }; if matches!(state, synevi_types::State::Undefined) { return Err(SyneviError::UndefinedRecovery); } let mut write_txn = self.db.write_txn()?; - let db: EventDb = self - .db - .open_database(&write_txn, Some(EVENT_DB_NAME))? 
- .ok_or_else(|| SyneviError::DatabaseNotFound(EVENT_DB_NAME))?; - let event = db.get(&write_txn, &t_zero_recover.get_inner())?; + let event = self.events.get(&write_txn, &t_zero_recover.get_inner())?; if let Some(mut event) = event { event.ballot = Ballot(event.ballot.next_with_node(node_serial).into_time()); - db.put(&mut write_txn, &t_zero_recover.get_inner(), &event)?; + self.events + .put(&mut write_txn, &t_zero_recover.get_inner(), &event)?; write_txn.commit()?; - Ok(RecoverEvent { + Ok(Some(RecoverEvent { id: event.id, t_zero: event.t_zero, t: event.t, @@ -521,22 +569,18 @@ impl MutableData { transaction: event.transaction.clone(), dependencies: event.dependencies.clone(), ballot: event.ballot, - }) + })) } else { - Err(SyneviError::EventNotFound(t_zero_recover.get_inner())) + write_txn.commit()?; + Ok(None) } } - async fn get_event_store(&self) -> BTreeMap { + fn get_event_store(&self) -> BTreeMap { // TODO: Remove unwrap and change trait result let read_txn = self.db.read_txn().unwrap(); - let events_db: Database, SerdeBincode> = self - .db - .open_database(&read_txn, Some(EVENT_DB_NAME)) - .unwrap() - .ok_or_else(|| SyneviError::DatabaseNotFound(EVENT_DB_NAME)) - .unwrap(); - let result = events_db + let result = self + .events .iter(&read_txn) .unwrap() .filter_map(|e| { @@ -547,20 +591,20 @@ impl MutableData { } }) .collect::>(); + read_txn.commit().unwrap(); result } - async fn last_applied(&self) -> (T, T0) { - let t = self.last_applied.clone(); + fn last_applied(&self) -> (T, T0) { + let t = self.last_applied; let t0 = self.mappings.get(&t).cloned().unwrap_or(T0::default()); (t, t0) } - async fn get_events_after( + fn get_events_after( &self, last_applied: T, - _self_event: u128, ) -> Result>, SyneviError> { let (sdx, rcv) = tokio::sync::mpsc::channel(200); let db = self.db.clone(); @@ -569,88 +613,28 @@ impl MutableData { None if last_applied == T::default() => T0::default(), _ => return Err(SyneviError::EventNotFound(last_applied.get_inner())), }; 
+ + let events_db = self.events; tokio::task::spawn_blocking(move || { let read_txn = db.read_txn()?; let range = last_applied_t0.get_inner()..; - let events_db: EventDb = db - .open_database(&read_txn, Some(EVENT_DB_NAME))? - .ok_or_else(|| SyneviError::DatabaseNotFound(EVENT_DB_NAME))?; - for result in events_db.range(&read_txn, &range)? { let (_t0, event) = result?; sdx.blocking_send(Ok(event)) .map_err(|e| SyneviError::SendError(e.to_string()))?; } + read_txn.commit()?; Ok::<(), SyneviError>(()) }); Ok(rcv) } - async fn get_event(&self, t_zero: T0) -> Result, SyneviError> { + fn get_event(&self, t_zero: T0) -> Result, SyneviError> { let read_txn = self.db.read_txn()?; - let db: EventDb = self - .db - .open_database(&read_txn, Some(EVENT_DB_NAME))? - .ok_or_else(|| SyneviError::DatabaseNotFound(EVENT_DB_NAME))?; - let event = db.get(&read_txn, &t_zero.get_inner())?; + let event = self.events.get(&read_txn, &t_zero.get_inner())?; + read_txn.commit()?; Ok(event) } - - async fn get_and_update_hash( - &self, - t_zero: T0, - execution_hash: [u8; 32], - ) -> Result { - let t_zero = t_zero.get_inner(); - let mut write_txn = self.db.write_txn()?; - let db: EventDb = self - .db - .open_database(&write_txn, Some(EVENT_DB_NAME))? - .ok_or_else(|| SyneviError::DatabaseNotFound(EVENT_DB_NAME))?; - let Some(mut event) = db.get(&write_txn, &t_zero)? 
else { - return Err(SyneviError::EventNotFound(t_zero)); - }; - let Some(mut hashes) = event.hashes else { - return Err(SyneviError::MissingTransactionHash); - }; - hashes.execution_hash = execution_hash; - event.hashes = Some(hashes.clone()); - - db.put(&mut write_txn, &t_zero, &event)?; - write_txn.commit()?; - Ok(hashes) - } - - async fn last_applied_hash(&self) -> Result<(T, [u8; 32]), SyneviError> { - let last = self.last_applied; - let last_t0 = self - .mappings - .get(&last) - .ok_or_else(|| SyneviError::EventNotFound(last.get_inner()))?; - let read_txn = self.db.read_txn()?; - let db: EventDb = self - .db - .open_database(&read_txn, Some(EVENT_DB_NAME))? - .ok_or_else(|| SyneviError::DatabaseNotFound(EVENT_DB_NAME))?; - let event = db - .get(&read_txn, &last_t0.get_inner())? - .ok_or_else(|| SyneviError::EventNotFound(last_t0.get_inner()))? - .hashes - .ok_or_else(|| SyneviError::MissingExecutionHash)?; - Ok((last, event.execution_hash)) - } -} - -#[cfg(test)] -mod tests { - - #[test] - fn test_db() { - // TODO - //let db = Database::new("../../tests/database".to_string()).unwrap(); - //db.init(Bytes::from("key"), Bytes::from("value")) - // .unwrap() - } } diff --git a/synevi_persistence/src/mem_store.rs b/synevi_persistence/src/mem_store.rs index f4e80d8..619df16 100644 --- a/synevi_persistence/src/mem_store.rs +++ b/synevi_persistence/src/mem_store.rs @@ -1,7 +1,8 @@ use ahash::RandomState; -use std::collections::{BTreeMap, HashSet}; +use monotime::MonoTime; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Debug; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use synevi_types::error::SyneviError; use synevi_types::traits::{Dependencies, Store}; use synevi_types::types::RecoverEvent; @@ -9,32 +10,33 @@ use synevi_types::types::{Event, Hashes, RecoverDependencies, UpsertEvent}; use synevi_types::State; use synevi_types::{Ballot, T, T0}; use tokio::sync::mpsc::{Receiver, Sender}; -use tokio::sync::Mutex; use tracing::instrument; 
-#[derive(Debug)] -pub struct InternalStore { - pub events: BTreeMap, // Key: t0, value: Event - pub(crate) mappings: BTreeMap, // Key: t, value t0 - pub last_applied: T, // t of last applied entry - pub(crate) latest_t0: T0, // last created or recognized t0 +#[derive(Debug, Clone)] +struct InternalStore { + pub(crate) id_map: HashMap, // Key: id, value: t0 + pub events: BTreeMap, // Key: t0, value: Event + pub(crate) mappings: BTreeMap, // Key: t, value t0 + pub last_applied: T, // t of last applied entry + pub(crate) latest_time: MonoTime, // last created or recognized time pub node_serial: u16, latest_hash: [u8; 32], } #[derive(Debug)] pub struct MemStore { - pub store: Arc>, + store: Arc>, } impl MemStore { #[instrument(level = "trace")] pub fn new(node_serial: u16) -> Result { let store = Arc::new(Mutex::new(InternalStore { + id_map: HashMap::default(), events: BTreeMap::default(), mappings: BTreeMap::default(), last_applied: T::default(), - latest_t0: T0::default(), + latest_time: MonoTime::default(), node_serial, latest_hash: [0; 32], })); @@ -42,13 +44,15 @@ impl MemStore { } } -#[async_trait::async_trait] impl Store for MemStore { - async fn init_t_zero(&self, node_serial: u16) -> T0 { - self.store.lock().await.init_t_zero(node_serial) + fn init_t_zero(&self, node_serial: u16) -> T0 { + self.store + .lock() + .expect("poisoned lock, aborting") + .init_t_zero(node_serial) } - async fn pre_accept_tx( + fn pre_accept_tx( &self, id: u128, t_zero: T0, @@ -56,114 +60,136 @@ impl Store for MemStore { ) -> Result<(T, Dependencies), SyneviError> { self.store .lock() - .await + .expect("poisoned lock, aborting") .pre_accept_tx(id, t_zero, transaction) } - async fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> Dependencies { - self.store.lock().await.get_tx_dependencies(t, t_zero) + fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> Dependencies { + self.store + .lock() + .expect("poisoned lock, aborting") + .get_tx_dependencies(t, t_zero) } - async fn 
get_recover_deps(&self, t_zero: &T0) -> Result { - self.store.lock().await.get_recover_deps(t_zero) + fn get_recover_deps(&self, t_zero: &T0) -> Result { + self.store + .lock() + .expect("poisoned lock, aborting") + .get_recover_deps(t_zero) } - async fn recover_event( + fn recover_event( &self, t_zero_recover: &T0, node_serial: u16, - ) -> Result { + ) -> Result, SyneviError> { self.store .lock() - .await + .expect("poisoned lock, aborting") .recover_event(t_zero_recover, node_serial) } - async fn accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option { - self.store.lock().await.accept_tx_ballot(t_zero, ballot) + fn accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option { + self.store + .lock() + .expect("poisoned lock, aborting") + .accept_tx_ballot(t_zero, ballot) } - async fn upsert_tx(&self, upsert_event: UpsertEvent) -> Result<(), SyneviError> { - self.store.lock().await.upsert_tx(upsert_event) + fn upsert_tx(&self, upsert_event: UpsertEvent) -> Result<(), SyneviError> { + self.store + .lock() + .expect("poisoned lock, aborting") + .upsert_tx(upsert_event) } - async fn get_event_state(&self, t_zero: &T0) -> Option { - self.store.lock().await.get_event_state(t_zero) + fn get_event_state(&self, t_zero: &T0) -> Option { + self.store + .lock() + .expect("poisoned lock, aborting") + .get_event_state(t_zero) } - async fn get_event_store(&self) -> BTreeMap { - self.store.lock().await.get_event_store() + fn get_event_store(&self) -> BTreeMap { + self.store + .lock() + .expect("poisoned lock, aborting") + .get_event_store() } - async fn last_applied(&self) -> (T, T0) { - self.store.lock().await.last_applied() + fn last_applied(&self) -> (T, T0) { + self.store + .lock() + .expect("poisoned lock, aborting") + .last_applied() } - async fn get_events_after( + fn get_events_after( &self, last_applied: T, - self_event: u128, ) -> Result>, SyneviError> { let (sdx, rcv) = tokio::sync::mpsc::channel(100); let store = self.store.clone(); - tokio::spawn(async 
move { + tokio::task::spawn_blocking(move || { store .lock() - .await - .get_events_until(last_applied, self_event, sdx) - .await?; + .expect("poisoned lock, aborting") + .get_events_after(last_applied, sdx)?; Ok::<(), SyneviError>(()) }); Ok(rcv) } - async fn get_event(&self, t_zero: T0) -> Result, SyneviError> { - Ok(self.store.lock().await.events.get(&t_zero).cloned()) + fn get_event(&self, t_zero: T0) -> Result, SyneviError> { + Ok(self + .store + .lock() + .expect("poisoned lock, aborting") + .events + .get(&t_zero) + .cloned()) } - async fn get_and_update_hash( - &self, - t_zero: T0, - execution_hash: [u8; 32], - ) -> Result { - let mut lock = self.store.lock().await; - if let Some(event) = lock.events.get_mut(&t_zero) { - let hashes = event - .hashes - .as_mut() - .ok_or_else(|| SyneviError::MissingTransactionHash)?; - hashes.execution_hash = execution_hash; - Ok(hashes.clone()) - } else { - Err(SyneviError::EventNotFound(t_zero.get_inner())) - } + fn get_event_by_id(&self, id: u128) -> Result, SyneviError> { + let store = self.store.lock().expect("poisoned lock, aborting"); + Ok(store + .id_map + .get(&id) + .and_then(|t0| store.events.get(t0)) + .cloned()) } - async fn last_applied_hash(&self) -> Result<(T, [u8; 32]), SyneviError> { - let lock = self.store.lock().await; - let last = lock.last_applied; - let last_t0 = lock - .mappings - .get(&last) - .ok_or_else(|| SyneviError::EventNotFound(last.get_inner()))?; - let hash = lock - .events - .get(last_t0) - .cloned() - .ok_or_else(|| SyneviError::EventNotFound(last.get_inner()))? 
- .hashes - .ok_or_else(|| SyneviError::MissingExecutionHash)?; - Ok((last, hash.execution_hash)) + fn inc_time_with_guard(&self, guard: T0) -> Result<(), SyneviError> { + let mut lock = self.store.lock().expect("poisoned lock, aborting"); + lock.latest_time = lock + .latest_time + .next_with_guard_and_node(&guard, lock.node_serial) + .into_time(); + Ok(()) + } + + fn get_or_update_transaction_hash(&self, event: UpsertEvent) -> Result { + let lock = self.store.lock().expect("poisoned lock, aborting"); + if let Some(event) = lock.events.get(&event.t_zero) { + if event.state == State::Applied { + if let Some(hashes) = &event.hashes { + return Ok(hashes.clone()); + } + } + } + let mut event = Event::from(event); + event.state = State::Applied; + Ok(event.hash_event(lock.latest_hash)) } } impl InternalStore { #[instrument(level = "trace")] fn init_t_zero(&mut self, node_serial: u16) -> T0 { - let t0 = T0(self.latest_t0.next_with_node(node_serial).into_time()); - self.latest_t0 = t0; - t0 + let next_time = self.latest_time.next_with_node(node_serial).into_time(); + self.latest_time = next_time; + T0(next_time) } #[instrument(level = "trace")] @@ -174,18 +200,16 @@ impl InternalStore { transaction: Vec, ) -> Result<(T, HashSet), SyneviError> { let (t, deps) = { - let t = T(if let Some((last_t, _)) = self.mappings.last_key_value() { - if **last_t > *t_zero { - t_zero - .next_with_guard_and_node(last_t, self.node_serial) - .into_time() - } else { - *t_zero - } + let t = if self.latest_time > *t_zero { + let new_time_t = t_zero + .next_with_guard_and_node(&self.latest_time, self.node_serial) + .into_time(); + + self.latest_time = new_time_t; + T(new_time_t) } else { - // No entries in the map -> insert the new event - *t_zero - }); + T(*t_zero) + }; // This might not be necessary to re-use the write lock here let deps = self.get_tx_dependencies(&t, &t_zero); (t, deps) @@ -238,8 +262,17 @@ impl InternalStore { #[instrument(level = "trace")] fn upsert_tx(&mut self, 
upsert_event: UpsertEvent) -> Result<(), SyneviError> { + // Update the latest time + if self.latest_time < *upsert_event.t { + self.latest_time = *upsert_event.t; + } + let Some(event) = self.events.get_mut(&upsert_event.t_zero) else { let mut event = Event::from(upsert_event.clone()); + + // Not an update -> Add id mapping + self.id_map.insert(event.id, event.t_zero); + if matches!(event.state, State::Applied) { self.mappings.insert(event.t, event.t_zero); if let Some(deps) = upsert_event.dependencies { @@ -258,7 +291,9 @@ impl InternalStore { } assert!(self.last_applied < event.t); self.last_applied = event.t; - let hashes = event.hash_event(self.latest_hash); + let hashes = upsert_event + .hashes + .ok_or_else(|| SyneviError::MissingExecutionHash)?; self.latest_hash = hashes.transaction_hash; event.hashes = Some(hashes); self.events.insert(upsert_event.t_zero, event); @@ -270,11 +305,6 @@ impl InternalStore { return Ok(()); }; - // Update the latest t0 - if self.latest_t0 < event.t_zero { - self.latest_t0 = event.t_zero; - } - // Do not update to a "lower" state if upsert_event.state < event.state { return Ok(()); @@ -308,7 +338,9 @@ impl InternalStore { if event.state == State::Applied { assert!(self.last_applied < event.t); self.last_applied = event.t; - let hashes = event.hash_event(self.latest_hash); + let hashes = upsert_event + .hashes + .ok_or_else(|| SyneviError::MissingExecutionHash)?; self.latest_hash = hashes.transaction_hash; event.hashes = Some(hashes); }; @@ -351,7 +383,7 @@ impl InternalStore { } } } - State::Commited => { + State::Committed => { if dep_event .dependencies .iter() @@ -381,9 +413,9 @@ impl InternalStore { &mut self, t_zero_recover: &T0, node_serial: u16, - ) -> Result { + ) -> Result, SyneviError> { let Some(state) = self.get_event_state(t_zero_recover) else { - return Err(SyneviError::EventNotFound(t_zero_recover.get_inner())); + return Ok(None); }; if matches!(state, synevi_types::State::Undefined) { return 
Err(SyneviError::UndefinedRecovery); @@ -392,7 +424,7 @@ impl InternalStore { if let Some(event) = self.events.get_mut(t_zero_recover) { event.ballot = Ballot(event.ballot.next_with_node(node_serial).into_time()); - Ok(RecoverEvent { + Ok(Some(RecoverEvent { id: event.id, t_zero: event.t_zero, t: event.t, @@ -400,9 +432,9 @@ impl InternalStore { transaction: event.transaction.clone(), dependencies: event.dependencies.clone(), ballot: event.ballot, - }) + })) } else { - Err(SyneviError::EventNotFound(t_zero_recover.get_inner())) + Ok(None) } } @@ -419,10 +451,9 @@ impl InternalStore { (self.last_applied, t0) } - async fn get_events_until( + fn get_events_after( &self, last_applied: T, - _self_event: u128, sdx: Sender>, ) -> Result<(), SyneviError> { let last_applied_t0 = match self.mappings.get(&last_applied) { @@ -431,8 +462,7 @@ impl InternalStore { _ => return Err(SyneviError::EventNotFound(last_applied.get_inner())), }; for (_, event) in self.events.range(last_applied_t0..) { - sdx.send(Ok(event.clone())) - .await + sdx.blocking_send(Ok(event.clone())) .map_err(|e| SyneviError::SendError(e.to_string()))?; } Ok(()) diff --git a/synevi_persistence/src/redb_store.rs b/synevi_persistence/src/redb_store.rs new file mode 100644 index 0000000..d4d529e --- /dev/null +++ b/synevi_persistence/src/redb_store.rs @@ -0,0 +1,676 @@ +use ahash::RandomState; +use monotime::MonoTime; +use redb::{Database, ReadableTable, ReadableTableMetadata, TableDefinition}; +use std::{ + collections::{BTreeMap, HashSet}, + sync::{Arc, Mutex}, +}; +use synevi_types::{ + error::SyneviError, + traits::Store, + types::{Event, Hashes, RecoverDependencies, RecoverEvent, UpsertEvent}, + Ballot, State, T, T0, +}; +use tokio::sync::mpsc::Receiver; +use tracing::instrument; + +const TABLE: TableDefinition = TableDefinition::new("events"); +const ID_MAPPINGS_DB: TableDefinition = TableDefinition::new("id_mappings"); + +#[derive(Clone, Debug)] +pub struct RedbStore { + data: Arc>, +} + +#[derive(Clone, 
Debug)] +struct InternalData { + db: Arc, + pub(crate) mappings: BTreeMap, // Key: t, value t0 + pub last_applied: T, // t of last applied entry + pub(crate) latest_time: MonoTime, // last created or recognized t0 + pub node_serial: u16, + latest_hash: [u8; 32], +} + +impl RedbStore { + pub fn new(path: String, node_serial: u16) -> Result { + let db = Database::create(path).unwrap(); + { + let write_txn = db.begin_write().unwrap(); + let _ = write_txn.open_table(TABLE).unwrap(); + let _ = write_txn.open_table(ID_MAPPINGS_DB).unwrap(); + write_txn.commit().unwrap(); + } + let read_txn = db.begin_read().unwrap(); + + let events_db = read_txn.open_table(TABLE).unwrap(); + + if !events_db.is_empty().unwrap() { + let result = events_db + .range(0..) + .unwrap() + .filter_map(|e| { + if let Ok((_, event)) = e { + Some(bincode::deserialize(event.value()).unwrap()) + } else { + None + } + }) + .collect::>(); + + let mut mappings = BTreeMap::default(); + let mut last_applied = T::default(); + let mut latest_time = MonoTime::default(); + let mut latest_hash: [u8; 32] = [0; 32]; + for event in result { + mappings.insert(event.t, event.t_zero); + if event.state == State::Applied && event.t > last_applied { + last_applied = event.t; + latest_hash = if let Some(hashes) = event.hashes { + hashes.transaction_hash + } else { + return Err(SyneviError::MissingTransactionHash); + }; + } + if *event.t > latest_time { + latest_time = *event.t; + } + } + Ok(RedbStore { + //db: env_clone, + data: Arc::new(Mutex::new(InternalData { + db: Arc::new(db), + mappings, + last_applied, + latest_time, + node_serial, + latest_hash, + })), + }) + } else { + Ok(RedbStore { + data: Arc::new(Mutex::new(InternalData { + db: Arc::new(db), + mappings: BTreeMap::default(), + last_applied: T::default(), + latest_time: MonoTime::default(), + node_serial, + latest_hash: [0; 32], + })), + }) + } + } +} + +impl Store for RedbStore { + #[instrument(level = "trace")] + fn init_t_zero(&self, node_serial: u16) -> 
T0 { + self.data + .lock() + .expect("poisoned lock, aborting") + .init_t_zero(node_serial) + } + + #[instrument(level = "trace")] + fn pre_accept_tx( + &self, + id: u128, + t_zero: T0, + transaction: Vec, + ) -> Result<(T, HashSet), SyneviError> { + self.data + .lock() + .expect("poisoned lock, aborting") + .pre_accept_tx(id, t_zero, transaction) + } + + #[instrument(level = "trace")] + fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> HashSet { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_tx_dependencies(t, t_zero) + } + + #[instrument(level = "trace")] + fn accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option { + self.data + .lock() + .expect("poisoned lock, aborting") + .accept_tx_ballot(t_zero, ballot) + } + + #[instrument(level = "trace", skip(self))] + fn upsert_tx(&self, upsert_event: UpsertEvent) -> Result<(), SyneviError> { + self.data + .lock() + .expect("poisoned lock, aborting") + .upsert_tx(upsert_event) + } + + #[instrument(level = "trace")] + fn get_recover_deps(&self, t_zero: &T0) -> Result { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_recover_deps(t_zero) + } + + #[instrument(level = "trace")] + fn get_event_state(&self, t_zero: &T0) -> Option { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_event_state(t_zero) + } + + #[instrument(level = "trace")] + fn recover_event( + &self, + t_zero_recover: &T0, + node_serial: u16, + ) -> Result, SyneviError> { + self.data + .lock() + .expect("poisoned lock, aborting") + .recover_event(t_zero_recover, node_serial) + } + + #[instrument(level = "trace")] + fn get_event_store(&self) -> BTreeMap { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_event_store() + } + + #[instrument(level = "trace")] + fn last_applied(&self) -> (T, T0) { + self.data + .lock() + .expect("poisoned lock, aborting") + .last_applied() + } + + #[instrument(level = "trace")] + fn get_events_after( + &self, + last_applied: T, + ) -> Result>, 
SyneviError> { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_events_after(last_applied) + } + + #[instrument(level = "trace", skip(self))] + fn get_event(&self, t_zero: T0) -> Result, SyneviError> { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_event(t_zero) + } + + #[instrument(level = "trace", skip(self))] + fn get_event_by_id(&self, t_zero: u128) -> Result, SyneviError> { + self.data + .lock() + .expect("poisoned lock, aborting") + .get_event_by_id(t_zero) + } + + fn inc_time_with_guard(&self, guard: T0) -> Result<(), SyneviError> { + let mut lock = self.data.lock().expect("poisoned lock, aborting"); + lock.latest_time = lock + .latest_time + .next_with_guard_and_node(&guard, lock.node_serial) + .into_time(); + Ok(()) + } + + fn get_or_update_transaction_hash(&self, event: UpsertEvent) -> Result { + let lock = self.data.lock().expect("poisoned lock, aborting"); + if let Some(event) = lock.get_event(event.t_zero)? { + if event.state == State::Applied { + if let Some(hashes) = event.hashes { + return Ok(hashes); + } + } + } + let mut event = Event::from(event); + event.state = State::Applied; + Ok(event.hash_event(lock.latest_hash)) + } +} + +impl InternalData { + #[instrument(level = "trace")] + fn init_t_zero(&mut self, node_serial: u16) -> T0 { + let next_time = self.latest_time.next_with_node(node_serial).into_time(); + self.latest_time = next_time; + T0(next_time) + } + + #[instrument(level = "trace")] + fn pre_accept_tx( + &mut self, + id: u128, + t_zero: T0, + transaction: Vec, + ) -> Result<(T, HashSet), SyneviError> { + let (t, deps) = { + let t = if self.latest_time > *t_zero { + let new_time_t = t_zero + .next_with_guard_and_node(&self.latest_time, self.node_serial) + .into_time(); + + self.latest_time = new_time_t; + T(new_time_t) + } else { + T(*t_zero) + }; + // This might not be necessary to re-use the write lock here + let deps = self.get_tx_dependencies(&t, &t_zero); + (t, deps) + }; + + let event = 
UpsertEvent { + id, + t_zero, + t, + state: State::PreAccepted, + transaction: Some(transaction), + dependencies: Some(deps.clone()), + ..Default::default() + }; + self.upsert_tx(event)?; + Ok((t, deps)) + } + + #[instrument(level = "trace")] + fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> HashSet { + if self.last_applied == *t { + return HashSet::default(); + } + assert!(self.last_applied < *t); + let mut deps = HashSet::default(); + if let Some(last_applied_t0) = self.mappings.get(&self.last_applied) { + if last_applied_t0 != &T0::default() { + deps.insert(*last_applied_t0); + } + } + // What about deps with dep_t0 < last_applied_t0 && dep_t > t? + + // Dependencies are where any of these cases match: + // - t_dep < t if not applied + // - t0_dep < t0_last_applied, if t_dep > t0 + // - t_dep > t if t0_dep < t + for (_, t0_dep) in self.mappings.range(self.last_applied..) { + if t0_dep != t_zero && (t0_dep < &T0(**t)) { + deps.insert(*t0_dep); + } + } + deps + } + + #[instrument(level = "trace")] + fn accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option { + let write_txn = self.db.begin_write().ok()?; + let mut event: Event = { + let table = write_txn.open_table(TABLE).ok()?; + let event = table.get(&t_zero.get_inner()).ok()??; + bincode::deserialize(event.value()).ok()? 
+ }; + + if event.ballot < ballot { + event.ballot = ballot; + let mut table = write_txn.open_table(TABLE).ok()?; + + let bytes = bincode::serialize(&event).ok()?; + let _ = table.insert(&t_zero.get_inner(), bytes.as_slice()); + } + write_txn.commit().ok()?; + + Some(event.ballot) + } + + #[instrument(level = "trace", skip(self))] + fn upsert_tx(&mut self, upsert_event: UpsertEvent) -> Result<(), SyneviError> { + //let db = self.db.clone(); + + // Update the latest time + if self.latest_time < *upsert_event.t { + self.latest_time = *upsert_event.t; + } + + let write_txn = self.db.begin_write().ok().unwrap(); + let event: Option = { + let table = write_txn.open_table(TABLE).ok().unwrap(); + table + .get(&upsert_event.t_zero.get_inner()) + .ok() + .unwrap() + .map(|e| bincode::deserialize(e.value()).ok().unwrap()) + }; + + let Some(mut event) = event else { + let mut event = Event::from(upsert_event.clone()); + + // Not an update -> Insert mapping + { + let mut table = write_txn.open_table(ID_MAPPINGS_DB).ok().unwrap(); + let _ = table.insert(&upsert_event.id, &upsert_event.t_zero.get_inner()); + } + + if matches!(event.state, State::Applied) { + self.mappings.insert(event.t, event.t_zero); + if let Some(deps) = upsert_event.dependencies { + event.dependencies = deps; + } + if let Some(transaction) = upsert_event.transaction { + if event.transaction.is_empty() && !transaction.is_empty() { + event.transaction = transaction; + } + } + event.state = upsert_event.state; + if let Some(ballot) = upsert_event.ballot { + if event.ballot < ballot { + event.ballot = ballot; + } + } + + let last_t = self.last_applied; + + // Safeguard + assert!(last_t < event.t); + + self.last_applied = event.t; + let hashes = upsert_event + .hashes + .ok_or_else(|| SyneviError::MissingExecutionHash)?; + self.latest_hash = hashes.transaction_hash; + event.hashes = Some(hashes.clone()); + { + let mut table = write_txn.open_table(TABLE).ok().unwrap(); + let bytes = 
bincode::serialize(&event).ok().unwrap(); + let _ = table.insert(&upsert_event.t_zero.get_inner(), bytes.as_slice()); + } + } else { + { + let mut table = write_txn.open_table(TABLE).ok().unwrap(); + let bytes = bincode::serialize(&event).ok().unwrap(); + let _ = table.insert(&upsert_event.t_zero.get_inner(), bytes.as_slice()); + } + self.mappings.insert(upsert_event.t, upsert_event.t_zero); + } + write_txn.commit().unwrap(); + return Ok(()); + }; + + // Do not update to a "lower" state + if upsert_event.state < event.state { + write_txn.commit().unwrap(); + return Ok(()); + } + + // Event is already applied + if event.state == State::Applied { + write_txn.commit().unwrap(); + return Ok(()); + } + + if event.is_update(&upsert_event) { + if let Some(old_t) = event.update_t(upsert_event.t) { + self.mappings.remove(&old_t); + self.mappings.insert(event.t, event.t_zero); + } + if let Some(deps) = upsert_event.dependencies { + event.dependencies = deps; + } + if let Some(transaction) = upsert_event.transaction { + if event.transaction.is_empty() && !transaction.is_empty() { + event.transaction = transaction; + } + } + event.state = upsert_event.state; + if let Some(ballot) = upsert_event.ballot { + if event.ballot < ballot { + event.ballot = ballot; + } + } + + if event.state == State::Applied { + let last_t = self.last_applied; + + if last_t > event.t { + println!("last_t: {:?}, event.t: {:?}", last_t, event.t); + } + + // Safeguard + assert!(last_t < event.t); + + self.last_applied = event.t; + let hashes = upsert_event + .hashes + .ok_or_else(|| SyneviError::MissingExecutionHash)?; + self.latest_hash = hashes.transaction_hash; + event.hashes = Some(hashes.clone()); + }; + { + let mut table = write_txn.open_table(TABLE).ok().unwrap(); + let bytes = bincode::serialize(&event).ok().unwrap(); + let _ = table.insert(&upsert_event.t_zero.get_inner(), bytes.as_slice()); + } + write_txn.commit().unwrap(); + Ok(()) + } else { + write_txn.commit().unwrap(); + Ok(()) + } + } + 
+ #[instrument(level = "trace")] + fn get_recover_deps(&self, t_zero: &T0) -> Result { + let read_txn = self.db.begin_read().unwrap(); + let timestamp = { + let table = read_txn.open_table(TABLE).ok().unwrap(); + table + .get(&t_zero.get_inner()) + .ok() + .unwrap() + .map(|e| bincode::deserialize::(e.value()).ok().unwrap()) + .unwrap() + .t + }; + let mut recover_deps = RecoverDependencies { + timestamp, + ..Default::default() + }; + + for (t_dep, t_zero_dep) in self.mappings.range(self.last_applied..) { + let dep_event = { + let table = read_txn.open_table(TABLE).ok().unwrap(); + table + .get(&t_zero.get_inner()) + .ok() + .unwrap() + .map(|e| bincode::deserialize::(e.value()).ok().unwrap()) + .unwrap() + }; + match dep_event.state { + State::Accepted => { + if dep_event + .dependencies + .iter() + .any(|t_zero_dep_dep| t_zero == t_zero_dep_dep) + { + // Wait -> Accord p19 l7 + l9 + if t_zero_dep < t_zero && **t_dep > **t_zero { + recover_deps.wait.insert(*t_zero_dep); + } + // Superseding -> Accord: p19 l10 + if t_zero_dep > t_zero { + recover_deps.superseding = true; + } + } + } + State::Committed => { + if dep_event + .dependencies + .iter() + .any(|t_zero_dep_dep| t_zero == t_zero_dep_dep) + { + // Superseding -> Accord: p19 l11 + if **t_dep > **t_zero { + recover_deps.superseding = true; + } + } + } + _ => {} + } + // Collect "normal" deps -> Accord: p19 l16 + if t_zero_dep < t_zero { + recover_deps.dependencies.insert(*t_zero_dep); + } + } + Ok(recover_deps) + } + + fn get_event_state(&self, t_zero: &T0) -> Option { + self.get_event(*t_zero) + .map(|event| event.map(|e| e.state)) + .ok() + .flatten() + } + + fn recover_event( + &self, + t_zero_recover: &T0, + node_serial: u16, + ) -> Result, SyneviError> { + let Some(state) = self.get_event_state(t_zero_recover) else { + return Ok(None); + }; + if matches!(state, synevi_types::State::Undefined) { + return Err(SyneviError::UndefinedRecovery); + } + + let write_txn = self.db.begin_write().ok().unwrap(); + let 
event = { + let table = write_txn.open_table(TABLE).ok().unwrap(); + table + .get(&t_zero_recover.get_inner()) + .ok() + .unwrap() + .map(|e| bincode::deserialize::(e.value()).ok().unwrap()) + }; + + if let Some(mut event) = event { + event.ballot = Ballot(event.ballot.next_with_node(node_serial).into_time()); + { + let mut table = write_txn.open_table(TABLE).ok().unwrap(); + let bytes = bincode::serialize(&event).ok().unwrap(); + let _ = table.insert(&t_zero_recover.get_inner(), bytes.as_slice()); + } + write_txn.commit().unwrap(); + + Ok(Some(RecoverEvent { + id: event.id, + t_zero: event.t_zero, + t: event.t, + state, + transaction: event.transaction.clone(), + dependencies: event.dependencies.clone(), + ballot: event.ballot, + })) + } else { + Ok(None) + } + } + + fn get_event_store(&self) -> BTreeMap { + // TODO: Remove unwrap and change trait result + let read_txn = self.db.begin_read().ok().unwrap(); + let table = read_txn.open_table(TABLE).ok().unwrap(); + let range = table.range(0..).unwrap(); + let result = range + .filter_map(|e| { + if let Ok((t0, event)) = e { + Some(( + T0(MonoTime::from(t0.value())), + bincode::deserialize(event.value()).ok().unwrap(), + )) + } else { + None + } + }) + .collect::>(); + result + } + + fn last_applied(&self) -> (T, T0) { + let t = self.last_applied; + let t0 = self.mappings.get(&t).cloned().unwrap_or(T0::default()); + (t, t0) + } + + fn get_events_after( + &self, + last_applied: T, + ) -> Result>, SyneviError> { + let (sdx, rcv) = tokio::sync::mpsc::channel(200); + let db = self.db.clone(); + let last_applied_t0 = match self.mappings.get(&last_applied) { + Some(t0) => *t0, + None if last_applied == T::default() => T0::default(), + _ => return Err(SyneviError::EventNotFound(last_applied.get_inner())), + }; + tokio::task::spawn_blocking(move || { + let write_txn = db.begin_read().ok().unwrap(); + let table = write_txn.open_table(TABLE).ok().unwrap(); + for result in table.range(last_applied_t0.get_inner()..).unwrap() { + 
let event = bincode::deserialize(result.unwrap().1.value()) + .ok() + .unwrap(); + sdx.blocking_send(Ok(event)) + .map_err(|e| SyneviError::SendError(e.to_string()))?; + } + Ok::<(), SyneviError>(()) + }); + Ok(rcv) + } + + fn get_event(&self, t_zero: T0) -> Result, SyneviError> { + let write_txn = self.db.begin_read().ok().unwrap(); + let event = { + let table = write_txn.open_table(TABLE).ok().unwrap(); + table + .get(&t_zero.get_inner()) + .ok() + .unwrap() + .map(|e| bincode::deserialize::(e.value()).ok().unwrap()) + }; + Ok(event) + } + + fn get_event_by_id(&self, t_zero: u128) -> Result, SyneviError> { + let read_txn = self.db.begin_read().ok().unwrap(); + let event = { + let mapping_table = read_txn.open_table(ID_MAPPINGS_DB).ok().unwrap(); + let t_zero = mapping_table + .get(&t_zero) + .ok() + .unwrap() + .map(|e| e.value()) + .ok_or_else(|| SyneviError::EventNotFound(t_zero))?; + + let table = read_txn.open_table(TABLE).ok().unwrap(); + table + .get(&t_zero) + .ok() + .unwrap() + .map(|e| bincode::deserialize::(e.value()).ok().unwrap()) + }; + Ok(event) + } +} diff --git a/synevi_types/Cargo.toml b/synevi_types/Cargo.toml index 38c643e..fe91f31 100644 --- a/synevi_types/Cargo.toml +++ b/synevi_types/Cargo.toml @@ -10,7 +10,7 @@ description.workspace = true [dependencies] bytes = { workspace = true } tokio = { workspace = true } -monotime = { path = "../monotime", version = "0.1.0"} +monotime = { path = "../monotime", version = "0.1.0" } serde = { workspace = true } serde_json = { workspace = true } tonic = { workspace = true } @@ -21,6 +21,7 @@ async-channel = "2.3.1" ahash = { workspace = true } heed = { workspace = true } http = "1.1.0" -postcard = {version = "1.0.10", features = ["use-std"]} +postcard = { version = "1.0.10", features = ["use-std"] } sha3 = "0.10.8" -ulid = {workspace = true} +ulid = { workspace = true } +redb = { version = "2.1.4" } \ No newline at end of file diff --git a/synevi_types/src/error.rs b/synevi_types/src/error.rs index 
8d9e20a..6a8ec74 100644 --- a/synevi_types/src/error.rs +++ b/synevi_types/src/error.rs @@ -66,8 +66,14 @@ pub enum SyneviError { NoMembersFound, #[error("Not ready for transactions")] NotReady, - #[error("Mismatched hashes")] - MismatchedHashes, + #[error("Mismatched execution hashes")] + MismatchedExecutionHashes, + #[error("Mismatched transaction hashes")] + MismatchedTransactionHashes, + #[error("Unrecoverable transaction")] + UnrecoverableTransaction, + #[error("Expected external transaction: {0}")] + InternalTransaction(String), } impl Serialize for SyneviError { diff --git a/synevi_types/src/traits.rs b/synevi_types/src/traits.rs index dd6f70f..53aed86 100644 --- a/synevi_types/src/traits.rs +++ b/synevi_types/src/traits.rs @@ -1,5 +1,5 @@ use ahash::RandomState; -use serde::Serialize; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; use std::{ collections::{BTreeMap, HashSet}, sync::{Arc, Weak}, @@ -34,9 +34,9 @@ impl Transaction for Vec { #[async_trait::async_trait] pub trait Executor: Send + Sync + 'static { - type Tx: Transaction + Serialize; + type Tx: Transaction + Serialize + DeserializeOwned; // Executor expects a type with interior mutability - async fn execute(&self, transaction: Self::Tx) -> SyneviResult; + async fn execute(&self, id: u128, transaction: Self::Tx) -> SyneviResult; } #[async_trait::async_trait] @@ -45,8 +45,8 @@ where E: Executor, { type Tx = E::Tx; - async fn execute(&self, transaction: Self::Tx) -> SyneviResult { - self.as_ref().execute(transaction).await + async fn execute(&self, id: u128, transaction: Self::Tx) -> SyneviResult { + self.as_ref().execute(id, transaction).await } } @@ -57,57 +57,66 @@ where { type Tx = E::Tx; - async fn execute(&self, transaction: Self::Tx) -> SyneviResult { + async fn execute(&self, id: u128, transaction: Self::Tx) -> SyneviResult { self.upgrade() .ok_or_else(|| SyneviError::ArcDropped)? 
.as_ref() - .execute(transaction) + .execute(id, transaction) .await } } pub type Dependencies = HashSet; -#[async_trait::async_trait] pub trait Store: Send + Sync + Sized + 'static { // fn new(node_serial: u16) -> Result; // Initialize a new t0 - async fn init_t_zero(&self, node_serial: u16) -> T0; + fn init_t_zero(&self, node_serial: u16) -> T0; // Pre-accept a transaction - async fn pre_accept_tx( + fn pre_accept_tx( &self, id: u128, t_zero: T0, transaction: Vec, ) -> Result<(T, Dependencies), SyneviError>; // Get the dependencies for a transaction - async fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> Dependencies; + fn get_tx_dependencies(&self, t: &T, t_zero: &T0) -> Dependencies; // Get the recover dependencies for a transaction - async fn get_recover_deps(&self, t_zero: &T0) -> Result; + fn get_recover_deps(&self, t_zero: &T0) -> Result; // Tries to recover an unfinished event from the store - async fn recover_event( + fn recover_event( &self, t_zero_recover: &T0, node_serial: u16, - ) -> Result; + ) -> Result, SyneviError>; // Check and update the ballot for a transaction // Returns true if the ballot was accepted (current <= ballot) - async fn accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option; + fn accept_tx_ballot(&self, t_zero: &T0, ballot: Ballot) -> Option; // Update or insert a transaction, returns the hash of the transaction if applied - async fn upsert_tx(&self, upsert_event: UpsertEvent) -> Result<(), SyneviError>; - - async fn get_event_state(&self, t_zero: &T0) -> Option; + fn upsert_tx(&self, upsert_event: UpsertEvent) -> Result<(), SyneviError>; - async fn get_event_store(&self) -> BTreeMap; - async fn last_applied(&self) -> (T, T0); - async fn last_applied_hash(&self) -> Result<(T, [u8; 32]), SyneviError>; + fn get_event_state(&self, t_zero: &T0) -> Option; - async fn get_event(&self, t_zero: T0) -> Result, SyneviError>; - async fn get_events_after(&self, last_applied: T, self_event: u128) -> Result>, SyneviError>; + fn 
get_event_store(&self) -> BTreeMap; + fn last_applied(&self) -> (T, T0); + // fn last_applied_hash(&self) -> Result<(T, [u8; 32]), SyneviError>; - async fn get_and_update_hash( + fn get_event(&self, t_zero: T0) -> Result, SyneviError>; + fn get_event_by_id(&self, id: u128) -> Result, SyneviError>; + fn get_events_after( &self, - t_zero: T0, - execution_hash: [u8; 32], - ) -> Result; + last_applied: T, + ) -> Result>, SyneviError>; + + // fn get_and_update_hash( + // &self, + // t_zero: T0, + // execution_hash: [u8; 32], + // ) -> Result; + + fn get_or_update_transaction_hash(&self, event: UpsertEvent) -> Result; + + // Increases the max time to be above the specified guard + // Ensures that the guards t0 will not get a fast path afterwards + fn inc_time_with_guard(&self, guard: T0) -> Result<(), SyneviError>; } diff --git a/synevi_types/src/types.rs b/synevi_types/src/types.rs index 5f7e93e..22f88aa 100644 --- a/synevi_types/src/types.rs +++ b/synevi_types/src/types.rs @@ -7,23 +7,31 @@ use sha3::{Digest, Sha3_256}; use std::{ collections::HashSet, ops::Deref, - time::{SystemTime, UNIX_EPOCH}, + time::{Instant, SystemTime, UNIX_EPOCH}, }; +use tokio::sync::oneshot; use ulid::Ulid; pub type SyneviResult = Result< - ExecutorResult<::Tx>, - //Result<<::Tx as Transaction>::TxOk, <::Tx as Transaction>::TxErr>, + Result<<::Tx as Transaction>::TxOk, <::Tx as Transaction>::TxErr>, SyneviError, >; +pub type InternalSyneviResult = Result::Tx>, SyneviError>; + #[derive(Serialize)] pub enum ExecutorResult { External(Result), Internal(Result), } -#[derive(Default, PartialEq, PartialOrd, Ord, Eq, Clone, Debug, Serialize)] +pub struct Waiter { + pub waited_since: Instant, + pub dependency_states: u64, + pub sender: Vec>, +} + +#[derive(Default, PartialEq, PartialOrd, Ord, Eq, Clone, Debug, Serialize, Deserialize)] pub enum TransactionPayload { #[default] None, @@ -31,10 +39,17 @@ pub enum TransactionPayload { Internal(InternalExecution), } -#[derive(Debug, Clone, Serialize, Eq, 
PartialEq, PartialOrd, Ord)] +#[derive(Debug, Clone, Serialize, Eq, PartialEq, PartialOrd, Ord, Deserialize)] pub enum InternalExecution { - JoinElectorate { id: Ulid, serial: u16, host: String }, - ReadyElectorate { id: Ulid, serial: u16 }, + JoinElectorate { + id: Ulid, + serial: u16, + new_node_host: String, + }, + ReadyElectorate { + id: Ulid, + serial: u16, + }, } // #[derive(Debug, Clone, Serialize, Eq, PartialEq, PartialOrd, Ord)] @@ -82,11 +97,15 @@ impl Transaction for InternalExecution { let mut bytes = Vec::new(); match self { - InternalExecution::JoinElectorate { id, serial, host } => { + InternalExecution::JoinElectorate { + id, + serial, + new_node_host, + } => { bytes.push(0); bytes.extend_from_slice(&id.to_bytes()); bytes.extend_from_slice(&serial.to_be_bytes()); - bytes.extend_from_slice(&host.as_bytes()); + bytes.extend_from_slice(new_node_host.as_bytes()); } InternalExecution::ReadyElectorate { id, serial } => { bytes.push(1); @@ -104,15 +123,19 @@ impl Transaction for InternalExecution { Some(0) => { let (id, rest) = rest.split_at(16); let id = Ulid::from_bytes(id.try_into()?); - let (serial, host) = rest.split_at(2); + let (serial, new_node_host) = rest.split_at(2); let serial = u16::from_be_bytes( serial .try_into() .map_err(|_| SyneviError::InvalidConversionFromBytes(String::new()))?, ); - let host = String::from_utf8(host.to_owned()) + let new_node_host = String::from_utf8(new_node_host.to_owned()) .map_err(|e| SyneviError::InvalidConversionFromBytes(e.to_string()))?; - Ok(InternalExecution::JoinElectorate { id, serial, host }) + Ok(InternalExecution::JoinElectorate { + id, + serial, + new_node_host, + }) } Some(1) => { let (id, serial) = rest.split_at(16); @@ -252,7 +275,7 @@ pub enum State { Undefined = 0, PreAccepted = 1, Accepted = 2, - Commited = 3, + Committed = 3, Applied = 4, } @@ -261,7 +284,7 @@ impl From for State { match value { 1 => Self::PreAccepted, 2 => Self::Accepted, - 3 => Self::Commited, + 3 => Self::Committed, 4 => 
Self::Applied, _ => Self::Undefined, } @@ -273,7 +296,7 @@ impl From for i32 { match val { State::PreAccepted => 1, State::Accepted => 2, - State::Commited => 3, + State::Committed => 3, State::Applied => 4, _ => 0, } @@ -336,7 +359,7 @@ pub struct UpsertEvent { pub transaction: Option>, pub dependencies: Option>, pub ballot: Option, - pub execution_hash: Option<[u8; 32]>, + pub hashes: Option, } impl Event { @@ -349,7 +372,7 @@ impl Event { hasher.update(self.transaction.as_slice()); hasher.update(previous_hash); - let event_hash: [u8;32] = hasher.finalize().into(); + let event_hash: [u8; 32] = hasher.finalize().into(); Hashes { previous_hash, transaction_hash: event_hash, @@ -412,11 +435,7 @@ impl From for Event { transaction: value.transaction.unwrap_or_default(), dependencies: value.dependencies.unwrap_or_default(), ballot: value.ballot.unwrap_or_default(), - hashes: value.execution_hash.map(|hash| Hashes { - previous_hash: [0; 32], - transaction_hash:[0;32], - execution_hash: hash, - }), + hashes: value.hashes, last_updated: SystemTime::now() .duration_since(UNIX_EPOCH) .unwrap() // This must fail if the system clock is before the UNIX_EPOCH @@ -425,6 +444,20 @@ impl From for Event { } } +impl From for RecoverEvent { + fn from(value: Event) -> Self { + RecoverEvent { + id: value.id, + t_zero: value.t_zero, + t: value.t, + state: value.state, + transaction: value.transaction, + dependencies: value.dependencies, + ballot: value.ballot, + } + } +} + #[cfg(test)] mod test { use crate::{types::TransactionPayload, Transaction}; @@ -440,7 +473,7 @@ mod test { TransactionPayload::Internal(crate::types::InternalExecution::JoinElectorate { id: ulid::Ulid::new(), serial: 1, - host: "http://test.org:1234".to_string(), + new_node_host: "http://test.org:1234".to_string(), }); let bytes = internal_join.as_bytes(); assert_eq!( diff --git a/tests/Cargo.toml b/tests/Cargo.toml index f707ea4..5962a5c 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -40,7 +40,4 @@ path = 
"maelstrom/echo.rs" [[bin]] name = "maelstrom_lin_kv" -path = "maelstrom/lin_kv.rs" - -[profile.release] -panic = "abort" +path = "maelstrom/lin_kv.rs" \ No newline at end of file diff --git a/tests/consensus_e2e.rs b/tests/consensus_e2e.rs index 7040f68..79d7c1c 100644 --- a/tests/consensus_e2e.rs +++ b/tests/consensus_e2e.rs @@ -6,11 +6,12 @@ mod tests { use std::net::SocketAddr; use std::str::FromStr; use std::sync::Arc; + use std::time::Duration; use synevi::{State, Store, T, T0}; use synevi_core::node::Node; use synevi_core::tests::DummyExecutor; use synevi_network::network::GrpcNetwork; - use synevi_persistence::database::PersistentStore; + use synevi_persistence::lmdb_store::LmdbStore; use synevi_persistence::mem_store::MemStore; use tokio::fs; use tokio::runtime::Builder; @@ -19,9 +20,13 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn parallel_execution() { let node_names: Vec<_> = (0..5).map(|_| Ulid::new()).collect(); - let mut nodes: Vec>> = vec![]; + let mut nodes: Vec>> = vec![]; for (i, m) in node_names.iter().enumerate() { + let test_path = format!("/dev/shm/{m}"); + fs::create_dir(&test_path).await.unwrap(); + dbg!(&test_path); + let store = LmdbStore::new(test_path, i as u16).unwrap(); let socket_addr = SocketAddr::from_str(&format!("0.0.0.0:{}", 10000 + i)).unwrap(); let network = synevi_network::network::GrpcNetwork::new( socket_addr, @@ -29,7 +34,7 @@ mod tests { *m, i as u16, ); - let node = Node::new_with_network_and_executor(*m, i as u16, network, DummyExecutor) + let node = Node::new(*m, i as u16, network, DummyExecutor, store) .await .unwrap(); nodes.push(node); @@ -53,26 +58,16 @@ mod tests { let mut joinset = tokio::task::JoinSet::new(); - for i in 0..1000 { + for i in 0..100 { let coordinator = coordinator.clone(); joinset.spawn(async move { coordinator - .transaction( - i, - Vec::from("This is a transaction"), - ) + .transaction(i, Vec::from("This is a transaction")) .await }); } while let Some(res) = 
joinset.join_next().await { - match res.unwrap().unwrap() { - synevi::ExecutorResult::External(res) => { - res.unwrap(); - } - synevi::ExecutorResult::Internal(res) => { - res.unwrap(); - } - }; + res.unwrap().unwrap().unwrap(); } let (total, accepts, recovers) = coordinator.get_stats(); @@ -85,11 +80,11 @@ mod tests { ); //assert_eq!(recovers, 0); + tokio::time::sleep(Duration::from_secs(1)).await; let coordinator_store: BTreeMap)> = coordinator .event_store .get_event_store() - .await .into_values() .map(|e| (e.t_zero, (e.t, e.get_latest_hash()))) .collect(); @@ -97,7 +92,6 @@ mod tests { assert!(coordinator .event_store .get_event_store() - .await .iter() .all(|(_, e)| e.state == State::Applied)); @@ -106,22 +100,20 @@ mod tests { let node_store: BTreeMap)> = node .event_store .get_event_store() - .await .into_values() .map(|e| (e.t_zero, (e.t, e.get_latest_hash()))) .collect(); assert!( node.event_store .get_event_store() - .await .iter() .all(|(_, e)| e.state == State::Applied), "Not all applied @ {:?}", - node.get_info() + node.get_serial() ); assert_eq!(coordinator_store.len(), node_store.len()); if coordinator_store != node_store { - println!("Node: {:?}", node.get_info()); + println!("Node: {:?}", node.get_serial()); let mut node_store_iter = node_store.iter(); for (k, v) in coordinator_store.iter() { if let Some(next) = node_store_iter.next() { @@ -186,7 +178,7 @@ mod tests { let start = std::time::Instant::now(); - for _ in 0..10000 { + for _ in 0..100 { let coordinator1 = coordinator1.clone(); let coordinator2 = coordinator2.clone(); let coordinator3 = coordinator3.clone(); @@ -194,54 +186,35 @@ mod tests { let coordinator5 = coordinator5.clone(); joinset.spawn(async move { coordinator1 - .transaction( - u128::from_be_bytes(Ulid::new().to_bytes()), - Vec::from("C1"), - ) + .transaction(u128::from_be_bytes(Ulid::new().to_bytes()), Vec::from("C1")) .await }); joinset.spawn(async move { coordinator2 - .transaction( - 
u128::from_be_bytes(Ulid::new().to_bytes()), - Vec::from("C2"), - ) + .transaction(u128::from_be_bytes(Ulid::new().to_bytes()), Vec::from("C2")) .await }); joinset.spawn(async move { coordinator3 - .transaction( - u128::from_be_bytes(Ulid::new().to_bytes()), - Vec::from("C3"), - ) + .transaction(u128::from_be_bytes(Ulid::new().to_bytes()), Vec::from("C3")) .await }); joinset.spawn(async move { coordinator4 - .transaction( - u128::from_be_bytes(Ulid::new().to_bytes()), - Vec::from("C4"), - ) + .transaction(u128::from_be_bytes(Ulid::new().to_bytes()), Vec::from("C4")) .await }); joinset.spawn(async move { coordinator5 - .transaction( - u128::from_be_bytes(Ulid::new().to_bytes()), - Vec::from("C5"), - ) + .transaction(u128::from_be_bytes(Ulid::new().to_bytes()), Vec::from("C5")) .await }); } + let mut counter: u16 = 0; while let Some(res) = joinset.join_next().await { - match res.unwrap().unwrap() { - synevi::ExecutorResult::External(res) => { - res.unwrap(); - } - synevi::ExecutorResult::Internal(res) => { - res.unwrap(); - } - }; + counter += 1; + println!("Got: {counter}"); + res.unwrap().unwrap().unwrap(); } println!("Time: {:?}", start.elapsed()); @@ -290,12 +263,13 @@ mod tests { recovers ); - assert_eq!(recovers, 0); + tokio::time::sleep(Duration::from_secs(5)).await; + + //assert_eq!(recovers, 0); let coordinator_store: BTreeMap)> = coordinator1 .event_store .get_event_store() - .await .into_values() .map(|e| (e.t_zero, (e.t, e.get_latest_hash()))) .collect(); @@ -310,19 +284,17 @@ mod tests { let node_store: BTreeMap)> = node .event_store .get_event_store() - .await .into_values() .map(|e| (e.t_zero, (e.t, e.get_latest_hash()))) .collect(); assert!(node .event_store .get_event_store() - .await .iter() .all(|(_, e)| e.state == State::Applied)); assert_eq!(coordinator_store.len(), node_store.len()); if coordinator_store != node_store { - println!("Node: {:?}", node.get_info()); + println!("Node: {:?}", node.get_serial()); let mut node_store_iter = 
node_store.iter(); for (k, v) in coordinator_store.iter() { if let Some(next) = node_store_iter.next() { @@ -385,23 +357,13 @@ mod tests { .unwrap(); } - for i in 0..1000 { - match coordinator + for i in 0..100 { + coordinator .clone() - .transaction( - i, - Vec::from("This is a transaction"), - ) + .transaction(i, Vec::from("This is a transaction")) .await .unwrap() - { - synevi::ExecutorResult::Internal(res) => { - res.unwrap(); - } - synevi::ExecutorResult::External(res) => { - res.unwrap(); - } - }; + .unwrap(); } runtime.shutdown_background(); @@ -411,7 +373,7 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn reconfiguration() { let node_names: Vec<_> = (0..5).map(|_| Ulid::new()).collect(); - let mut nodes: Vec>> = vec![]; + let mut nodes: Vec>> = vec![]; //let mut nodes: Vec>> = vec![]; for (i, m) in node_names.iter().enumerate() { @@ -428,7 +390,7 @@ mod tests { let test_path = format!("/dev/shm/{m}/"); fs::create_dir(&test_path).await.unwrap(); dbg!(&test_path); - let store = PersistentStore::new(test_path, i as u16).unwrap(); + let store = LmdbStore::new(test_path, i as u16).unwrap(); //let store = MemStore::new(i as u16).unwrap(); let node = Node::new(*m, i as u16, network, DummyExecutor, store) .await @@ -446,8 +408,7 @@ mod tests { // Copy & create db let test_path = format!("/dev/shm/{m}/"); fs::create_dir(&test_path).await.unwrap(); - dbg!(&test_path); - let store = PersistentStore::new(test_path, i as u16).unwrap(); + let store = LmdbStore::new(test_path, i as u16).unwrap(); //let store = MemStore::new(i as u16).unwrap(); let node = Node::new_with_member( *m, @@ -468,27 +429,27 @@ mod tests { let mut joinset = tokio::task::JoinSet::new(); - let random_number = rand::thread_rng().gen_range(0..999); - dbg!(&random_number); - for i in 0..1000 { + let num = 100; + let random_number: u128 = rand::thread_rng().gen_range(0..num - 1); + for i in 0..num { if i == random_number { let id = Ulid::new(); let network = 
synevi_network::network::GrpcNetwork::new( SocketAddr::from_str("0.0.0.0:13006").unwrap(), "http://0.0.0.0:13006".to_string(), id, - 6, + 5, ); // Copy & create db let test_path = format!("/dev/shm/{id}/"); fs::create_dir(&test_path).await.unwrap(); dbg!(&test_path); - let store = PersistentStore::new(test_path, 6).unwrap(); + let store = LmdbStore::new(test_path, 6).unwrap(); //let store = MemStore::new(6).unwrap(); let node = Node::new_with_member( id, - 6, + 5, network, DummyExecutor, store, @@ -502,27 +463,17 @@ mod tests { let coordinator = coordinator.clone(); joinset.spawn(async move { coordinator - .transaction( - i, - Vec::from( - "This is a transaction", - ), - ) + .transaction(i, Vec::from("This is a transaction")) .await }); } } while let Some(res) = joinset.join_next().await { - match res.unwrap().unwrap() { - synevi::ExecutorResult::External(res) => { - res.unwrap(); - } - synevi::ExecutorResult::Internal(res) => { - res.unwrap(); - } - }; + res.unwrap().unwrap().unwrap(); } + tokio::time::sleep(Duration::from_secs(1)).await; + let (total, accepts, recovers) = coordinator.get_stats(); println!( "Fast: {:?}, Slow: {:?} Paths / {:?} Total / {:?} Recovers", @@ -547,7 +498,6 @@ mod tests { let coordinator_store: BTreeMap = coordinator .event_store .get_event_store() - .await .into_values() .map(|e| { ( @@ -560,7 +510,6 @@ mod tests { assert!(coordinator .event_store .get_event_store() - .await .iter() .all(|(_, e)| e.state == State::Applied)); @@ -571,7 +520,6 @@ mod tests { let node_store: BTreeMap = node .event_store .get_event_store() - .await .into_values() .map(|e| { ( @@ -584,15 +532,14 @@ mod tests { assert!( node.event_store .get_event_store() - .await .iter() .all(|(_, e)| e.state == State::Applied), "Not all applied @ {:?}", - node.get_info() + node.get_serial() ); assert_eq!(coordinator_store.len(), node_store.len()); if coordinator_store != node_store { - println!("Node: {:?}", node.get_info()); + println!("Node: {:?}", node.get_serial()); let 
mut node_store_iter = node_store.iter(); for (k, v) in coordinator_store.iter() { if let Some(next) = node_store_iter.next() { diff --git a/tests/maelstrom/network.rs b/tests/maelstrom/network.rs index 17ed147..4d7f245 100644 --- a/tests/maelstrom/network.rs +++ b/tests/maelstrom/network.rs @@ -9,8 +9,9 @@ use synevi::network::requests::{ RecoverRequest, RecoverResponse, }; use synevi::network::{Network, NetworkInterface, Replica}; -use synevi::{State, SyneviError, T, T0}; +use synevi::{State, SyneviError, T0}; use synevi_network::configure_transport::GetEventResponse; +use synevi_network::network::{MemberWithLatency, NodeStatus}; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::Mutex; use tokio::task::JoinSet; @@ -70,6 +71,10 @@ impl Network for MaelstromNetwork { } } + fn get_node_status(&self) -> Arc { + todo!() + } + async fn add_member( &self, _id: Ulid, @@ -172,32 +177,26 @@ impl Network for MaelstromNetwork { todo!() } - async fn ready_electorate(&self) -> Result<(), SyneviError> { + async fn ready_electorate(&self, _host: String) -> Result<(), SyneviError> { todo!() } async fn get_stream_events( &self, _last_applied: Vec, - _self_event: Vec, ) -> Result, SyneviError> { todo!() } - async fn broadcast_config(&self, _host: String) -> Result<(u32, Vec), SyneviError> { + async fn join_electorate(&self, _host: String) -> Result { todo!() } - async fn get_member_len(&self) -> u32 { + async fn get_members(&self) -> Vec> { todo!() } - async fn report_config( - &self, - _last_applied: T, - _last_applied_hash: [u8; 32], - _host: String, - ) -> Result<(), SyneviError> { + async fn report_config(&self, _host: String) -> Result<(), SyneviError> { todo!() } } @@ -283,7 +282,7 @@ impl NetworkInterface for MaelstromNetwork { self.broadcast_responses .lock() .await - .insert((State::Commited, t0), sx); + .insert((State::Committed, t0), sx); for replica in members { if let Err(err) = self .message_sender @@ -309,7 +308,7 @@ impl NetworkInterface for 
MaelstromNetwork { continue; }; } - (State::Commited, t0) + (State::Committed, t0) } BroadcastRequest::Apply(req) => { let t0 = T0::try_from(req.timestamp_zero.as_slice()).unwrap(); @@ -418,6 +417,9 @@ impl NetworkInterface for MaelstromNetwork { Ok(result) } + async fn broadcast_recovery(&self, _t0: T0) -> Result { + todo!() + } } pub(crate) async fn replica_dispatch( @@ -442,7 +444,6 @@ pub(crate) async fn replica_dispatch( last_applied: last_applied.clone(), }, node as u16, - true, ) .await .unwrap(); @@ -470,18 +471,15 @@ pub(crate) async fn replica_dispatch( ref last_applied, } => { let response = replica - .accept( - AcceptRequest { - id: id.clone(), - ballot: ballot.clone(), - event: event.clone(), - timestamp_zero: t0.clone(), - timestamp: t.clone(), - dependencies: deps.clone(), - last_applied: last_applied.clone(), - }, - true, - ) + .accept(AcceptRequest { + id: id.clone(), + ballot: ballot.clone(), + event: event.clone(), + timestamp_zero: t0.clone(), + timestamp: t.clone(), + dependencies: deps.clone(), + last_applied: last_applied.clone(), + }) .await?; let reply = msg.reply(Body { @@ -504,16 +502,13 @@ pub(crate) async fn replica_dispatch( ref deps, } => { replica - .commit( - CommitRequest { - id: id.clone(), - event: event.clone(), - timestamp_zero: t0.clone(), - timestamp: t.clone(), - dependencies: deps.clone(), - }, - true, - ) + .commit(CommitRequest { + id: id.clone(), + event: event.clone(), + timestamp_zero: t0.clone(), + timestamp: t.clone(), + dependencies: deps.clone(), + }) .await?; let reply = msg.reply(Body { @@ -535,18 +530,15 @@ pub(crate) async fn replica_dispatch( } => { eprintln!("Replica dispatch apply {:?}", t0); replica - .apply( - ApplyRequest { - id: id.clone(), - event: event.clone(), - timestamp_zero: t0.clone(), - timestamp: t.clone(), - dependencies: deps.clone(), - transaction_hash: transaction_hash.clone(), - execution_hash: execution_hash.clone(), - }, - true, - ) + .apply(ApplyRequest { + id: id.clone(), + event: 
event.clone(), + timestamp_zero: t0.clone(), + timestamp: t.clone(), + dependencies: deps.clone(), + transaction_hash: transaction_hash.clone(), + execution_hash: execution_hash.clone(), + }) .await?; let reply = msg.reply(Body { @@ -564,15 +556,12 @@ pub(crate) async fn replica_dispatch( ref t0, } => { let result = replica - .recover( - RecoverRequest { - id: id.clone(), - ballot: ballot.clone(), - event: event.clone(), - timestamp_zero: t0.clone(), - }, - true, - ) + .recover(RecoverRequest { + id: id.clone(), + ballot: ballot.clone(), + event: event.clone(), + timestamp_zero: t0.clone(), + }) .await?; let reply = msg.reply(Body { @@ -646,7 +635,7 @@ impl MaelstromNetwork { MessageType::CommitOk { t0 } => { let key = T0::try_from(t0.as_slice())?; let lock = self.broadcast_responses.lock().await; - if let Some(entry) = lock.get(&(State::Commited, key)) { + if let Some(entry) = lock.get(&(State::Committed, key)) { entry .send(BroadcastResponse::Commit(CommitResponse {})) .await?;