Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Single state machine across main and object stores #1077

Open
wants to merge 77 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
e1db784
Simpler state machine for checkpointing
badrishc Feb 24, 2025
49b9c16
cleanup
badrishc Feb 24, 2025
d111602
updates
badrishc Feb 25, 2025
9f838f7
remove dead code
badrishc Feb 25, 2025
c907987
updates
badrishc Feb 26, 2025
e715503
updates
badrishc Feb 26, 2025
7529ade
updates
badrishc Feb 26, 2025
6da12ce
update
badrishc Feb 27, 2025
e24720c
kill code
badrishc Feb 27, 2025
1a8ba12
updates
badrishc Feb 27, 2025
a1b87a9
simplify LightEpoch
badrishc Feb 27, 2025
69c07e9
move epvs to test
badrishc Feb 27, 2025
6ec5233
nits
badrishc Feb 27, 2025
9599202
updates
badrishc Feb 27, 2025
a24e7e6
updates
badrishc Feb 28, 2025
223abf6
formatting
badrishc Feb 28, 2025
4d17abb
fix garnet
badrishc Feb 28, 2025
cc4e677
Merge remote-tracking branch 'origin/main' into badrishc/state-machin…
badrishc Feb 28, 2025
2b830b1
nit
badrishc Feb 28, 2025
d92360a
comments
badrishc Feb 28, 2025
8145607
remove manualLockingActive
badrishc Feb 28, 2025
4ba164c
update the barrier condition and remove checkpoint version switch bar…
badrishc Feb 28, 2025
aa36bf4
remove INTERMEDIATE state
badrishc Mar 1, 2025
6119e97
Merge remote-tracking branch 'origin/main' into badrishc/state-machin…
badrishc Mar 1, 2025
528c227
Remove CPR_SHIFT_DETECTED and LatchDestination.Retry
badrishc Mar 1, 2025
2067cb9
add black box test for checkpointing version switch state machine
badrishc Mar 3, 2025
174ae9a
add transaction test
badrishc Mar 3, 2025
8f72d00
clean the test
badrishc Mar 3, 2025
d304ecd
cleanup
badrishc Mar 3, 2025
36a5e18
Refactor the phases of various machines
badrishc Mar 4, 2025
0c68988
format
badrishc Mar 4, 2025
b4763c9
Merge remote-tracking branch 'origin/main' into badrishc/state-machin…
badrishc Mar 4, 2025
efd8bd2
initial commit
badrishc Mar 4, 2025
511fe75
remove sessionName
badrishc Mar 6, 2025
8ed6101
update LightEpoch based on PR comment
badrishc Mar 6, 2025
17f04ac
fix break
badrishc Mar 6, 2025
6dfc383
Use session-local isAcquiredLockable as signal for threads to decide …
badrishc Mar 6, 2025
f1ffaec
address review comments
badrishc Mar 6, 2025
3e3a377
nit
badrishc Mar 6, 2025
7c40678
Merge from base
badrishc Mar 6, 2025
cdd91f9
Merge remote-tracking branch 'origin/main' into badrishc/state-machin…
badrishc Mar 7, 2025
aa3113c
Merge branch 'badrishc/state-machine-v2' into badrishc/two-store-chec…
badrishc Mar 7, 2025
1c49869
minor code move
badrishc Mar 7, 2025
443b8ab
use shared epoch across stores
badrishc Mar 7, 2025
3071098
nit
badrishc Mar 7, 2025
aa4b628
Merge from main
badrishc Mar 7, 2025
f8a1899
add unified checkpointing logic to garnet
badrishc Mar 8, 2025
9930388
Merge remote-tracking branch 'origin/main' into badrishc/two-store-ch…
badrishc Mar 8, 2025
b6df00c
nit
badrishc Mar 8, 2025
5b4c646
fix
badrishc Mar 8, 2025
3e8c2c7
use correct SMD
badrishc Mar 8, 2025
576f92d
nit
badrishc Mar 8, 2025
76430c1
fix
badrishc Mar 8, 2025
7b7bec5
nit
badrishc Mar 8, 2025
bbad4f0
fix test as versions are different due to unified ckpt
badrishc Mar 8, 2025
af870fb
add comment
badrishc Mar 8, 2025
2bccee7
remove targetVersion from checkpoint API, versions always progress by 1.
badrishc Mar 9, 2025
cde0a07
non-working state
badrishc Mar 10, 2025
4f6f529
updates
badrishc Mar 10, 2025
135adde
nits
badrishc Mar 10, 2025
e563edf
fix state machine
badrishc Mar 10, 2025
eb3ffe3
fixes
badrishc Mar 10, 2025
1d573ce
fixes
badrishc Mar 10, 2025
7cacb54
fix
badrishc Mar 10, 2025
d105638
format
badrishc Mar 10, 2025
7dfadda
Merge remote-tracking branch 'origin/main' into badrishc/two-store-ch…
badrishc Mar 10, 2025
5afca31
add assert for safe index growth with locks
badrishc Mar 10, 2025
319fa96
Reinstating cpr_shift_detected instead of barrier'ing threads on PREP…
badrishc Mar 11, 2025
dd16582
fixes based on fuzz test
badrishc Mar 11, 2025
f001933
Merge remote-tracking branch 'origin/main' into badrishc/two-store-ch…
badrishc Mar 11, 2025
a9ae134
improve SMD test to use timing fuzzing
badrishc Mar 11, 2025
f40086d
add test for index grow, fix bugs
badrishc Mar 11, 2025
de1b28e
updates and clean up, improve the test to be multi-iteration.
badrishc Mar 11, 2025
2498616
fix test
badrishc Mar 11, 2025
53df55e
remove isAsync, clarify comment
badrishc Mar 12, 2025
a4934ad
sigh, fix format.
badrishc Mar 12, 2025
037daf7
nit
badrishc Mar 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libs/cluster/Server/ClusterConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,7 @@ public ClusterConfig BumpLocalNodeConfigEpoch()
/// Check if sender has same local worker epoch as the receiver node and resolve collision.
/// </summary>
/// <param name="senderConfig">Incoming configuration object.</param>
/// <param name="logger">Optional logger instance (defaults to null).</param>
/// <returns>ClusterConfig object with updates.</returns>
public ClusterConfig HandleConfigEpochCollision(ClusterConfig senderConfig, ILogger logger = null)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@ internal sealed class ReplicationLogCheckpointManager(
public string RecoveredReplicationId = string.Empty;

readonly bool isMainStore = isMainStore;
public Action<bool, long, long> checkpointVersionShift;
public Action<bool, long, long> checkpointVersionShiftStart;
public Action<bool, long, long> checkpointVersionShiftEnd;

readonly bool safelyRemoveOutdated = removeOutdated;

public override void CheckpointVersionShift(long oldVersion, long newVersion)
{
checkpointVersionShift?.Invoke(isMainStore, oldVersion, newVersion);
}
/// <summary>
/// Forwards the start of a checkpoint version shift to the registered
/// <c>checkpointVersionShiftStart</c> callback, if any, tagging the call with
/// this manager's <c>isMainStore</c> flag.
/// </summary>
/// <param name="oldVersion">Version passed through to the callback as the old version.</param>
/// <param name="newVersion">Version passed through to the callback as the new version.</param>
public override void CheckpointVersionShiftStart(long oldVersion, long newVersion)
{
    // Read the delegate field once so the null check and the call see the same value.
    var onShiftStart = checkpointVersionShiftStart;
    if (onShiftStart is not null)
        onShiftStart(isMainStore, oldVersion, newVersion);
}

/// <summary>
/// Forwards the end of a checkpoint version shift to the registered
/// <c>checkpointVersionShiftEnd</c> callback, if any, tagging the call with
/// this manager's <c>isMainStore</c> flag.
/// </summary>
/// <param name="oldVersion">Version passed through to the callback as the old version.</param>
/// <param name="newVersion">Version passed through to the callback as the new version.</param>
public override void CheckpointVersionShiftEnd(long oldVersion, long newVersion)
{
    // Read the delegate field once so the null check and the call see the same value.
    var onShiftEnd = checkpointVersionShiftEnd;
    if (onShiftEnd is not null)
        onShiftEnd(isMainStore, oldVersion, newVersion);
}

public void DeleteLogCheckpoint(Guid logToken)
=> deviceFactory.Delete(checkpointNamingScheme.LogCheckpointBase(logToken));
Expand Down
20 changes: 17 additions & 3 deletions libs/cluster/Server/Replication/ReplicationManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,13 @@ public ReplicationManager(ClusterProvider clusterProvider, ILogger logger = null
ReplicationOffset = 0;

// Register the checkpoint version shift callbacks for all stores
clusterProvider.GetReplicationLogCheckpointManager(StoreType.Main).checkpointVersionShift = CheckpointVersionShift;
clusterProvider.GetReplicationLogCheckpointManager(StoreType.Main).checkpointVersionShiftStart = CheckpointVersionShiftStart;
clusterProvider.GetReplicationLogCheckpointManager(StoreType.Main).checkpointVersionShiftEnd = CheckpointVersionShiftEnd;
if (storeWrapper.objectStore != null)
clusterProvider.GetReplicationLogCheckpointManager(StoreType.Object).checkpointVersionShift = CheckpointVersionShift;
{
clusterProvider.GetReplicationLogCheckpointManager(StoreType.Object).checkpointVersionShiftStart = CheckpointVersionShiftStart;
clusterProvider.GetReplicationLogCheckpointManager(StoreType.Object).checkpointVersionShiftEnd = CheckpointVersionShiftEnd;
}

// If this node starts as replica, it cannot serve requests until it is connected to primary
if (clusterProvider.clusterManager.CurrentConfig.LocalNodeRole == NodeRole.REPLICA && clusterProvider.serverOptions.Recover && !StartRecovery())
Expand Down Expand Up @@ -152,7 +156,7 @@ public ReplicationManager(ClusterProvider clusterProvider, ILogger logger = null

public string GetBufferPoolStats() => networkPool.GetStats();

void CheckpointVersionShift(bool isMainStore, long oldVersion, long newVersion)
void CheckpointVersionShiftStart(bool isMainStore, long oldVersion, long newVersion)
{
if (clusterProvider.clusterManager.CurrentConfig.LocalNodeRole == NodeRole.REPLICA)
return;
Expand All @@ -162,6 +166,16 @@ void CheckpointVersionShift(bool isMainStore, long oldVersion, long newVersion)
storeWrapper.EnqueueCommit(entryType, newVersion);
}

/// <summary>
/// Callback invoked when a checkpoint version shift ends. Unless the local node is a
/// replica, enqueues a checkpoint-end commit entry for <paramref name="newVersion"/>.
/// </summary>
/// <param name="isMainStore">True to use the main store entry types, false for the object store entry types.</param>
/// <param name="oldVersion">Old version; not used by this method.</param>
/// <param name="newVersion">Version passed to <c>EnqueueCommit</c> along with the entry type.</param>
void CheckpointVersionShiftEnd(bool isMainStore, long oldVersion, long newVersion)
{
    // Nothing is enqueued when this node is currently a replica.
    var localRole = clusterProvider.clusterManager.CurrentConfig.LocalNodeRole;
    if (localRole == NodeRole.REPLICA)
        return;

    // Diskless sync uses the streaming variants of the end-commit entry types.
    var useStreaming = clusterProvider.serverOptions.ReplicaDisklessSync;
    var entryType = isMainStore
        ? (useStreaming ? AofEntryType.MainStoreStreamingCheckpointEndCommit : AofEntryType.MainStoreCheckpointEndCommit)
        : (useStreaming ? AofEntryType.ObjectStoreStreamingCheckpointEndCommit : AofEntryType.ObjectStoreCheckpointEndCommit);

    storeWrapper.EnqueueCommit(entryType, newVersion);
}

/// <summary>
/// Acquire recovery and checkpoint locks to prevent checkpoints and parallel recovery tasks
/// </summary>
Expand Down
3 changes: 2 additions & 1 deletion libs/host/GarnetServer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ private void InitializeServer()
if (!setMax && !ThreadPool.SetMaxThreads(maxThreads, maxCPThreads))
throw new Exception($"Unable to call ThreadPool.SetMaxThreads with {maxThreads}, {maxCPThreads}");

opts.Initialize(loggerFactory);
CreateMainStore(clusterFactory, out var checkpointDir);
CreateObjectStore(clusterFactory, customCommandManager, checkpointDir, out var objectStoreSizeTracker);

Expand Down Expand Up @@ -324,7 +325,7 @@ private void CreateObjectStore(IClusterFactory clusterFactory, CustomCommandMana
objectStoreSizeTracker = null;
if (!opts.DisableObjects)
{
objKvSettings = opts.GetObjectStoreSettings(this.loggerFactory?.CreateLogger("TsavoriteKV [obj]"),
objKvSettings = opts.GetObjectStoreSettings(loggerFactory,
out var objHeapMemorySize, out var objReadCacheHeapMemorySize);

// Run checkpoint on its own thread to control p99
Expand Down
5 changes: 2 additions & 3 deletions libs/server/AOF/AofProcessor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,10 @@ public unsafe void ProcessAofRecordInternal(byte* ptr, int length, bool asReplic
break;
case AofEntryType.MainStoreCheckpointStartCommit:
if (asReplica && header.storeVersion > storeWrapper.store.CurrentVersion)
_ = storeWrapper.TakeCheckpoint(false, StoreType.Main, logger);
_ = storeWrapper.TakeCheckpoint(false, logger);
break;
case AofEntryType.ObjectStoreCheckpointStartCommit:
if (asReplica && header.storeVersion > storeWrapper.objectStore.CurrentVersion)
_ = storeWrapper.TakeCheckpoint(false, StoreType.Object, logger);
// With unified checkpoint, we do not need to take object store checkpoint separately
break;
case AofEntryType.MainStoreCheckpointEndCommit:
case AofEntryType.ObjectStoreCheckpointEndCommit:
Expand Down
6 changes: 6 additions & 0 deletions libs/server/Lua/LuaRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1988,6 +1988,7 @@ internal int UnsafeRunPreambleForRunner()
private void RunInTransaction<TResponse>(ref TResponse response)
where TResponse : struct, IResponseAdapter
{
var txnVersion = respServerSession.storageSession.stateMachineDriver.AcquireTransactionVersion();
try
{
respServerSession.storageSession.lockableContext.BeginLockable();
Expand All @@ -1996,6 +1997,10 @@ private void RunInTransaction<TResponse>(ref TResponse response)
respServerSession.SetTransactionMode(true);
txnKeyEntries.LockAllKeys();

txnVersion = respServerSession.storageSession.stateMachineDriver.VerifyTransactionVersion(txnVersion);
respServerSession.storageSession.lockableContext.LocksAcquired(txnVersion);
if (!respServerSession.storageSession.objectStoreLockableContext.IsNull)
respServerSession.storageSession.objectStoreLockableContext.LocksAcquired(txnVersion);
RunCommon(ref response);
}
finally
Expand All @@ -2005,6 +2010,7 @@ private void RunInTransaction<TResponse>(ref TResponse response)
respServerSession.storageSession.lockableContext.EndLockable();
if (!respServerSession.storageSession.objectStoreLockableContext.IsNull)
respServerSession.storageSession.objectStoreLockableContext.EndLockable();
respServerSession.storageSession.stateMachineDriver.EndTransaction(txnVersion);
}
}

Expand Down
4 changes: 2 additions & 2 deletions libs/server/Resp/AdminCommands.cs
Original file line number Diff line number Diff line change
Expand Up @@ -818,7 +818,7 @@ private bool NetworkSAVE()
return AbortWithWrongNumberOfArguments(nameof(RespCommand.SAVE));
}

if (!storeWrapper.TakeCheckpoint(false, StoreType.All, logger))
if (!storeWrapper.TakeCheckpoint(false, logger))
{
while (!RespWriteUtils.TryWriteError("ERR checkpoint already in progress"u8, ref dcurr, dend))
SendAndReset();
Expand Down Expand Up @@ -853,7 +853,7 @@ private bool NetworkBGSAVE()
return AbortWithWrongNumberOfArguments(nameof(RespCommand.BGSAVE));
}

var success = storeWrapper.TakeCheckpoint(true, StoreType.All, logger);
var success = storeWrapper.TakeCheckpoint(true, logger);
if (success)
{
while (!RespWriteUtils.TryWriteSimpleString("Background saving started"u8, ref dcurr, dend))
Expand Down
2 changes: 1 addition & 1 deletion libs/server/Resp/RespServerSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ public RespServerSession(
// Associate new session with default user and automatically authenticate, if possible
this.AuthenticateUser(Encoding.ASCII.GetBytes(this.storeWrapper.accessControlList.GetDefaultUserHandle().User.Name));

txnManager = new TransactionManager(this, storageSession, scratchBufferManager, storeWrapper.serverOptions.EnableCluster, logger);
txnManager = new TransactionManager(this, storageSession, scratchBufferManager, storeWrapper.serverOptions.StateMachineDriver, storeWrapper.serverOptions.EnableCluster, logger);
storageSession.txnManager = txnManager;

clusterSession = storeWrapper.clusterProvider?.CreateClusterSession(txnManager, this._authenticator, this._userHandle, sessionMetrics, basicGarnetApi, networkSender, logger);
Expand Down
40 changes: 32 additions & 8 deletions libs/server/Servers/GarnetServerOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,16 @@ public class GarnetServerOptions : ServerOptions
/// </summary>
public UnixFileMode UnixSocketPermission { get; set; }

/// <summary>
/// Epoch instance used by server
/// </summary>
public LightEpoch Epoch;

/// <summary>
/// Common state machine driver used by Garnet
/// </summary>
public StateMachineDriver StateMachineDriver;

/// <summary>
/// Constructor
/// </summary>
Expand All @@ -454,6 +464,16 @@ public GarnetServerOptions(ILogger logger = null) : base(logger)
this.logger = logger;
}

/// <summary>
/// Initialize Garnet server options: creates the <see cref="Epoch"/> instance and the
/// <see cref="StateMachineDriver"/> that is constructed over it.
/// </summary>
/// <param name="loggerFactory">
/// Optional logger factory; when non-null it is used to create the logger handed to the
/// state machine driver, otherwise the driver receives a null logger.
/// </param>
public void Initialize(ILoggerFactory loggerFactory = null)
{
    Epoch = new LightEpoch();
    // Plain string literal: there is nothing to interpolate in the logger category name.
    StateMachineDriver = new(Epoch, loggerFactory?.CreateLogger("StateMachineDriver"));
}

/// <summary>
/// Get main store settings
/// </summary>
Expand All @@ -466,15 +486,16 @@ public KVSettings<SpanByte, SpanByte> GetSettings(ILoggerFactory loggerFactory,
if (MutablePercent is < 10 or > 95)
throw new Exception("MutablePercent must be between 10 and 95");

KVSettings<SpanByte, SpanByte> kvSettings = new(baseDir: null, logger: logger);

var indexCacheLines = IndexSizeCachelines("hash index size", IndexSize);
kvSettings = new()

KVSettings<SpanByte, SpanByte> kvSettings = new()
{
IndexSize = indexCacheLines * 64L,
PreallocateLog = false,
MutableFraction = MutablePercent / 100.0,
PageSize = 1L << PageSizeBits(),
Epoch = Epoch,
StateMachineDriver = StateMachineDriver,
loggerFactory = loggerFactory,
logger = loggerFactory?.CreateLogger("TsavoriteKV [main]")
};
Expand Down Expand Up @@ -618,23 +639,26 @@ public static int MemorySizeBits(string memorySize, string storePageSize, out in
/// <summary>
/// Get KVSettings for the object store log
/// </summary>
public KVSettings<byte[], IGarnetObject> GetObjectStoreSettings(ILogger logger, out long objHeapMemorySize, out long objReadCacheHeapMemorySize)
public KVSettings<byte[], IGarnetObject> GetObjectStoreSettings(ILoggerFactory loggerFactory, out long objHeapMemorySize, out long objReadCacheHeapMemorySize)
{
objReadCacheHeapMemorySize = default;

if (ObjectStoreMutablePercent is < 10 or > 95)
throw new Exception("ObjectStoreMutablePercent must be between 10 and 95");

KVSettings<byte[], IGarnetObject> kvSettings = new(baseDir: null, logger: logger);

var indexCacheLines = IndexSizeCachelines("object store hash index size", ObjectStoreIndexSize);
kvSettings = new()
KVSettings<byte[], IGarnetObject> kvSettings = new()
{
IndexSize = indexCacheLines * 64L,
PreallocateLog = false,
MutableFraction = ObjectStoreMutablePercent / 100.0,
PageSize = 1L << ObjectStorePageSizeBits()
PageSize = 1L << ObjectStorePageSizeBits(),
Epoch = Epoch,
StateMachineDriver = StateMachineDriver,
loggerFactory = loggerFactory,
logger = loggerFactory?.CreateLogger("TsavoriteKV [obj]")
};

logger?.LogInformation("[Object Store] Using page size of {PageSize}", PrettySize(kvSettings.PageSize));
logger?.LogInformation("[Object Store] Each page can hold ~{PageSize} key-value pairs of objects", kvSettings.PageSize / 24);

Expand Down
3 changes: 2 additions & 1 deletion libs/server/Storage/Session/StorageSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ sealed partial class StorageSession : IDisposable
public readonly FunctionsState functionsState;

public TransactionManager txnManager;
public StateMachineDriver stateMachineDriver;
readonly ILogger logger;
private readonly CollectionItemBroker itemBroker;

Expand All @@ -64,7 +65,7 @@ public StorageSession(StoreWrapper storeWrapper,
this.scratchBufferManager = scratchBufferManager;
this.logger = logger;
this.itemBroker = storeWrapper.itemBroker;

this.stateMachineDriver = storeWrapper.serverOptions.StateMachineDriver;
parseState.Initialize();

functionsState = storeWrapper.CreateFunctionsState();
Expand Down
Loading