@@ -16,16 +16,27 @@ import (
	"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
+	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftlog"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/readsummary/rspb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
+	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/errors"
	"go.etcd.io/raft/v3"
)

+// useReproposalsV2 activates prototype code that, instead of reproposing using a
+// modified lease index, makes a new proposal (with a different CmdID), transfers
+// the waiting caller (if any) to it, and proposes that. With this strategy, the
+// *RaftCommand associated with a proposal becomes immutable, which simplifies the
+// mental model and allows various simplifications in the proposal pipeline. For
+// now, the old and new behavior coexist, and we want to keep exercising both.
+// Once we have confidence, we'll hard-code true and remove all old code paths.
+var useReproposalsV2 = util.ConstantWithMetamorphicTestBool("reproposals-v2", true)
+
// replica_application_*.go files provide concrete implementations of
// the interfaces defined in the storage/apply package:
//
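
Aside: `util.ConstantWithMetamorphicTestBool` is what keeps both code paths exercised while they coexist. Below is a minimal, self-contained sketch of the metamorphic-constant pattern it relies on (hypothetical names throughout; the real pkg/util implementation is gated on test builds rather than a hard-coded flag):

package main

import (
	"fmt"
	"math/rand"
)

// metamorphicTestBuild stands in for build-tag detection in a real test
// build; in production builds, constants always take their default value.
const metamorphicTestBuild = true

// constantWithMetamorphicTestBool returns defaultValue in production, but in
// test builds flips a coin so that both values (and hence both code paths)
// keep getting exercised across test runs.
func constantWithMetamorphicTestBool(name string, defaultValue bool) bool {
	if metamorphicTestBuild && rand.Intn(2) == 1 {
		fmt.Printf("metamorphic constant %q overridden to %t\n", name, !defaultValue)
		return !defaultValue
	}
	return defaultValue
}

var useReproposalsV2 = constantWithMetamorphicTestBool("reproposals-v2", true)

func main() {
	fmt.Println("useReproposalsV2 =", useReproposalsV2)
}

In production builds the default (`true`) always wins; under test builds the value is randomized per process, so the test suite keeps covering the legacy path until it is deleted.
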
@@ -105,6 +116,16 @@ func (r *Replica) prepareLocalResult(ctx context.Context, cmd *replicatedCmd) {
	case kvserverbase.ProposalRejectionPermanent:
		cmd.response.Err = pErr
	case kvserverbase.ProposalRejectionIllegalLeaseIndex:
+		if useReproposalsV2 {
+			// If we're using V2 reproposals, this proposal is actually going to
+			// be fully rejected, but the client won't be listening to it at that
+			// point any more. We should still set the error; this ends up being
+			// inconsequential, but it's the right thing to do.
+			//
+			// TODO(tbg): once useReproposalsV2 is baked in, set the error
+			// unconditionally above the `switch`.
+			cmd.response.Err = pErr
+		}
		// Reset the error as it's now going to be determined by the outcome of
		// reproposing (or not); note that tryReproposeWithNewLeaseIndex will
		// return `nil` if the entry is not eligible for reproposals.
@@ -171,7 +192,18 @@ func (r *Replica) prepareLocalResult(ctx context.Context, cmd *replicatedCmd) {
		}
	}
	if pErr == nil { // since we might have injected an error
-		pErr = r.tryReproposeWithNewLeaseIndex(ctx, cmd)
+		if useReproposalsV2 {
+			pErr = kvpb.NewError(r.tryReproposeWithNewLeaseIndexV2(ctx, cmd))
+			if pErr == nil {
+				// Avoid falling through below. We managed to repropose, but this
+				// proposal is still erroring out. We don't want to assign to
+				// localResult. If there is an error, though, we do fall through
+				// into the existing tangle of correct but unreadable handling below.
+				return
+			}
+		} else {
+			pErr = r.tryReproposeWithNewLeaseIndex(ctx, cmd)
+		}
	}

	if pErr != nil {
@@ -210,10 +242,16 @@ func (r *Replica) prepareLocalResult(ctx context.Context, cmd *replicatedCmd) {
		// https://github.com/cockroachdb/cockroach/issues/97633
		log.Infof(ctx, "failed to repropose %s at idx %d with new lease index: %s", cmd.ID, cmd.Index(), pErr)
		cmd.response.Err = pErr
-	} else {
+		// Fall through.
+	} else if !useReproposalsV2 {
		// Unbind the entry's local proposal because we just succeeded
		// in reproposing it and we don't want to acknowledge the client
		// yet.
+		//
+		// NB: in v2, reproposing already moved the waiting caller over to a new
+		// proposal, and by design we don't change the "Localness" of the old
+		// proposal mid-application but instead let it fail as a local proposal
+		// (which signals into a throwaway channel).
		cmd.proposal = nil
		return
	}
@@ -225,6 +263,14 @@ func (r *Replica) prepareLocalResult(ctx context.Context, cmd *replicatedCmd) {
	} else {
		log.Fatalf(ctx, "proposal must return either a reply or an error: %+v", cmd.proposal)
	}
+
+	// The current proposal has no error (and wasn't reproposed successfully, or
+	// we would've early-returned already) OR it has an error AND we failed to
+	// repropose it.
+	//
+	// TODO(tbg): it doesn't make sense to assign to `cmd.response` unconditionally.
+	// We're returning an error; the response should be nil. The error tracking in
+	// this method should be cleaned up.
	cmd.response.EncounteredIntents = cmd.proposal.Local.DetachEncounteredIntents()
	cmd.response.EndTxns = cmd.proposal.Local.DetachEndTxns(pErr != nil)
	if pErr == nil {
@@ -234,6 +280,134 @@ func (r *Replica) prepareLocalResult(ctx context.Context, cmd *replicatedCmd) {
	}
}

+func (r *Replica) tryReproposeWithNewLeaseIndexV2(
+	ctx context.Context, origCmd *replicatedCmd,
+) error {
+	// NB: `origCmd` remains "Local". It's just not going to signal anyone
+	// or release any latches.
+
+	origP := origCmd.proposal
+
+	// We want to move a few items from origCmd to the new command, but only if
+	// we managed to propose the new command. For example, if we move the latches
+	// over too early but then fail to actually get the new proposal started, the
+	// old proposal will not release the latches. This would result in a lost
+	// latch.
+	var success bool
+
+	// Go through the original proposal field by field and decide what transfers
+	// to the new proposal (and how that affects the old proposal). The overall
+	// goal is that the old proposal remains a local proposal (switching it to
+	// non-local now invites logic bugs) but is no longer bound to the caller.
+
+	// NB: quotaAlloc is always nil here, because we already
+	// released the quota unconditionally in retrieveLocalProposalsV2.
+	// So the below is a no-op.
+	//
+	// TODO(tbg): if we shifted the release of proposal quota to *after*
+	// successful application, we could move the quota over instead of
+	// prematurely releasing it here.
+	newQuotaAlloc := origP.quotaAlloc
+	defer func() {
+		if success {
+			origP.quotaAlloc = nil
+		}
+	}()
+
+	newCommand := kvserverpb.RaftCommand{
+		ProposerLeaseSequence:   origP.command.ProposerLeaseSequence,
+		DeprecatedProposerLease: origP.command.DeprecatedProposerLease,
+		ReplicatedEvalResult:    origP.command.ReplicatedEvalResult,
+		WriteBatch:              origP.command.WriteBatch,
+		LogicalOpLog:            origP.command.LogicalOpLog,
+		TraceData:               origP.command.TraceData,
+
+		MaxLeaseIndex:       0,   // assigned on flush
+		ClosedTimestamp:     nil, // assigned on flush
+		AdmissionPriority:   0,   // assigned on flush
+		AdmissionCreateTime: 0,   // assigned on flush
+		AdmissionOriginNode: 0,   // assigned on flush
+	}
+
+	// Now we construct the remainder of the ProposalData. First, the pieces
+	// that actively "move over", i.e. those that have to do with the latches
+	// held and the caller waiting to be signaled.
+
+	// `ec` (latches, etc.) transfers to the new proposal.
+	newEC := origP.ec
+	defer func() {
+		if success {
+			origP.ec = endCmds{}
+		}
+	}()
+
+	// Ditto doneCh (signal to proposer).
+	newDoneCh := origP.doneCh
+	defer func() {
+		if success {
+			origP.doneCh = nil
+		}
+	}()
+
+	r.mu.RLock()
+	ticks := r.mu.ticks
+	r.mu.RUnlock()
+
+	// TODO(tbg): work on the lifecycle of ProposalData. This struct (and the
+	// surrounding replicatedCmd) is populated in an overly ad-hoc manner.
+	// TODO(tbg): the fields are spelled out here to make explicit what is being
+	// copied. Add a unit test that fails when a new field is added, pointing at
+	// the need to double-check what the intended behavior of the new field in
+	// this method is.
+	newProposal := &ProposalData{
+		// The proposal's context and span carry over. Recall that they are *not*
+		// used for command application; `cmd.{ctx,sp}` are; and since this last
+		// span "follows from" the proposal's span, if the proposal sticks around
+		// for (some reincarnation of) the command to eventually apply, its trace
+		// will reflect the reproposal as well.
+		ctx:             origP.ctx,
+		sp:              origP.sp, // NB: special handling below
+		idKey:           raftlog.MakeCmdIDKey(),
+		proposedAtTicks: 0, // set in registerProposalLocked
+		createdAtTicks:  ticks,
+		command:         &newCommand,
+		quotaAlloc:      newQuotaAlloc,
+		ec:              newEC,
+		applied:         false,
+		doneCh:          newDoneCh,
+		// Local is copied over. It won't be used on the old proposal (since that
+		// proposal got rejected), but since it's still "local" we don't want to
+		// put it into an undefined state by removing its response. The same goes
+		// for Request.
+		Local:                   origP.Local,
+		Request:                 origP.Request,
+		leaseStatus:             origP.leaseStatus,
+		tok:                     TrackedRequestToken{}, // filled in in `propose`
+		encodedCommand:          nil,
+		raftAdmissionMeta:       nil,
+		v2SeenDuringApplication: false,
+	}
+	// If the original proposal had an explicit span, it's an async consensus
+	// proposal and the span would be finished momentarily (when we return to
+	// the caller) if we didn't unlink it here; but we want it to continue
+	// tracking newProposal. We leave it in `origP.ctx`, though, since that
+	// context will become unused once the application of this (soft-failed)
+	// proposal concludes, i.e. soon after this method returns, in case there
+	// is anything left to log into it.
+	defer func() {
+		if success {
+			origP.sp = nil
+		}
+	}()
+
+	if err := r.tryReproposeWithNewLeaseIndexShared(ctx, newProposal).GoError(); err != nil {
+		return err
+	}
+
+	success = true
+	return nil
+}
+
// tryReproposeWithNewLeaseIndex is used by prepareLocalResult to repropose
// commands that have gotten an illegal lease index error, and that we know
// could not have applied while their lease index was valid (that is, we
@@ -267,7 +441,12 @@ func (r *Replica) tryReproposeWithNewLeaseIndex(
		// succeeding in the Raft log for a given command.
		return nil
	}
+	return r.tryReproposeWithNewLeaseIndexShared(ctx, cmd.proposal)
+}

+func (r *Replica) tryReproposeWithNewLeaseIndexShared(
+	ctx context.Context, p *ProposalData,
+) *kvpb.Error {
	// We need to track the request again in order to protect its timestamp until
	// it gets reproposed.
	// TODO(andrei): Only track if the request consults the ts cache. Some
@@ -299,7 +478,7 @@ func (r *Replica) tryReproposeWithNewLeaseIndex(
	if pErr != nil {
		return pErr
	}
-	log.VEventf(ctx, 2, "reproposed command %x", cmd.ID)
+	log.VEventf(ctx, 2, "reproposed command %x", p.idKey)
	return nil
}
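
One pattern in `tryReproposeWithNewLeaseIndexV2` above deserves a closer look: each transferable resource (latches, doneCh, span, quota) is copied to the new proposal eagerly, but only detached from the old proposal via a `success`-guarded defer once the repropose is known to have succeeded, so a failed handoff leaves the old proposal fully responsible for cleanup. A minimal sketch of that transfer-on-success idiom (hypothetical types and names, not the actual kvserver code):

package main

import "fmt"

// resources stands in for the pieces that move between proposals above:
// endCmds (latches), doneCh, and the tracing span.
type resources struct {
	latches *string // hypothetical stand-in for held latches
}

// handoff copies the resources to a new owner up front, but only detaches
// them from the old owner once the propose step is known to have succeeded.
// On failure, the old owner keeps the resources and remains responsible for
// releasing them, so nothing is leaked or double-released.
func handoff(old *resources, propose func(*resources) error) error {
	newRes := &resources{latches: old.latches}
	var success bool
	defer func() {
		if success {
			old.latches = nil // old owner must no longer release them
		}
	}()
	if err := propose(newRes); err != nil {
		return err // old retains ownership
	}
	success = true
	return nil
}

func main() {
	l := "latches"
	old := &resources{latches: &l}
	if err := handoff(old, func(*resources) error { return nil }); err == nil {
		fmt.Println("old still owns latches:", old.latches != nil) // false
	}
}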