48
48
DEFINE_test_flag (bool , assert_olm_empty_locks_map, false ,
49
49
" When set, asserts that the local locks map is empty at shutdown. Used in tests "
50
50
" to assert sanity, where tserver doesn't loose YSQL lease and all connections release "
51
- " acquired/timedout /errored locks." );
51
+ " acquired/tryagain /errored locks." );
52
52
53
53
DEFINE_test_flag (bool , olm_skip_scheduling_waiter_resumption, false ,
54
54
" When set, don't signal potential waiters for resumption" );
55
55
56
- // TODO(bkolagani): Default flag to false once issues with deadlock detection are resolved.
57
- DEFINE_test_flag (bool , olm_skip_sending_wait_for_probes, true ,
56
+ DEFINE_test_flag (bool , olm_skip_sending_wait_for_probes, false ,
58
57
" When set, the lock manager doesn't send wait-for probres to the local waiting txn registry, "
59
58
" essentially giving away deadlock detection." );
60
59
@@ -71,8 +70,8 @@ namespace {
71
70
const Status kShuttingDownError = STATUS(
72
71
ShutdownInProgress, " Object Lock Manager shutting down" );
73
72
74
- const Status kTimedOut = STATUS(
75
- TimedOut , " Failed to acquire object locks within deadline" );
73
+ const Status kTryAgain = STATUS(
74
+ TryAgain , " Failed to acquire object locks within deadline" );
76
75
77
76
const Status kTxnExpired = STATUS(
78
77
Expired, " Transaction expired, all acquired object locks have been released" );
@@ -191,6 +190,10 @@ struct WaiterEntry {
191
190
return lock_data.deadline ;
192
191
}
193
192
193
+ std::string ToString () const {
194
+ return YB_STRUCT_TO_STRING (lock_data);
195
+ }
196
+
194
197
TrackedTxnLockEntryPtr transaction_entry;
195
198
LockData lock_data;
196
199
size_t resume_it_offset;
@@ -209,7 +212,6 @@ using WaiterEntryPtr = std::shared_ptr<WaiterEntry>;
209
212
210
213
struct StartUsTag ;
211
214
struct DeadlineTag ;
212
- struct OwnerTag ;
213
215
using Waiters = boost::multi_index_container<
214
216
WaiterEntryPtr,
215
217
boost::multi_index::indexed_by<
@@ -220,12 +222,6 @@ using Waiters = boost::multi_index_container<
220
222
boost::multi_index::ordered_non_unique<
221
223
boost::multi_index::tag<DeadlineTag>,
222
224
boost::multi_index::const_mem_fun<WaiterEntry, CoarseTimePoint, &WaiterEntry::deadline>
223
- >,
224
- boost::multi_index::hashed_non_unique<
225
- boost::multi_index::tag<OwnerTag>,
226
- boost::multi_index::const_mem_fun<
227
- WaiterEntry, const TransactionId&, &WaiterEntry::txn_id
228
- >
229
225
>
230
226
>
231
227
>;
@@ -270,7 +266,7 @@ class ObjectLockManagerImpl {
270
266
271
267
void Lock (LockData&& data);
272
268
273
- void Unlock (const ObjectLockOwner& object_lock_owner, Status resume_with_status );
269
+ void Unlock (const ObjectLockOwner& object_lock_owner);
274
270
275
271
void Poll () EXCLUDES(global_mutex_);
276
272
@@ -328,13 +324,6 @@ class ObjectLockManagerImpl {
328
324
TrackedTransactionLockEntry::LockEntryMap& locks_map,
329
325
TrackedTxnLockEntryPtr& txn_entry) REQUIRES(global_mutex_, txn_entry->mutex);
330
326
331
- void SignalTerminateFinishedWaiters (
332
- const ObjectLockOwner& object_lock_owner, TrackedTxnLockEntryPtr& txn_entry,
333
- Status resume_with_status) REQUIRES(global_mutex_, txn_entry->mutex);
334
-
335
- void DoSignalTerminateFinishedWaiters (
336
- ObjectLockedBatchEntry* entry, TransactionId txn_id, Status resume_with_status);
337
-
338
327
bool UnlockSingleEntry (const LockBatchEntry<ObjectLockManager>& lock_entry);
339
328
340
329
bool DoUnlockSingleEntry (ObjectLockedBatchEntry& entry, LockState sub);
@@ -348,9 +337,6 @@ class ObjectLockManagerImpl {
348
337
349
338
void DoSignal (ObjectLockedBatchEntry* entry);
350
339
351
- void DoTerminateFinishedWaiters (
352
- ObjectLockedBatchEntry* entry, TransactionId txn_id, Status resume_with_status);
353
-
354
340
void DoComputeBlockersWithinQueue (
355
341
ObjectLockedBatchEntry* locked_batch_entry, std::optional<ObjectLockPrefix>& key,
356
342
LockStateBlockersMap& lockstate_blocker_map);
@@ -518,7 +504,7 @@ Status ObjectLockManagerImpl::MakePrepareAcquireResult(
518
504
const LockData& data, Status resume_with_status) {
519
505
RETURN_NOT_OK (resume_with_status);
520
506
if (data.deadline < CoarseMonoClock::Now ()) {
521
- return kTimedOut ;
507
+ return kTryAgain ;
522
508
}
523
509
if (shutdown_in_progress_) {
524
510
return kShuttingDownError ;
@@ -644,8 +630,7 @@ bool ObjectLockManagerImpl::DoLockSingleEntry(
644
630
}
645
631
}
646
632
647
- void ObjectLockManagerImpl::Unlock (const ObjectLockOwner& object_lock_owner,
648
- Status resume_with_status) {
633
+ void ObjectLockManagerImpl::Unlock (const ObjectLockOwner& object_lock_owner) {
649
634
TRACE (" Unlocking all keys for owner $0" , AsString (object_lock_owner));
650
635
651
636
TrackedTxnLockEntryPtr txn_entry;
@@ -672,11 +657,17 @@ void ObjectLockManagerImpl::Unlock(const ObjectLockOwner& object_lock_owner,
672
657
std::lock_guard lock (global_mutex_);
673
658
UniqueLock txn_lock (txn_entry->mutex );
674
659
DoUnlock (object_lock_owner, txn_entry->granted_locks , txn_entry);
675
- // Terminate any obsolete waiting lock request for this txn/subtxn. This could happen when
676
- // 1. txn gets aborted due to a deadlock and the pg backend issues a finish txn request
677
- // 2. txn times out due to conflict and pg backend issues a finish txn request before the
678
- // lock manager times out the waiting lock request.
679
- SignalTerminateFinishedWaiters (object_lock_owner, txn_entry, resume_with_status);
660
+ // We let the obsolete waiting lock request for this txn/subtxn, if any, time out and be resumed
661
+ // as part of ObjectLockManagerImpl::Poll. This should be okay since:
662
+ // 1. Obsolete waiting request could exist when txn times out due to conflict and pg backend
663
+ // issues a finish txn request before the lock manager times out the obsolete request. Since
664
+ // the obsolete waiting request would be past the deadline anyway, it would be resumed soon.
665
+ // 2. On abort due to txn deadlock, we don't send an early release-all request anyway.
666
+ // PgClientSession waits for the previous lock req deadline (FLAGS_refresh_waiter_timeout_ms)
667
+ // and then drops the retry since the txn failed.
668
+ //
669
+ // If there's any requirement to terminate obsolete waiters early based on txn id, then we should
670
+ // signal appropriately here.
680
671
}
681
672
682
673
void ObjectLockManagerImpl::DoUnlock (
@@ -703,73 +694,6 @@ void ObjectLockManagerImpl::DoUnlock(
703
694
}
704
695
}
705
696
706
- void ObjectLockManagerImpl::SignalTerminateFinishedWaiters (
707
- const ObjectLockOwner& object_lock_owner,
708
- TrackedTxnLockEntryPtr& txn_entry,
709
- Status resume_with_status) {
710
- auto & locks_map = txn_entry->waiting_locks ;
711
- if (object_lock_owner.subtxn_id ) {
712
- auto subtxn_itr = locks_map.find (object_lock_owner.subtxn_id );
713
- if (subtxn_itr == locks_map.end ()) {
714
- return ;
715
- }
716
- for (auto itr = subtxn_itr->second .begin (); itr != subtxn_itr->second .end (); itr++) {
717
- DoSignalTerminateFinishedWaiters (
718
- &itr->second .locked_batch_entry , object_lock_owner.txn_id , resume_with_status);
719
- }
720
- return ;
721
- }
722
- for (auto locks_itr = locks_map.begin (); locks_itr != locks_map.end (); locks_itr++) {
723
- for (auto itr = locks_itr->second .begin (); itr != locks_itr->second .end (); itr++) {
724
- DoSignalTerminateFinishedWaiters (
725
- &itr->second .locked_batch_entry , object_lock_owner.txn_id , resume_with_status);
726
- }
727
- }
728
- }
729
-
730
- void ObjectLockManagerImpl::DoSignalTerminateFinishedWaiters (
731
- ObjectLockedBatchEntry* entry, TransactionId txn_id, Status resume_with_status) {
732
- WARN_NOT_OK (
733
- thread_pool_token_->SubmitFunc (
734
- std::bind (&ObjectLockManagerImpl::DoTerminateFinishedWaiters, this , entry, txn_id,
735
- resume_with_status)),
736
- " Failure submitting task ObjectLockManagerImpl::DoTerminateFinishedWaiters" );
737
- }
738
-
739
- void ObjectLockManagerImpl::DoTerminateFinishedWaiters (
740
- ObjectLockedBatchEntry* entry, TransactionId txn_id, Status resume_with_status) {
741
- std::vector<WaiterEntryPtr> waiters_failed_to_schedule;
742
- {
743
- std::lock_guard l (entry->mutex );
744
- auto & index = entry->wait_queue .get <OwnerTag>();
745
- auto it_range = index.equal_range (txn_id);
746
- auto it = it_range.first ;
747
- auto * messenger = server_.messenger ();
748
- while (it != it_range.second ) {
749
- auto waiter_entry = *it;
750
- it = index.erase (it);
751
- waiter_entry->waiter_registration .reset ();
752
- entry->waiting_state -= IntentTypeSetAdd (waiter_entry->resume_it ()->intent_types );
753
- VLOG (1 ) << " Resuming " << AsString (waiter_entry->object_lock_owner ());
754
- if (PREDICT_TRUE (messenger)) {
755
- ScopedOperation resuming_waiter_op (&waiters_amidst_resumption_on_messenger_);
756
- messenger->ThreadPool ().EnqueueFunctor (
757
- [operation = std::move (resuming_waiter_op), entry = std::move (waiter_entry),
758
- lock_manager = this , resume_with_status]() {
759
- entry->Resume (lock_manager, resume_with_status);
760
- });
761
- } else {
762
- // Don't schedule anything here on thread_pool_token_ as a shutdown could destroy tasks.
763
- LOG_WITH_FUNC (WARNING) << " Messenger not available" ;
764
- waiters_failed_to_schedule.push_back (std::move (waiter_entry));
765
- }
766
- }
767
- }
768
- for (auto & waiter : waiters_failed_to_schedule) {
769
- waiter->Resume (this , resume_with_status);
770
- }
771
- }
772
-
773
697
bool ObjectLockManagerImpl::UnlockSingleEntry (const LockBatchEntry<ObjectLockManager>& lock_entry) {
774
698
TRACE_FUNC ();
775
699
return DoUnlockSingleEntry (*lock_entry.locked , IntentTypeSetAdd (lock_entry.intent_types ));
@@ -797,7 +721,7 @@ void ObjectLockManagerImpl::Poll() {
797
721
}
798
722
}
799
723
for (auto & waiter : timed_out_waiters) {
800
- waiter->Resume (this , kTimedOut );
724
+ waiter->Resume (this , kTryAgain );
801
725
}
802
726
}
803
727
@@ -1151,9 +1075,8 @@ void ObjectLockManager::Lock(LockData&& data) {
1151
1075
impl_->Lock (std::move (data));
1152
1076
}
1153
1077
1154
- void ObjectLockManager::Unlock (
1155
- const ObjectLockOwner& object_lock_owner, Status resume_with_status) {
1156
- impl_->Unlock (object_lock_owner, resume_with_status);
1078
+ void ObjectLockManager::Unlock (const ObjectLockOwner& object_lock_owner) {
1079
+ impl_->Unlock (object_lock_owner);
1157
1080
}
1158
1081
1159
1082
void ObjectLockManager::Poll () {
0 commit comments