Skip to content

Commit 5c79f40

Browse files
Alexander Aring authored and gregkh committed
dlm: fix missing lkb refcount handling
commit 1689c169134f4b5a39156122d799b7dca76d8ddb upstream. We always call hold_lkb(lkb) when we increment lkb->lkb_wait_count, so we must always call unhold_lkb(lkb) when we decrement lkb->lkb_wait_count. This patch adds the missing unhold_lkb(lkb) calls wherever lkb->lkb_wait_count is decremented. Where lkb->lkb_wait_count was previously set directly to zero, we now need to count down until it reaches zero and call unhold_lkb(lkb) for each decrement. The separate unhold_lkb(lkb) for the waiters list can be removed, because that reference is now dropped by the last lkb_wait_count decrement, as is done in _remove_from_waiters(). This issue was discovered by a dlm gfs2 test case which makes excessive use of the dlm_unlock(LKF_CANCEL) feature. The lkb->lkb_wait_count value probably never rose above 1 when this feature was not used, which is why the issue was not discovered before. The test case ended with an rsb on the rsb keep data structure that had a refcount of 1 but no lkb associated with it, which is itself an invalid state. A side effect of that was a condition in which the dlm kept sending remove messages in a loop. With this patch applied, that behaviour has not been reproduced. Cc: [email protected] Signed-off-by: Alexander Aring <[email protected]> Signed-off-by: David Teigland <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 2c55155 commit 5c79f40

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

fs/dlm/lock.c

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1555,6 +1555,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
15551555
lkb->lkb_wait_type = 0;
15561556
lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
15571557
lkb->lkb_wait_count--;
1558+
unhold_lkb(lkb);
15581559
goto out_del;
15591560
}
15601561

@@ -1581,6 +1582,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
15811582
log_error(ls, "remwait error %x reply %d wait_type %d overlap",
15821583
lkb->lkb_id, mstype, lkb->lkb_wait_type);
15831584
lkb->lkb_wait_count--;
1585+
unhold_lkb(lkb);
15841586
lkb->lkb_wait_type = 0;
15851587
}
15861588

@@ -5314,11 +5316,16 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
53145316
lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
53155317
lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
53165318
lkb->lkb_wait_type = 0;
5317-
lkb->lkb_wait_count = 0;
5319+
/* drop all wait_count references we still
5320+
* hold a reference for this iteration.
5321+
*/
5322+
while (lkb->lkb_wait_count) {
5323+
lkb->lkb_wait_count--;
5324+
unhold_lkb(lkb);
5325+
}
53185326
mutex_lock(&ls->ls_waiters_mutex);
53195327
list_del_init(&lkb->lkb_wait_reply);
53205328
mutex_unlock(&ls->ls_waiters_mutex);
5321-
unhold_lkb(lkb); /* for waiters list */
53225329

53235330
if (oc || ou) {
53245331
/* do an unlock or cancel instead of resending */

0 commit comments

Comments
 (0)