Skip to content

Commit f8457fb

Browse files
authored
Fix deadlock on dmu_tx_assign() from vdev_rebuild()
vdev_rebuild() is always called with spa_config_lock held in RW_WRITER mode. However, when it tries to call dmu_tx_assign(), the latter may hang in dmu_tx_wait() waiting for an available txg. But that txg may never become available, because txg_sync takes spa_config_lock in order to process the current txg. So we have a deadlock case here:

- dmu_tx_assign() waits for a txg while holding spa_config_lock;
- txg_sync waits for spa_config_lock and so does not progress with the txg.

Here are the stacks:

__schedule+0x24e/0x590
schedule+0x69/0x110
cv_wait_common+0xf8/0x130 [spl]
__cv_wait+0x15/0x20 [spl]
dmu_tx_wait+0x8e/0x1e0 [zfs]
dmu_tx_assign+0x49/0x80 [zfs]
vdev_rebuild_initiate+0x39/0xc0 [zfs]
vdev_rebuild+0x84/0x90 [zfs]
spa_vdev_attach+0x305/0x680 [zfs]
zfs_ioc_vdev_attach+0xc7/0xe0 [zfs]

cv_wait_common+0xf8/0x130 [spl]
__cv_wait+0x15/0x20 [spl]
spa_config_enter+0xf9/0x120 [zfs]
spa_sync+0x6d/0x5b0 [zfs]
txg_sync_thread+0x266/0x2f0 [zfs]

The solution is to pass the txg returned by spa_vdev_enter(spa) at the top of spa_vdev_attach() to vdev_rebuild() and call dmu_tx_create_assigned(txg), which doesn't wait for a txg.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Akash B <[email protected]>
Reviewed-by: Alek Pinchuk <[email protected]>
Signed-off-by: Andriy Tkachuk <[email protected]>
Closes #18210
Closes #18258
1 parent f3d4c79 commit f8457fb

File tree

3 files changed

+6
-7
lines changed

3 files changed

+6
-7
lines changed

include/sys/vdev_rebuild.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ typedef struct vdev_rebuild {
9090
boolean_t vdev_rebuild_active(vdev_t *);
9191

9292
int vdev_rebuild_load(vdev_t *);
93-
void vdev_rebuild(vdev_t *);
93+
void vdev_rebuild(vdev_t *, uint64_t);
9494
void vdev_rebuild_stop_wait(vdev_t *);
9595
void vdev_rebuild_stop_all(spa_t *);
9696
void vdev_rebuild_restart(spa_t *);

module/zfs/spa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8484,7 +8484,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
84848484
if (rebuild) {
84858485
newvd->vdev_rebuild_txg = txg;
84868486

8487-
vdev_rebuild(tvd);
8487+
vdev_rebuild(tvd, txg);
84888488
} else {
84898489
newvd->vdev_resilver_txg = txg;
84908490

module/zfs/vdev_rebuild.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -278,16 +278,15 @@ vdev_rebuild_log_notify(spa_t *spa, vdev_t *vd, const char *name)
278278
* active for the duration of the rebuild, then revert to the enabled state.
279279
*/
280280
static void
281-
vdev_rebuild_initiate(vdev_t *vd)
281+
vdev_rebuild_initiate(vdev_t *vd, uint64_t txg)
282282
{
283283
spa_t *spa = vd->vdev_spa;
284284

285285
ASSERT(vd->vdev_top == vd);
286286
ASSERT(MUTEX_HELD(&vd->vdev_rebuild_lock));
287287
ASSERT(!vd->vdev_rebuilding);
288288

289-
dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
290-
VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT | DMU_TX_SUSPEND));
289+
dmu_tx_t *tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
291290

292291
vd->vdev_rebuilding = B_TRUE;
293292

@@ -1015,7 +1014,7 @@ vdev_rebuild_active(vdev_t *vd)
10151014
* top-level vdev is currently actively rebuilding.
10161015
*/
10171016
void
1018-
vdev_rebuild(vdev_t *vd)
1017+
vdev_rebuild(vdev_t *vd, uint64_t txg)
10191018
{
10201019
vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
10211020
vdev_rebuild_phys_t *vrp __maybe_unused = &vr->vr_rebuild_phys;
@@ -1039,7 +1038,7 @@ vdev_rebuild(vdev_t *vd)
10391038
if (!vd->vdev_rebuild_reset_wanted)
10401039
vd->vdev_rebuild_reset_wanted = B_TRUE;
10411040
} else {
1042-
vdev_rebuild_initiate(vd);
1041+
vdev_rebuild_initiate(vd, txg);
10431042
}
10441043
mutex_exit(&vd->vdev_rebuild_lock);
10451044
}

0 commit comments

Comments
 (0)