Skip to content

Commit a5a9411

Browse files
committed
Add knob to disable slow io notifications
Introduce a new vdev property `VDEV_PROP_SLOW_IO_EVENTS` (`slow_io_events`) that allows users to disable notifications for slow devices. This prevents ZED and/or ZFSD from degrading the pool due to slow I/O. Signed-off-by: Mariusz Zaborski <[email protected]>
1 parent 4c2a7f8 commit a5a9411

File tree

10 files changed

+123
-33
lines changed

10 files changed

+123
-33
lines changed

include/sys/fs/zfs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ typedef enum {
385385
VDEV_PROP_TRIM_SUPPORT,
386386
VDEV_PROP_TRIM_ERRORS,
387387
VDEV_PROP_SLOW_IOS,
388+
VDEV_PROP_SLOW_IO_EVENTS,
388389
VDEV_NUM_PROPS
389390
} vdev_prop_t;
390391

include/sys/vdev_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,7 @@ struct vdev {
464464
uint64_t vdev_checksum_t;
465465
uint64_t vdev_io_n;
466466
uint64_t vdev_io_t;
467+
boolean_t vdev_slow_io_events;
467468
uint64_t vdev_slow_io_n;
468469
uint64_t vdev_slow_io_t;
469470
};

lib/libzfs/libzfs.abi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5930,7 +5930,8 @@
59305930
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
59315931
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
59325932
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
5933-
<enumerator name='VDEV_NUM_PROPS' value='52'/>
5933+
<enumerator name='VDEV_PROP_SLOW_IO_EVENTS' value='52'/>
5934+
<enumerator name='VDEV_NUM_PROPS' value='53'/>
59345935
</enum-decl>
59355936
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
59365937
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>

man/man7/vdevprops.7

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ section, below.
4545
Every vdev has a set of properties that export statistics about the vdev
4646
as well as control various behaviors.
4747
Properties are not inherited from top-level vdevs, with the exception of
48-
checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t.
48+
checksum_n, checksum_t, io_n, io_t, slow_io_events, slow_io_n, and slow_io_t.
4949
.Pp
5050
The values of numeric properties can be specified using human-readable suffixes
5151
.Po for example,
@@ -126,7 +126,8 @@ Indicates if a leaf device supports trim operations.
126126
.Pp
127127
The following native properties can be used to change the behavior of a vdev.
128128
.Bl -tag -width "allocating"
129-
.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_n , slow_io_t
129+
.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_events , slow_io_n ,
130+
.It Sy slow_io_t
130131
Tune the fault management daemon by specifying checksum/io thresholds of <N>
131132
errors in <T> seconds, respectively.
132133
These properties can be set on leaf and top-level vdevs.

module/zcommon/zpool_prop.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,9 @@ vdev_prop_init(void)
475475
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
476476
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
477477
sfeatures);
478+
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
479+
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
480+
"SLOW_IO_EVENTS", boolean_table, sfeatures);
478481

479482
/* hidden properties */
480483
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,

module/zfs/vdev.c

Lines changed: 61 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -427,32 +427,53 @@ vdev_get_nparity(vdev_t *vd)
427427
}
428428

429429
static int
430-
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
430+
vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
431431
{
432-
spa_t *spa = vd->vdev_spa;
433-
objset_t *mos = spa->spa_meta_objset;
434-
uint64_t objid;
435-
int err;
436432

437433
if (vd->vdev_root_zap != 0) {
438-
objid = vd->vdev_root_zap;
434+
*objid = vd->vdev_root_zap;
439435
} else if (vd->vdev_top_zap != 0) {
440-
objid = vd->vdev_top_zap;
436+
*objid = vd->vdev_top_zap;
441437
} else if (vd->vdev_leaf_zap != 0) {
442-
objid = vd->vdev_leaf_zap;
438+
*objid = vd->vdev_leaf_zap;
443439
} else {
444440
return (EINVAL);
445441
}
446442

443+
return (0);
444+
}
445+
446+
static int
447+
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
448+
{
449+
spa_t *spa = vd->vdev_spa;
450+
objset_t *mos = spa->spa_meta_objset;
451+
uint64_t objid;
452+
int err;
453+
454+
if (vdev_prop_get_objid(vd, &objid) != 0)
455+
return (EINVAL);
456+
447457
err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
448458
sizeof (uint64_t), 1, value);
449-
450459
if (err == ENOENT)
451460
*value = vdev_prop_default_numeric(prop);
452461

453462
return (err);
454463
}
455464

465+
static int
466+
vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
467+
{
468+
int err;
469+
uint64_t ivalue;
470+
471+
err = vdev_prop_get_int(vd, prop, &ivalue);
472+
*bvalue = ivalue != 1;
473+
474+
return (err);
475+
}
476+
456477
/*
457478
* Get the number of data disks for a top-level vdev.
458479
*/
@@ -714,8 +735,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
714735
*/
715736
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
716737
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
738+
717739
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
718740
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
741+
742+
vd->vdev_slow_io_events = vdev_prop_default_numeric(
743+
VDEV_PROP_SLOW_IO_EVENTS);
719744
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
720745
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
721746

@@ -3870,6 +3895,11 @@ vdev_load(vdev_t *vd)
38703895
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
38713896
"failed [error=%d]", (u_longlong_t)zapobj, error);
38723897

3898+
error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS,
3899+
&vd->vdev_slow_io_events);
3900+
if (error && error != ENOENT)
3901+
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
3902+
"failed [error=%d]", (u_longlong_t)zapobj, error);
38733903
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
38743904
&vd->vdev_slow_io_n);
38753905
if (error && error != ENOENT)
@@ -5917,15 +5947,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
59175947
/*
59185948
* Set vdev property values in the vdev props mos object.
59195949
*/
5920-
if (vd->vdev_root_zap != 0) {
5921-
objid = vd->vdev_root_zap;
5922-
} else if (vd->vdev_top_zap != 0) {
5923-
objid = vd->vdev_top_zap;
5924-
} else if (vd->vdev_leaf_zap != 0) {
5925-
objid = vd->vdev_leaf_zap;
5926-
} else {
5950+
if (vdev_prop_get_objid(vd, &objid) != 0)
59275951
panic("unexpected vdev type");
5928-
}
59295952

59305953
mutex_enter(&spa->spa_props_lock);
59315954

@@ -6102,6 +6125,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61026125
}
61036126
vd->vdev_io_t = intval;
61046127
break;
6128+
case VDEV_PROP_SLOW_IO_EVENTS:
6129+
if (nvpair_value_uint64(elem, &intval) != 0) {
6130+
error = EINVAL;
6131+
break;
6132+
}
6133+
vd->vdev_slow_io_events = intval != 0;
6134+
break;
61056135
case VDEV_PROP_SLOW_IO_N:
61066136
if (nvpair_value_uint64(elem, &intval) != 0) {
61076137
error = EINVAL;
@@ -6143,6 +6173,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61436173
nvpair_t *elem = NULL;
61446174
nvlist_t *nvprops = NULL;
61456175
uint64_t intval = 0;
6176+
boolean_t boolval = 0;
61466177
char *strval = NULL;
61476178
const char *propname = NULL;
61486179
vdev_prop_t prop;
@@ -6156,15 +6187,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61566187

61576188
nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
61586189

6159-
if (vd->vdev_root_zap != 0) {
6160-
objid = vd->vdev_root_zap;
6161-
} else if (vd->vdev_top_zap != 0) {
6162-
objid = vd->vdev_top_zap;
6163-
} else if (vd->vdev_leaf_zap != 0) {
6164-
objid = vd->vdev_leaf_zap;
6165-
} else {
6190+
if (vdev_prop_get_objid(vd, &objid) != 0)
61666191
return (SET_ERROR(EINVAL));
6167-
}
61686192
ASSERT(objid != 0);
61696193

61706194
mutex_enter(&spa->spa_props_lock);
@@ -6473,6 +6497,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
64736497
vdev_prop_add_list(outnvl, propname, strval,
64746498
intval, src);
64756499
break;
6500+
case VDEV_PROP_SLOW_IO_EVENTS:
6501+
err = vdev_prop_get_bool(vd, prop, &boolval);
6502+
if (err && err != ENOENT)
6503+
break;
6504+
6505+
src = ZPROP_SRC_LOCAL;
6506+
if (boolval == vdev_prop_default_numeric(prop))
6507+
src = ZPROP_SRC_DEFAULT;
6508+
6509+
vdev_prop_add_list(outnvl, propname, NULL,
6510+
boolval, src);
6511+
break;
64766512
case VDEV_PROP_CHECKSUM_N:
64776513
case VDEV_PROP_CHECKSUM_T:
64786514
case VDEV_PROP_IO_N:

module/zfs/zfs_fm.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
223223
case VDEV_PROP_IO_T:
224224
propval = vd->vdev_io_t;
225225
break;
226+
case VDEV_PROP_SLOW_IO_EVENTS:
227+
propval = vd->vdev_slow_io_events;
228+
break;
226229
case VDEV_PROP_SLOW_IO_N:
227230
propval = vd->vdev_slow_io_n;
228231
break;

module/zfs/zio.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5414,9 +5414,12 @@ zio_done(zio_t *zio)
54145414
zio->io_vd->vdev_stat.vs_slow_ios++;
54155415
mutex_exit(&zio->io_vd->vdev_stat_lock);
54165416

5417-
(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
5418-
zio->io_spa, zio->io_vd, &zio->io_bookmark,
5419-
zio, 0);
5417+
if (zio->io_vd->vdev_slow_io_events) {
5418+
(void) zfs_ereport_post(
5419+
FM_EREPORT_ZFS_DELAY,
5420+
zio->io_spa, zio->io_vd,
5421+
&zio->io_bookmark, zio, 0);
5422+
}
54205423
}
54215424
}
54225425
}

tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ typeset -a properties=(
7171
checksum_t
7272
io_n
7373
io_t
74+
slow_io_events
7475
slow_io_n
7576
slow_io_t
7677
trim_support

tests/zfs-tests/tests/functional/events/zed_slow_io.ksh

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#
2525
# Copyright (c) 2023, Klara Inc.
26+
# Copyright (c) 2025, Mariusz Zaborski <[email protected]>
2627
#
2728

2829
# DESCRIPTION:
@@ -140,8 +141,8 @@ function slow_io_degrade
140141
{
141142
do_setup
142143

143-
zpool set slow_io_n=5 $TESTPOOL $VDEV
144-
zpool set slow_io_t=60 $TESTPOOL $VDEV
144+
log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
145+
log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
145146

146147
start_slow_io
147148
for i in {1..16}; do
@@ -193,6 +194,44 @@ function slow_io_no_degrade
193194
do_clean
194195
}
195196

197+
# Change slow_io_n, slow_io_t to 5 events in 60 seconds
198+
# fire more than 5 events. Disable slow io events.
199+
# Should not degrade.
200+
function slow_io_degrade_disabled
{
	do_setup

	# Same thresholds as slow_io_degrade, but with event posting turned
	# off for the vdev, so no zfs.delay ereport should be generated.
	log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
	log_must zpool set slow_io_events=off $TESTPOOL $VDEV

	start_slow_io
	for i in {1..16}; do
		dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	#
	# Watch for up to 60 seconds to confirm that zfs.delay was not
	# generated.  Stop polling as soon as one shows up: the test has
	# already failed at that point.
	#
	typeset -i i=0
	typeset -i events=0
	while [[ $i -lt 60 ]]; do
		# Every dot is escaped so only the literal event class matches.
		events=$(zpool events | grep -c "ereport\.fs\.zfs\.delay")
		[[ $events -ne 0 ]] && break
		i=$((i+1))
		sleep 1
	done
	log_note "$events delay events found"

	[[ $events -eq 0 ]] || \
	    log_fail "expecting no delay events, found $events"

	log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45
	do_clean
}
234+
196235
log_assert "Test ZED slow io configurability"
197236
log_onexit cleanup
198237

@@ -202,5 +241,6 @@ log_must zed_start
202241
default_degrade
203242
slow_io_degrade
204243
slow_io_no_degrade
244+
slow_io_degrade_disabled
205245

206246
log_pass "Test ZED slow io configurability"

0 commit comments

Comments
 (0)