Skip to content

ztest: Fix false positive of ENOSPC handling #17506

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from

Conversation

ihoro
Copy link

@ihoro ihoro commented Jul 2, 2025

[Sponsors: Klara, Inc., Wasabi Technology, Inc.]

Motivation and Context

This fixes a false-positive ENOSPC failure: when we free some space before the next pass, the freed space may not become available immediately because of the TXG_DEFER window.

Description

Calling txg_wait_synced() right after the dataset deletion ensures that the freed space is allocatable before the next allocation event.

In addition, this change turns the ztest_dataset_open() call in ztest_generic_run() into an assertion. Otherwise, the failure path does not report the open failure and leads to undefined behavior that is harder to diagnose. In my case, I hit a failure in ztest_spa_import_export() where it was not obvious why spa_export() returned EBUSY; it turned out that some test threads were still running, and the actual root cause was the dataset open failure.

How Has This Been Tested?

To improve the chances of reproduction, it was tested with 100% ztest_dmu_read_write, 0% ztest_kill, and 100% ztest_spa_import_export checks after each pass. Concretely, that means this temporary patch:

diff --git a/cmd/ztest.c b/cmd/ztest.c
index 86f05e8b9..8416a2762 100644
--- a/cmd/ztest.c
+++ b/cmd/ztest.c
@@ -452,10 +452,10 @@ ztest_func_t ztest_pool_prefetch_ddt;
 ztest_func_t ztest_ddt_prune;
 
 static uint64_t zopt_always = 0ULL * NANOSEC;		/* all the time */
-static uint64_t zopt_incessant = 1ULL * NANOSEC / 10;	/* every 1/10 second */
-static uint64_t zopt_often = 1ULL * NANOSEC;		/* every second */
-static uint64_t zopt_sometimes = 10ULL * NANOSEC;	/* every 10 seconds */
-static uint64_t zopt_rarely = 60ULL * NANOSEC;		/* every 60 seconds */
+//static uint64_t zopt_incessant = 1ULL * NANOSEC / 10;	/* every 1/10 second */
+//static uint64_t zopt_often = 1ULL * NANOSEC;		/* every second */
+//static uint64_t zopt_sometimes = 10ULL * NANOSEC;	/* every 10 seconds */
+//static uint64_t zopt_rarely = 60ULL * NANOSEC;		/* every 60 seconds */
 
 #define	ZTI_INIT(func, iters, interval) \
 	{   .zi_func = (func), \
@@ -465,48 +465,48 @@ static uint64_t zopt_rarely = 60ULL * NANOSEC;		/* every 60 seconds */
 
 static ztest_info_t ztest_info[] = {
 	ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always),
-       ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always),
-       ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always),
-       ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always),
-       ZTI_INIT(ztest_zap, 30, &zopt_always),
-       ZTI_INIT(ztest_zap_parallel, 100, &zopt_always),
-       ZTI_INIT(ztest_split_pool, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_zil_commit, 1, &zopt_incessant),
-       ZTI_INIT(ztest_zil_remount, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_dmu_read_write_zcopy, 1, &zopt_often),
-       ZTI_INIT(ztest_dmu_objset_create_destroy, 1, &zopt_often),
-       ZTI_INIT(ztest_dsl_prop_get_set, 1, &zopt_often),
-       ZTI_INIT(ztest_spa_prop_get_set, 1, &zopt_sometimes),
-#if 0
-       ZTI_INIT(ztest_dmu_prealloc, 1, &zopt_sometimes),
-#endif
-       ZTI_INIT(ztest_fzap, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_spa_create_destroy, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_reguid, 1, &zopt_rarely),
-       ZTI_INIT(ztest_scrub, 1, &zopt_rarely),
-       ZTI_INIT(ztest_spa_upgrade, 1, &zopt_rarely),
-       ZTI_INIT(ztest_dsl_dataset_promote_busy, 1, &zopt_rarely),
-       ZTI_INIT(ztest_vdev_attach_detach, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_vdev_raidz_attach, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_vdev_LUN_growth, 1, &zopt_rarely),
-       ZTI_INIT(ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime),
-       ZTI_INIT(ztest_vdev_class_add, 1, &ztest_opts.zo_vdevtime),
-       ZTI_INIT(ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime),
-       ZTI_INIT(ztest_device_removal, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely),
-       ZTI_INIT(ztest_initialize, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_trim, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_blake3, 1, &zopt_rarely),
-       ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
-       ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
-       ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
-       ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
-       ZTI_INIT(ztest_ddt_prune, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always),
+//     ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always),
+//     ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always),
+//     ZTI_INIT(ztest_zap, 30, &zopt_always),
+//     ZTI_INIT(ztest_zap_parallel, 100, &zopt_always),
+//     ZTI_INIT(ztest_split_pool, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_zil_commit, 1, &zopt_incessant),
+//     ZTI_INIT(ztest_zil_remount, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_dmu_read_write_zcopy, 1, &zopt_often),
+//     ZTI_INIT(ztest_dmu_objset_create_destroy, 1, &zopt_often),
+//     ZTI_INIT(ztest_dsl_prop_get_set, 1, &zopt_often),
+//     ZTI_INIT(ztest_spa_prop_get_set, 1, &zopt_sometimes),
+//#if 0
+//     ZTI_INIT(ztest_dmu_prealloc, 1, &zopt_sometimes),
+//#endif
+//     ZTI_INIT(ztest_fzap, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_spa_create_destroy, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_reguid, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_scrub, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_spa_upgrade, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_dsl_dataset_promote_busy, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_vdev_attach_detach, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_vdev_raidz_attach, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_vdev_LUN_growth, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime),
+//     ZTI_INIT(ztest_vdev_class_add, 1, &ztest_opts.zo_vdevtime),
+//     ZTI_INIT(ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime),
+//     ZTI_INIT(ztest_device_removal, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_initialize, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_trim, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_blake3, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
+//     ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
+//     ZTI_INIT(ztest_ddt_prune, 1, &zopt_rarely),
 };
 
 #define	ZTEST_FUNCS	(sizeof (ztest_info) / sizeof (ztest_info_t))
@@ -1221,6 +1221,7 @@ invalid:
 static void
 ztest_kill(ztest_shared_t *zs)
 {
+	return;
 	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
 	zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));
 
@@ -8551,7 +8552,7 @@ ztest_run(ztest_shared_t *zs)
 	 * Verify that we can export the pool and reimport it under a
 	 * different name.
 	 */
-	if ((ztest_random(2) == 0) && !ztest_opts.zo_mmp_test) {
+	if (/*(ztest_random(2) == 0) &&*/ !ztest_opts.zo_mmp_test) {
 		char name[ZFS_MAX_DATASET_NAME_LEN];
 		(void) snprintf(name, sizeof (name), "%s_import",
 		    ztest_opts.zo_pool);

Types of changes

  • Bug fix (non-breaking change which fixes an issue)
  • New feature (non-breaking change which adds functionality)
  • Performance enhancement (non-breaking change which improves efficiency)
  • Code cleanup (non-breaking change which makes code smaller or more readable)
  • Quality assurance (non-breaking change which makes the code more robust against bugs)
  • Breaking change (fix or feature that would cause existing functionality to change)
  • Library ABI change (libzfs, libzfs_core, libnvpair, libuutil and libzfsbootenv)
  • Documentation (a change to man pages or other documentation)

Checklist:

Before running a pass, zs_enospc_count is checked to free up some space
by destroying a random dataset. But the freed space may still not be
reusable during the TXG_DEFER window, breaking the next dataset creation
in ztest_generic_run().

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Signed-off-by: Igor Ostapenko <[email protected]>
@behlendorf behlendorf added the Status: Code Review Needed Ready for review and testing label Jul 2, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Status: Code Review Needed Ready for review and testing
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants