Skip to content

zdb: better handling for corrupt block pointers #17166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 49 additions & 12 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ static zfs_range_tree_t *mos_refd_objs;
static spa_t *spa;
static objset_t *os;
static boolean_t kernel_init_done;
static boolean_t corruption_found = B_FALSE;

static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
boolean_t);
Expand Down Expand Up @@ -250,6 +251,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
&e->svbr_blk, B_TRUE);
(void) printf("\tERROR: %d unmatched FREE(s): %s\n",
e->svbr_refcnt, blkbuf);
corruption_found = B_TRUE;
}
zfs_btree_destroy(&sv->sv_pair);

Expand Down Expand Up @@ -405,6 +407,7 @@ verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
(u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
(u_longlong_t)found->svb_allocated_txg,
(u_longlong_t)txg);
corruption_found = B_TRUE;
}
}
}
Expand All @@ -426,6 +429,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
(u_longlong_t)txg, (u_longlong_t)offset,
(u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
(u_longlong_t)mv->mv_msid);
corruption_found = B_TRUE;
} else {
zfs_range_tree_add(mv->mv_allocated,
offset, size);
Expand All @@ -439,6 +443,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
(u_longlong_t)txg, (u_longlong_t)offset,
(u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
(u_longlong_t)mv->mv_msid);
corruption_found = B_TRUE;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just 13 lines up is also a case of double alloc.

} else {
zfs_range_tree_remove(mv->mv_allocated,
offset, size);
Expand Down Expand Up @@ -526,6 +531,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
(u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
(u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
(u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
corruption_found = B_TRUE;
continue;
}

Expand All @@ -542,6 +548,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
(u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
(u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
(u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
corruption_found = B_TRUE;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar error 17 lines up.

continue;
}

Expand Down Expand Up @@ -654,6 +661,7 @@ livelist_metaslab_validate(spa_t *spa)
}
(void) printf("ERROR: Found livelist blocks marked as allocated "
"for indirect vdevs:\n");
corruption_found = B_TRUE;

zfs_btree_index_t *where = NULL;
sublivelist_verify_block_t *svb;
Expand Down Expand Up @@ -826,7 +834,7 @@ usage(void)
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
zdb_exit(1);
zdb_exit(2);
}

static void
Expand Down Expand Up @@ -2582,19 +2590,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
}
}

static void
static u_longlong_t
print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
const dnode_phys_t *dnp)
{
char blkbuf[BP_SPRINTF_LEN];
u_longlong_t offset;
int l;

if (!BP_IS_EMBEDDED(bp)) {
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
}
offset = (u_longlong_t)blkid2offset(dnp, bp, zb);

(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
(void) printf("%16llx ", offset);

ASSERT(zb->zb_level >= 0);

Expand All @@ -2609,19 +2615,38 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE);
if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD)
snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp);
(void) printf("%s\n", blkbuf);
(void) printf("%s", blkbuf);

if (!BP_IS_EMBEDDED(bp)) {
if (BP_GET_TYPE(bp) != dnp->dn_type) {
(void) printf(" (ERROR: Block pointer type "
"(%llu) does not match dnode type (%hhu))",
BP_GET_TYPE(bp), dnp->dn_type);
corruption_found = B_TRUE;
}
if (BP_GET_LEVEL(bp) != zb->zb_level) {
(void) printf(" (ERROR: Block pointer level "
"(%llu) does not match bookmark level (%ld))",
BP_GET_LEVEL(bp), zb->zb_level);
corruption_found = B_TRUE;
}
}
(void) printf("\n");

return (offset);
}

static int
visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
blkptr_t *bp, const zbookmark_phys_t *zb)
{
u_longlong_t offset;
int err = 0;

if (BP_GET_LOGICAL_BIRTH(bp) == 0)
return (0);

print_indirect(spa, bp, zb, dnp);
offset = print_indirect(spa, bp, zb, dnp);

if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
arc_flags_t flags = ARC_FLAG_WAIT;
Expand Down Expand Up @@ -2651,8 +2676,15 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
break;
fill += BP_GET_FILL(cbp);
}
if (!err)
ASSERT3U(fill, ==, BP_GET_FILL(bp));
if (!err) {
if (fill != BP_GET_FILL(bp)) {
(void) printf("%16llx: Block pointer "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

print to stderr instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's what my first commit did. But I switched to stdout in response to @amotin's comments.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think printing to stderr makes sense only if we exit immediately, so that it is the last message before we return an error status. But if we continue, then using a different stream makes it difficult to understand what's actually wrong. On the other hand, if we print it to stdout, then it has to be formatted in a way that is friendly to possible parsers or readers, unless we expect them to ignore all the output after getting an error.

"fill (%llu) does not match calculated "
"value (%lu)\n", offset, BP_GET_FILL(bp),
fill);
corruption_found = B_TRUE;
}
}
arc_buf_destroy(buf, &buf);
}

Expand Down Expand Up @@ -2908,6 +2940,7 @@ dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
(void) printf("ERROR %u while trying to open "
"subobj id %llu\n",
error, (u_longlong_t)subobj);
corruption_found = B_TRUE;
continue;
}
dump_full_bpobj(&subbpo, "subobj", indent + 1);
Expand Down Expand Up @@ -3087,6 +3120,7 @@ bpobj_count_refd(bpobj_t *bpo)
(void) printf("ERROR %u while trying to open "
"subobj id %llu\n",
error, (u_longlong_t)subobj);
corruption_found = B_TRUE;
continue;
}
bpobj_count_refd(&subbpo);
Expand Down Expand Up @@ -9605,7 +9639,7 @@ main(int argc, char **argv)
} else if (objset_str && !zdb_numeric(objset_str + 1) &&
dump_opt['N']) {
printf("Supply a numeric objset ID with -N\n");
error = 1;
error = 2;
goto fini;
}
} else {
Expand Down Expand Up @@ -9907,5 +9941,8 @@ main(int argc, char **argv)
if (kernel_init_done)
kernel_fini();

if (corruption_found && error == 0)
error = 3;

return (error);
}
14 changes: 13 additions & 1 deletion man/man8/zdb.8
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
.\" Copyright (c) 2017 Intel Corporation.
.\"
.Dd October 27, 2024
.Dd April 23, 2025
.Dt ZDB 8
.Os
.
Expand Down Expand Up @@ -526,6 +526,18 @@ option, with more occurrences enabling more verbosity.
If no options are specified, all information about the named pool will be
displayed at default verbosity.
.
.Sh EXIT STATUS
The
.Nm
utility exits
.Sy 0
on success,
.Sy 1
if a fatal error occurs,
.Sy 2
if invalid command line options were specified, or
.Sy 3
if on-disk corruption was detected, but was not fatal.
.Sh EXAMPLES
.Ss Example 1 : No Display the configuration of imported pool Ar rpool
.Bd -literal
Expand Down
Loading