-
Notifications
You must be signed in to change notification settings - Fork 1.8k
zdb: better handling for corrupt block pointers #17166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
c004cee
22ceb77
53c5e12
453306a
0d2eb64
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,6 +127,7 @@ static zfs_range_tree_t *mos_refd_objs; | |
static spa_t *spa; | ||
static objset_t *os; | ||
static boolean_t kernel_init_done; | ||
static boolean_t corruption_found = B_FALSE; | ||
|
||
static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *, | ||
boolean_t); | ||
|
@@ -250,6 +251,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle) | |
&e->svbr_blk, B_TRUE); | ||
(void) printf("\tERROR: %d unmatched FREE(s): %s\n", | ||
e->svbr_refcnt, blkbuf); | ||
corruption_found = B_TRUE; | ||
} | ||
zfs_btree_destroy(&sv->sv_pair); | ||
|
||
|
@@ -405,6 +407,7 @@ verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg, | |
(u_longlong_t)DVA_GET_ASIZE(&found->svb_dva), | ||
(u_longlong_t)found->svb_allocated_txg, | ||
(u_longlong_t)txg); | ||
corruption_found = B_TRUE; | ||
} | ||
} | ||
} | ||
|
@@ -426,6 +429,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) | |
(u_longlong_t)txg, (u_longlong_t)offset, | ||
(u_longlong_t)size, (u_longlong_t)mv->mv_vdid, | ||
(u_longlong_t)mv->mv_msid); | ||
corruption_found = B_TRUE; | ||
} else { | ||
zfs_range_tree_add(mv->mv_allocated, | ||
offset, size); | ||
|
@@ -439,6 +443,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) | |
(u_longlong_t)txg, (u_longlong_t)offset, | ||
(u_longlong_t)size, (u_longlong_t)mv->mv_vdid, | ||
(u_longlong_t)mv->mv_msid); | ||
corruption_found = B_TRUE; | ||
} else { | ||
zfs_range_tree_remove(mv->mv_allocated, | ||
offset, size); | ||
|
@@ -526,6 +531,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv) | |
(u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), | ||
(u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), | ||
(u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); | ||
corruption_found = B_TRUE; | ||
continue; | ||
} | ||
|
||
|
@@ -542,6 +548,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv) | |
(u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), | ||
(u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), | ||
(u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); | ||
corruption_found = B_TRUE; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar error 17 lines up. |
||
continue; | ||
} | ||
|
||
|
@@ -654,6 +661,7 @@ livelist_metaslab_validate(spa_t *spa) | |
} | ||
(void) printf("ERROR: Found livelist blocks marked as allocated " | ||
"for indirect vdevs:\n"); | ||
corruption_found = B_TRUE; | ||
|
||
zfs_btree_index_t *where = NULL; | ||
sublivelist_verify_block_t *svb; | ||
|
@@ -826,7 +834,7 @@ usage(void) | |
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " | ||
"to make only that option verbose\n"); | ||
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); | ||
zdb_exit(1); | ||
zdb_exit(2); | ||
} | ||
|
||
static void | ||
|
@@ -2582,19 +2590,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, | |
} | ||
} | ||
|
||
static void | ||
static u_longlong_t | ||
print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, | ||
const dnode_phys_t *dnp) | ||
{ | ||
char blkbuf[BP_SPRINTF_LEN]; | ||
u_longlong_t offset; | ||
int l; | ||
|
||
if (!BP_IS_EMBEDDED(bp)) { | ||
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); | ||
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); | ||
} | ||
offset = (u_longlong_t)blkid2offset(dnp, bp, zb); | ||
|
||
(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); | ||
(void) printf("%16llx ", offset); | ||
|
||
ASSERT(zb->zb_level >= 0); | ||
|
||
|
@@ -2609,19 +2615,38 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, | |
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE); | ||
if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD) | ||
snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp); | ||
(void) printf("%s\n", blkbuf); | ||
(void) printf("%s", blkbuf); | ||
|
||
if (!BP_IS_EMBEDDED(bp)) { | ||
if (BP_GET_TYPE(bp) != dnp->dn_type) { | ||
(void) printf(" (ERROR: Block pointer type " | ||
"(%llu) does not match dnode type (%hhu))", | ||
BP_GET_TYPE(bp), dnp->dn_type); | ||
corruption_found = B_TRUE; | ||
} | ||
if (BP_GET_LEVEL(bp) != zb->zb_level) { | ||
(void) printf(" (ERROR: Block pointer level " | ||
"(%llu) does not match bookmark level (%ld))", | ||
BP_GET_LEVEL(bp), zb->zb_level); | ||
corruption_found = B_TRUE; | ||
} | ||
} | ||
(void) printf("\n"); | ||
|
||
return (offset); | ||
} | ||
|
||
static int | ||
visit_indirect(spa_t *spa, const dnode_phys_t *dnp, | ||
blkptr_t *bp, const zbookmark_phys_t *zb) | ||
{ | ||
u_longlong_t offset; | ||
int err = 0; | ||
|
||
if (BP_GET_LOGICAL_BIRTH(bp) == 0) | ||
return (0); | ||
|
||
print_indirect(spa, bp, zb, dnp); | ||
offset = print_indirect(spa, bp, zb, dnp); | ||
|
||
if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { | ||
arc_flags_t flags = ARC_FLAG_WAIT; | ||
|
@@ -2651,8 +2676,15 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, | |
break; | ||
fill += BP_GET_FILL(cbp); | ||
} | ||
if (!err) | ||
ASSERT3U(fill, ==, BP_GET_FILL(bp)); | ||
if (!err) { | ||
if (fill != BP_GET_FILL(bp)) { | ||
(void) printf("%16llx: Block pointer " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. print to stderr instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's what my first commit did. But I switched to stdout in response to @amotin's comments. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think printing to stderr makes sense only if we exit immediately, so it is the last message before we return error status. But if we continue, then using a different stream makes it difficult to understand what's actually wrong. On the other hand, if we print it to stdout, then it has to be formatted in a way that is friendly to possible parsers or readers, unless we expect them to ignore all the output after getting an error. |
||
"fill (%llu) does not match calculated " | ||
"value (%lu)\n", offset, BP_GET_FILL(bp), | ||
fill); | ||
corruption_found = B_TRUE; | ||
} | ||
} | ||
arc_buf_destroy(buf, &buf); | ||
} | ||
|
||
|
@@ -2908,6 +2940,7 @@ dump_full_bpobj(bpobj_t *bpo, const char *name, int indent) | |
(void) printf("ERROR %u while trying to open " | ||
"subobj id %llu\n", | ||
error, (u_longlong_t)subobj); | ||
corruption_found = B_TRUE; | ||
continue; | ||
} | ||
dump_full_bpobj(&subbpo, "subobj", indent + 1); | ||
|
@@ -3087,6 +3120,7 @@ bpobj_count_refd(bpobj_t *bpo) | |
(void) printf("ERROR %u while trying to open " | ||
"subobj id %llu\n", | ||
error, (u_longlong_t)subobj); | ||
corruption_found = B_TRUE; | ||
continue; | ||
} | ||
bpobj_count_refd(&subbpo); | ||
|
@@ -9605,7 +9639,7 @@ main(int argc, char **argv) | |
} else if (objset_str && !zdb_numeric(objset_str + 1) && | ||
dump_opt['N']) { | ||
printf("Supply a numeric objset ID with -N\n"); | ||
error = 1; | ||
error = 2; | ||
goto fini; | ||
} | ||
} else { | ||
|
@@ -9907,5 +9941,8 @@ main(int argc, char **argv) | |
if (kernel_init_done) | ||
kernel_fini(); | ||
|
||
if (corruption_found && error == 0) | ||
error = 3; | ||
|
||
return (error); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just 13 lines up is also a case of double alloc.