Skip to content

Commit 751e1d0

Browse files
committed
manifest: rework blob file reference counting
Rework the reference counting of in-use blob files to decouple the reference counts from the TableMetadatas' blob references. With the introduction of blob file rewrites and replacement, the physical blob file and the logical blob file's references have distinct lifetimes. A physical blob file that has been replaced will need to be removed once all Versions that predate the replacement have been unreferenced. This commit renames the struct previously known as BlobFileMetadata to PhysicalBlobFile, describing metadata particular to a physical file backing a logical blob file. A new BlobFileMetadata struct is introduced that holds a BlobFileID and a pointer to the PhysicalBlobFile. This commit additionally adapts the physical blob file referencing to occur through a separate B-Tree of BlobFileMetadata structs. Similar to TableMetadata, references to blob files are maintained by copy-on-write B-Tree nodes which themselves are reference counted. A PhysicalBlobFile's reference count is incremented when a new B-Tree node references a containing BlobFileMetadata, and it is decremented when that B-Tree node's reference count falls to zero. This indirection ensures that a mutation to the set of blob files performs only O(log n) work. Since addition and removal of blob files within a version is now modeled directly (as opposed to indirectly via TableMetadata BlobReferences), a blob file may be removed or replaced within the set before all referencing TableMetadatas are removed. Informs #4802.
1 parent 507d3bd commit 751e1d0

18 files changed

+407
-231
lines changed

checkpoint.go

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -354,13 +354,12 @@ func (d *DB) Checkpoint(
354354
// When we write the MANIFEST of the checkpoint, we'll include a final
355355
// VersionEdit that removes these blob files so that the checkpointed
356356
// manifest is consistent.
357-
var excludedBlobFiles map[base.DiskFileNum]*manifest.BlobFileMetadata
357+
var excludedBlobFiles map[base.BlobFileID]*manifest.PhysicalBlobFile
358358
if len(includedBlobFiles) < len(versionBlobFiles) {
359-
excludedBlobFiles = make(map[base.DiskFileNum]*manifest.BlobFileMetadata, len(versionBlobFiles)-len(includedBlobFiles))
360-
for _, blobFile := range versionBlobFiles {
361-
if _, ok := includedBlobFiles[blobFile.FileID]; !ok {
362-
diskFileNum := blob.DiskFileNumTODO(blobFile.FileID)
363-
excludedBlobFiles[diskFileNum] = blobFile
359+
excludedBlobFiles = make(map[base.BlobFileID]*manifest.PhysicalBlobFile, len(versionBlobFiles)-len(includedBlobFiles))
360+
for _, meta := range versionBlobFiles {
361+
if _, ok := includedBlobFiles[meta.FileID]; !ok {
362+
excludedBlobFiles[meta.FileID] = meta.Physical
364363
}
365364
}
366365
}
@@ -471,7 +470,7 @@ func (d *DB) writeCheckpointManifest(
471470
manifestSize int64,
472471
excludedTables map[manifest.DeletedTableEntry]*manifest.TableMetadata,
473472
removeBackingTables []base.DiskFileNum,
474-
excludedBlobFiles map[base.DiskFileNum]*manifest.BlobFileMetadata,
473+
excludedBlobFiles map[base.BlobFileID]*manifest.PhysicalBlobFile,
475474
) error {
476475
// Copy the MANIFEST, and create a pointer to it. We copy rather
477476
// than link because additional version edits added to the

compaction.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2478,7 +2478,7 @@ func (d *DB) handleCompactFailure(c *compaction, err error) {
24782478
func (d *DB) cleanupVersionEdit(ve *manifest.VersionEdit) {
24792479
obsoleteFiles := manifest.ObsoleteFiles{
24802480
TableBackings: make([]*manifest.TableBacking, 0, len(ve.NewTables)),
2481-
BlobFiles: make([]*manifest.BlobFileMetadata, 0, len(ve.NewBlobFiles)),
2481+
BlobFiles: make([]*manifest.PhysicalBlobFile, 0, len(ve.NewBlobFiles)),
24822482
}
24832483
deletedTables := make(map[base.TableNum]struct{})
24842484
for key := range ve.DeletedTables {
@@ -2488,10 +2488,10 @@ func (d *DB) cleanupVersionEdit(ve *manifest.VersionEdit) {
24882488
obsoleteFiles.AddBlob(ve.NewBlobFiles[i])
24892489
d.mu.versions.zombieBlobs.Add(objectInfo{
24902490
fileInfo: fileInfo{
2491-
FileNum: base.DiskFileNum(ve.NewBlobFiles[i].FileID),
2491+
FileNum: base.DiskFileNum(ve.NewBlobFiles[i].FileNum),
24922492
FileSize: ve.NewBlobFiles[i].Size,
24932493
},
2494-
isLocal: objstorage.IsLocalBlobFile(d.objProvider, base.DiskFileNum(ve.NewBlobFiles[i].FileID)),
2494+
isLocal: objstorage.IsLocalBlobFile(d.objProvider, base.DiskFileNum(ve.NewBlobFiles[i].FileNum)),
24952495
})
24962496
}
24972497
for i := range ve.NewTables {
@@ -3105,7 +3105,7 @@ func (d *DB) runCompaction(
31053105
// Delete any created tables or blob files.
31063106
obsoleteFiles := manifest.ObsoleteFiles{
31073107
TableBackings: make([]*manifest.TableBacking, 0, len(result.Tables)),
3108-
BlobFiles: make([]*manifest.BlobFileMetadata, 0, len(result.Blobs)),
3108+
BlobFiles: make([]*manifest.PhysicalBlobFile, 0, len(result.Blobs)),
31093109
}
31103110
d.mu.Lock()
31113111
for i := range result.Tables {
@@ -3291,7 +3291,7 @@ func (c *compaction) makeVersionEdit(result compact.Result) (*manifest.VersionEd
32913291
}
32923292
}
32933293
// Add any newly constructed blob files to the version edit.
3294-
ve.NewBlobFiles = make([]*manifest.BlobFileMetadata, len(result.Blobs))
3294+
ve.NewBlobFiles = make([]*manifest.PhysicalBlobFile, len(result.Blobs))
32953295
for i := range result.Blobs {
32963296
ve.NewBlobFiles[i] = result.Blobs[i].Metadata
32973297
}

data_test.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -927,7 +927,7 @@ func runDBDefineCmdReuseFS(td *datadriven.TestData, opts *Options) (*DB, error)
927927
// them to the final version edit.
928928
valueSeparator := &defineDBValueSeparator{
929929
pbr: &preserveBlobReferences{},
930-
metas: make(map[base.BlobFileID]*manifest.BlobFileMetadata),
930+
metas: make(map[base.BlobFileID]*manifest.PhysicalBlobFile),
931931
}
932932

933933
var mem *memTable
@@ -1640,7 +1640,8 @@ func describeLSM(d *DB, verbose bool) string {
16401640
if blobFileMetas := d.mu.versions.blobFiles.Metadatas(); len(blobFileMetas) > 0 {
16411641
buf.WriteString("Blob files:\n")
16421642
for _, meta := range blobFileMetas {
1643-
fmt.Fprintf(&buf, " %s: %d physical bytes, %d value bytes\n", meta.FileID, meta.Size, meta.ValueSize)
1643+
fmt.Fprintf(&buf, " %s: [%s] %d physical bytes, %d value bytes\n",
1644+
meta.FileID, meta.Physical.FileNum, meta.Physical.Size, meta.Physical.ValueSize)
16441645
}
16451646
}
16461647
return buf.String()

event.go

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ import (
1919
"github.com/cockroachdb/pebble/internal/manifest"
2020
"github.com/cockroachdb/pebble/objstorage"
2121
"github.com/cockroachdb/pebble/objstorage/remote"
22-
"github.com/cockroachdb/pebble/sstable/blob"
2322
"github.com/cockroachdb/pebble/vfs"
2423
"github.com/cockroachdb/redact"
2524
)
@@ -1234,10 +1233,9 @@ func (d *DB) reportCorruption(meta any, err error) error {
12341233
switch meta := meta.(type) {
12351234
case *manifest.TableMetadata:
12361235
return d.reportFileCorruption(base.FileTypeTable, meta.TableBacking.DiskFileNum, meta.UserKeyBounds(), err)
1237-
case *manifest.BlobFileMetadata:
1238-
diskFileNum := blob.DiskFileNumTODO(meta.FileID)
1236+
case *manifest.PhysicalBlobFile:
12391237
// TODO(jackson): Add bounds for blob files.
1240-
return d.reportFileCorruption(base.FileTypeBlob, diskFileNum, base.UserKeyBounds{}, err)
1238+
return d.reportFileCorruption(base.FileTypeBlob, meta.FileNum, base.UserKeyBounds{}, err)
12411239
default:
12421240
panic(fmt.Sprintf("unknown metadata type: %T", meta))
12431241
}

internal/compact/run.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ type OutputBlob struct {
6060
// ObjMeta is metadata for the object backing the blob file.
6161
ObjMeta objstorage.ObjectMetadata
6262
// Metadata is metadata for the blob file.
63-
Metadata *manifest.BlobFileMetadata
63+
Metadata *manifest.PhysicalBlobFile
6464
}
6565

6666
// Stats describes stats collected during the compaction.
@@ -140,7 +140,7 @@ type ValueSeparationMetadata struct {
140140
// The below fields are only populated if a new blob file was created.
141141
BlobFileStats blob.FileWriterStats
142142
BlobFileObject objstorage.ObjectMetadata
143-
BlobFileMetadata *manifest.BlobFileMetadata
143+
BlobFileMetadata *manifest.PhysicalBlobFile
144144
}
145145

146146
// Runner is a helper for running the "data" part of a compaction (where we use

0 commit comments

Comments
 (0)