Tolerate blocks with an unreadable or missing meta file.

Instead of aborting, planning (LeveledCompactor.Plan), compaction
(LeveledCompactor.Compact), retention (retentionCutoffDirs) and index repair
(repairBadIndexVersion) log the problem and skip the affected block directory.
DB.reload marks a block whose meta file cannot be read as deleteable and
removes all deleteable directories in a single loop.
TestDBMissingMeta covers reopening a snapshot whose meta file was removed.

NOTE(review): Compact now carries on with a subset of the requested blocks when
one of them cannot be opened or read; confirm that callers do not delete the
skipped source directories afterwards.
NOTE(review): the post-image blob ids on the "index" lines of db.go and
db_test.go were not regenerated after the review changes to those files.

diff --git a/compact.go b/compact.go
index 8ed92294..8c0e4792 100644
--- a/compact.go
+++ b/compact.go
@@ -157,7 +157,10 @@ func (c *LeveledCompactor) Plan(dir string) ([]string, error) {
 	for _, dir := range dirs {
 		meta, err := readMetaFile(dir)
 		if err != nil {
-			return nil, err
+			level.Debug(c.logger).Log("msg", "couldn't read a block meta file at planning", "err", err)
+			// We continue with the rest of the blocks.
+			// This one will be deleted when reloading the db.
+			continue
 		}
 		dms = append(dms, dirMeta{dir, meta})
 	}
@@ -313,13 +316,15 @@ func (c *LeveledCompactor) Compact(dest string, dirs ...string) (uid ulid.ULID,
 	for _, d := range dirs {
 		b, err := OpenBlock(d, c.chunkPool)
 		if err != nil {
-			return uid, err
+			level.Error(c.logger).Log("msg", "couldn't open a block", "dir", d, "err", err.Error())
+			continue
 		}
 		defer b.Close()
 
 		meta, err := readMetaFile(d)
 		if err != nil {
-			return uid, err
+			level.Error(c.logger).Log("msg", "reading meta file", "dir", d, "err", err.Error())
+			continue
 		}
 
 		metas = append(metas, meta)
diff --git a/db.go b/db.go
index 1ac2d425..85fab4b6 100644
--- a/db.go
+++ b/db.go
@@ -319,7 +319,7 @@ func (db *DB) retentionCutoff() (b bool, err error) {
 	last := blocks[len(db.blocks)-1]
 
 	mint := last.Meta().MaxTime - int64(db.opts.RetentionDuration)
-	dirs, err := retentionCutoffDirs(db.dir, mint)
+	dirs, err := retentionCutoffDirs(db.logger, db.dir, mint)
 	if err != nil {
 		return false, err
 	}
@@ -433,7 +433,7 @@ func (db *DB) compact() (changes bool, err error) {
 
 // retentionCutoffDirs returns all directories of blocks in dir that are strictly
 // before mint.
-func retentionCutoffDirs(dir string, mint int64) ([]string, error) {
+func retentionCutoffDirs(l log.Logger, dir string, mint int64) ([]string, error) {
 	df, err := fileutil.OpenDir(dir)
 	if err != nil {
 		return nil, errors.Wrapf(err, "open directory")
@@ -450,14 +450,16 @@ func retentionCutoffDirs(dir string, mint int64) ([]string, error) {
 	for _, dir := range dirs {
 		meta, err := readMetaFile(dir)
 		if err != nil {
-			return nil, errors.Wrapf(err, "read block meta %s", dir)
+			level.Debug(l).Log("msg", "couldn't read a block meta file at retention", "err", err)
+			// We continue with the rest of the blocks.
+			// This one will be deleted when reloading the db.
+			continue
 		}
 		// The first block we encounter marks that we crossed the boundary
 		// of deletable blocks.
 		if meta.MaxTime >= mint {
 			break
 		}
-
 		delDirs = append(delDirs, dir)
 	}
 
@@ -504,7 +506,9 @@ func (db *DB) reload(deleteable ...string) (err error) {
 	for _, dir := range dirs {
 		meta, err := readMetaFile(dir)
 		if err != nil {
-			return errors.Wrapf(err, "read meta information %s", dir)
+			deleteable = append(deleteable, dir)
+			level.Error(db.logger).Log("msg", "block set for deletion due to error in the meta file", "dir", dir, "err", err.Error())
+			continue
 		}
 		// If the block is pending for deletion, don't add it to the new block set.
 		if stringsContain(deleteable, dir) {
@@ -541,8 +545,14 @@ func (db *DB) reload(deleteable ...string) (err error) {
 		if err := b.Close(); err != nil {
 			level.Warn(db.logger).Log("msg", "closing block failed", "err", err)
 		}
-		if err := os.RemoveAll(b.Dir()); err != nil {
-			level.Warn(db.logger).Log("msg", "deleting block failed", "err", err)
+		deleteable = append(deleteable, b.Dir())
+	}
+
+	// os.RemoveAll returns nil for a path that no longer exists, so no
+	// existence check is needed before removing.
+	for _, d := range deleteable {
+		if err := os.RemoveAll(d); err != nil {
+			level.Warn(db.logger).Log("msg", "deleting block failed", "dir", d, "err", err)
 		}
 	}
 
diff --git a/db_test.go b/db_test.go
index ef10edba..068f01f3 100644
--- a/db_test.go
+++ b/db_test.go
@@ -18,6 +18,7 @@ import (
 	"math"
 	"math/rand"
 	"os"
+	"path/filepath"
 	"sort"
 	"testing"
 
@@ -808,6 +809,51 @@ func TestDB_Retention(t *testing.T) {
 	testutil.Equals(t, int64(100), db.blocks[0].meta.MaxTime) // To verify its the right block.
 }
 
+// TestDBMissingMeta ensures that the db can be opened even when a block folder is missing its meta file.
+// It also ensures that the folder with the missing meta file is deleted when reloading the db.
+func TestDBMissingMeta(t *testing.T) {
+	db, close := openTestDB(t, nil)
+	defer close()
+
+	lbls := labels.Labels{labels.Label{Name: "labelname", Value: "labelvalue"}}
+
+	app := db.Appender()
+	_, err := app.Add(lbls, 0, 1)
+	testutil.Ok(t, err)
+	testutil.Ok(t, app.Commit())
+
+	// Create a snapshot to make it create a block.
+	snap, err := ioutil.TempDir("", "snap")
+	testutil.Ok(t, err)
+	defer os.RemoveAll(snap)
+	testutil.Ok(t, db.Snapshot(snap))
+
+	testutil.Ok(t, db.Close())
+
+	var deleteable string
+	testutil.Ok(t, filepath.Walk(snap, func(path string, f os.FileInfo, err error) error {
+		// Walk reports traversal failures through err; f must not be used in that case.
+		if err != nil {
+			return err
+		}
+		if f.Name() == metaFilename {
+			deleteable = filepath.Dir(path)
+			return os.Remove(path)
+		}
+		return nil
+	}))
+	testutil.Assert(t, deleteable != "", "no meta file found in the snapshot")
+
+	// Reopen the DB from the snapshot. This should succeed even though the meta file is missing.
+	db, err = Open(snap, nil, nil, nil)
+	testutil.Ok(t, err)
+	defer db.Close()
+
+	testutil.Ok(t, db.reload())
+	_, err = os.Stat(deleteable)
+	testutil.Assert(t, os.IsNotExist(err), "unexpected error when checking that the folder with the missing meta is deleted", err)
+}
+
 func TestNotMatcherSelectsLabelsUnsetSeries(t *testing.T) {
 	tmpdir, _ := ioutil.TempDir("", "test")
 	defer os.RemoveAll(tmpdir)
diff --git a/repair.go b/repair.go
index e9f2a964..40bec66d 100644
--- a/repair.go
+++ b/repair.go
@@ -30,6 +30,12 @@ func repairBadIndexVersion(logger log.Logger, dir string) error {
 			continue
 		}
 		d = path.Join(dir, d)
+		if _, err := os.Stat(filepath.Join(d, metaFilename)); os.IsNotExist(err) {
+			level.Debug(logger).Log("msg", "couldn't read a block meta file at index repair", "err", err)
+			// We continue with the rest of the blocks.
+			// This one will be deleted when reloading the db.
+			continue
+		}
 
 		meta, err := readBogusMetaFile(d)
 		if err != nil {
diff --git a/repair_test.go b/repair_test.go
index f4c9d208..c8097600 100644
--- a/repair_test.go
+++ b/repair_test.go
@@ -76,7 +76,7 @@ func TestRepairBadIndexVersion(t *testing.T) {
 	}
 
 	// On DB opening all blocks in the base dir should be repaired.
-	db, _ := Open("testdata/repair_index_version", nil, nil, nil)
+	db, err := Open("testdata/repair_index_version", nil, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}