This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 882162d

fix the "failed compaction" metric. (#613)
Signed-off-by: Krasi Georgiev <[email protected]>
1 parent 13c80a5 commit 882162d
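
This commit moves the "failed compactions" counter from the compactor into the DB metrics. The compactor-local counter, incremented at the end of LeveledCompactor.write, could only observe errors raised inside write itself, so failures that surface afterwards, such as the failed reload exercised in TestDeleteCompactionBlockAfterFailedReload, were never counted. The prometheus_tsdb_compactions_failed_total counter now lives in dbMetrics and is incremented from a deferred function in (*DB).compact whenever the named error return is non-nil.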

File tree

compact.go
compact_test.go
db.go

3 files changed, +14 -9 lines

compact.go

Lines changed: 0 additions & 9 deletions
@@ -84,7 +84,6 @@ type LeveledCompactor struct {
 type compactorMetrics struct {
 	ran               prometheus.Counter
 	populatingBlocks  prometheus.Gauge
-	failed            prometheus.Counter
 	overlappingBlocks prometheus.Counter
 	duration          prometheus.Histogram
 	chunkSize         prometheus.Histogram
@@ -103,10 +102,6 @@ func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics {
 		Name: "prometheus_tsdb_compaction_populating_block",
 		Help: "Set to 1 when a block is currently being written to the disk.",
 	})
-	m.failed = prometheus.NewCounter(prometheus.CounterOpts{
-		Name: "prometheus_tsdb_compactions_failed_total",
-		Help: "Total number of compactions that failed for the partition.",
-	})
 	m.overlappingBlocks = prometheus.NewCounter(prometheus.CounterOpts{
 		Name: "prometheus_tsdb_vertical_compactions_total",
 		Help: "Total number of compactions done on overlapping blocks.",
@@ -136,7 +131,6 @@ func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics {
 	r.MustRegister(
 		m.ran,
 		m.populatingBlocks,
-		m.failed,
 		m.overlappingBlocks,
 		m.duration,
 		m.chunkRange,
@@ -541,9 +535,6 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe
 		if err := os.RemoveAll(tmp); err != nil {
 			level.Error(c.logger).Log("msg", "removed tmp folder after failed compaction", "err", err.Error())
 		}
-		if err != nil {
-			c.metrics.failed.Inc()
-		}
 		c.metrics.ran.Inc()
 		c.metrics.duration.Observe(time.Since(t).Seconds())
 	}(time.Now())

compact_test.go

Lines changed: 3 additions & 0 deletions
@@ -1042,13 +1042,16 @@ func TestDeleteCompactionBlockAfterFailedReload(t *testing.T) {
 
 	testutil.Equals(t, 0.0, prom_testutil.ToFloat64(db.metrics.reloadsFailed), "initial 'failed db reload' count metrics mismatch")
 	testutil.Equals(t, 0.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.ran), "initial `compactions` count metric mismatch")
+	testutil.Equals(t, 0.0, prom_testutil.ToFloat64(db.metrics.compactionsFailed), "initial `compactions failed` count metric mismatch")
 
 	// Do the compaction and check the metrics.
 	// Compaction should succeed, but the reload should fail and
 	// the new block created from the compaction should be deleted.
 	testutil.NotOk(t, db.compact())
 	testutil.Equals(t, 1.0, prom_testutil.ToFloat64(db.metrics.reloadsFailed), "'failed db reload' count metrics mismatch")
 	testutil.Equals(t, 1.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.ran), "`compaction` count metric mismatch")
+	testutil.Equals(t, 1.0, prom_testutil.ToFloat64(db.metrics.compactionsFailed), "`compactions failed` count metric mismatch")
+
 	actBlocks, err = blockDirs(db.Dir())
 	testutil.Ok(t, err)
 	testutil.Equals(t, expBlocks, len(actBlocks)-1, "block count should be the same as before the compaction") // -1 to exclude the corrupted block.
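
For reference, a minimal standalone sketch (not part of this commit) of how prom_testutil.ToFloat64 from github.com/prometheus/client_golang/prometheus/testutil reads a counter's current value, which is what the assertions above rely on. The counter here is an illustrative stand-in for db.metrics.compactionsFailed:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Stand-in for db.metrics.compactionsFailed.
	failed := prometheus.NewCounter(prometheus.CounterOpts{
		Name: "prometheus_tsdb_compactions_failed_total",
		Help: "Total number of compactions that failed for the partition.",
	})

	failed.Inc()

	// ToFloat64 collects the metric and returns its current value as a
	// float64, so a test can compare it directly against an expected count.
	fmt.Println(prom_testutil.ToFloat64(failed)) // prints 1
}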

db.go

Lines changed: 11 additions & 0 deletions
@@ -147,6 +147,7 @@ type dbMetrics struct {
 	reloads              prometheus.Counter
 	reloadsFailed        prometheus.Counter
 	compactionsTriggered prometheus.Counter
+	compactionsFailed    prometheus.Counter
 	timeRetentionCount   prometheus.Counter
 	compactionsSkipped   prometheus.Counter
 	startTime            prometheus.GaugeFunc
@@ -191,6 +192,10 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
 		Name: "prometheus_tsdb_compactions_triggered_total",
 		Help: "Total number of triggered compactions for the partition.",
 	})
+	m.compactionsFailed = prometheus.NewCounter(prometheus.CounterOpts{
+		Name: "prometheus_tsdb_compactions_failed_total",
+		Help: "Total number of compactions that failed for the partition.",
+	})
 	m.timeRetentionCount = prometheus.NewCounter(prometheus.CounterOpts{
 		Name: "prometheus_tsdb_time_retentions_total",
 		Help: "The number of times that blocks were deleted because the maximum time limit was exceeded.",
@@ -231,6 +236,7 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
 		m.reloadsFailed,
 		m.timeRetentionCount,
 		m.compactionsTriggered,
+		m.compactionsFailed,
 		m.startTime,
 		m.tombCleanTimer,
 		m.blocksBytes,
@@ -411,6 +417,11 @@ func (a dbAppender) Commit() error {
 func (db *DB) compact() (err error) {
 	db.cmtx.Lock()
 	defer db.cmtx.Unlock()
+	defer func() {
+		if err != nil {
+			db.metrics.compactionsFailed.Inc()
+		}
+	}()
 	// Check whether we have pending head blocks that are ready to be persisted.
 	// They have the highest priority.
 	for {
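
The increment hinges on compact's named error return: the deferred closure runs after the return value has been set, so it sees whatever error compact finally returns, including errors from the reload that follows a successful compaction. A minimal sketch of the same pattern, with illustrative names that are not from the repository:

package main

import (
	"errors"
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// compactionsFailed stands in for db.metrics.compactionsFailed.
var compactionsFailed = prometheus.NewCounter(prometheus.CounterOpts{
	Name: "prometheus_tsdb_compactions_failed_total",
	Help: "Total number of compactions that failed for the partition.",
})

// compact mirrors the shape of (*DB).compact: because err is a named
// return value, the deferred closure observes the error the function
// ultimately returns and counts every failure path exactly once.
func compact() (err error) {
	defer func() {
		if err != nil {
			compactionsFailed.Inc()
		}
	}()
	return errors.New("simulated reload failure")
}

func main() {
	if err := compact(); err != nil {
		fmt.Println("compact failed:", err)
	}
}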

0 commit comments