Skip to content

Commit 6fd81ac

Browse files
fix: filter out snapshot-clone-src-*|replica-vol-* (#10)
* fix: filter out `snapshot-clone-src-*|replica-vol-*`
* chore: ignore .txt files
* unfeat: remove 'node_(read|write)_ops_total'
* fix: remove volume info from `solidfire_node_iscsi_sessions`
* remove: volume_last_sample_read_bytes|volume_read_latency_seconds|volume_last_sample_read_ops|volume_last_sample_write_bytes|volume_write_latency_seconds|volume_write_ops_last_sample
1 parent e860dfe commit 6fd81ac

File tree

5 files changed

+21
-173
lines changed

5 files changed

+21
-173
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ tmp
66
./solidfire-exporter
77
archive.tar.gz
88
bin/*
9+
*.txt

README.md

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,20 +121,15 @@ docker run --rm -p 9987:9987 ghcr.io/mjavier2k/solidfire-exporter:v0.6.6
121121
| solidfire_node_interface_utilization_percentage | gauge | Network interface utilization (in percent) of network interface. |
122122
| solidfire_node_load | histogram | System load histogram |
123123
| solidfire_node_read_latency_seconds_total | counter | The total time spent performing read operations since the creation of the cluster. |
124-
| solidfire_node_read_ops_total | counter | Total read operations to a node. |
125124
| solidfire_node_samples | gauge | Node stat sample count |
126125
| solidfire_node_total_memory_bytes | gauge | Total node memory in bytes. |
127126
| solidfire_node_used_memory_bytes | gauge | Total node memory used in bytes. |
128127
| solidfire_node_write_latency_seconds_total | counter | The total time spent performing write operations since the creation of the cluster. |
129-
| solidfire_node_write_ops_total | counter | Total write operations to a node. |
130128
| solidfire_up | gauge | Whether last scrape against Solidfire API was successful |
131129
| solidfire_volume_actual_iops | gauge | The current actual IOPS to the volume in the last 500 milliseconds |
132130
| solidfire_volume_average_iop_size_bytes | gauge | The average size in bytes of recent I/O to the volume in the last 500 milliseconds |
133131
| solidfire_volume_burst_iops_credit | gauge | The total number of IOP credits available to the user. When volumes are not using up to the configured maxIOPS, credits are accrued. |
134132
| solidfire_volume_client_queue_depth | gauge | The number of outstanding read and write operations to the volume. |
135-
| solidfire_volume_last_sample_read_bytes | gauge | The total number of bytes read from the volume during the last sample period. |
136-
| solidfire_volume_last_sample_read_ops | gauge | The total number of read operations during the last sample period |
137-
| solidfire_volume_last_sample_write_bytes | gauge | The total number of bytes written to the volume during the last sample period. |
138133
| solidfire_volume_latency_seconds | gauge | The average time, in seconds, to complete operations to the volume in the last 500 milliseconds. A '0' (zero) value means there is no I/O to the volume. |
139134
| solidfire_volume_non_zero_blocks | gauge | The total number of 4KiB blocks that contain data after the last garbage collection operation has completed. |
140135
| solidfire_volume_qos_below_min_iops_percentage | histogram | Volume QoS Below minimum IOPS percentage |
@@ -144,7 +139,6 @@ docker run --rm -p 9987:9987 ghcr.io/mjavier2k/solidfire-exporter:v0.6.6
144139
| solidfire_volume_qos_throttle_percentage | histogram | Volume QoS throttle percentage |
145140
| solidfire_volume_qos_write_block_sizes_bytes_bucket | histogram | Volume QoS write block sizes |
146141
| solidfire_volume_read_bytes_total | counter | The total cumulative bytes read from the volume since the creation of the volume. |
147-
| solidfire_volume_read_latency_seconds | gauge | The average time, in seconds, to complete read operations to the volume in the last 500 milliseconds. |
148142
| solidfire_volume_read_latency_seconds_total | counter | The total time spent performing read operations from the volume |
149143
| solidfire_volume_read_ops_total | counter | The total read operations to the volume since the creation of the volume. |
150144
| solidfire_volume_size_bytes | gauge | Total provisioned capacity in bytes. |
@@ -153,9 +147,7 @@ docker run --rm -p 9987:9987 ghcr.io/mjavier2k/solidfire-exporter:v0.6.6
153147
| solidfire_volume_unaligned_writes_total | counter | The total cumulative unaligned write operations to a volume since the creation of the volume. |
154148
| solidfire_volume_utilization | gauge | A floating value that describes how much the client is using the volume. Value 0: The client is not using the volume. Value 1: The client is using their maximum. Value 1+: The client is using their burst. |
155149
| solidfire_volume_write_bytes_total | counter | The total cumulative bytes written to the volume since the creation of the volume. |
156-
| solidfire_volume_write_latency_seconds | gauge | The average time, in seconds, to complete write operations to a volume in the last 500 milliseconds. |
157150
| solidfire_volume_write_latency_seconds_total | counter | The total time spent performing write operations to the volume |
158-
| solidfire_volume_write_ops_last_sample | gauge | The total number of write operations during the last sample period. |
159151
| solidfire_volume_write_ops_total | counter | The total cumulative write operations to the volume since the creation of the volume. |
160152
| solidfire_volume_zero_blocks | gauge | The total number of empty 4KiB blocks without data after the last round of garbage collection operation has completed. |
161153

pkg/prom/collector.go

Lines changed: 17 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"math"
7+
"regexp"
78
"strconv"
89
"strings"
910
"sync"
@@ -58,22 +59,16 @@ func (c *SolidfireCollector) Describe(ch chan<- *prometheus.Desc) {
5859
ch <- MetricDescriptions.VolumeLatencySeconds
5960
ch <- MetricDescriptions.VolumeNonZeroBlocks
6061
ch <- MetricDescriptions.VolumeReadBytesTotal
61-
ch <- MetricDescriptions.VolumeLastSampleReadBytes
62-
ch <- MetricDescriptions.VolumeReadLatencySeconds
6362
ch <- MetricDescriptions.VolumeReadLatencySecondsTotal
6463
ch <- MetricDescriptions.VolumeReadOpsTotal
65-
ch <- MetricDescriptions.VolumeLastSampleReadOps
6664
ch <- MetricDescriptions.VolumeThrottle
6765
ch <- MetricDescriptions.VolumeUnalignedReadsTotal
6866
ch <- MetricDescriptions.VolumeUnalignedWritesTotal
6967
ch <- MetricDescriptions.VolumeSizeBytes
7068
ch <- MetricDescriptions.VolumeUtilization
7169
ch <- MetricDescriptions.VolumeWriteBytesTotal
72-
ch <- MetricDescriptions.VolumeLastSampleWriteBytes
73-
ch <- MetricDescriptions.VolumeWriteLatencySeconds
7470
ch <- MetricDescriptions.VolumeWriteLatencyTotal
7571
ch <- MetricDescriptions.VolumeWriteOpsTotal
76-
ch <- MetricDescriptions.VolumeWriteOpsLastSample
7772
ch <- MetricDescriptions.VolumeStatsZeroBlocks
7873

7974
ch <- MetricDescriptions.ClusterActiveBlockSpaceBytes
@@ -112,10 +107,8 @@ func (c *SolidfireCollector) Describe(ch chan<- *prometheus.Desc) {
112107
ch <- MetricDescriptions.NodeInterfaceOutBytesTotal
113108
ch <- MetricDescriptions.NodeInterfaceUtilizationPercentage
114109
ch <- MetricDescriptions.NodeReadLatencyTotal
115-
ch <- MetricDescriptions.NodeReadOpsTotal
116110
ch <- MetricDescriptions.NodeUsedMemoryBytes
117111
ch <- MetricDescriptions.NodeWriteLatencyTotal
118-
ch <- MetricDescriptions.NodeWriteOpsTotal
119112
ch <- MetricDescriptions.NodeLoadHistogram
120113

121114
ch <- MetricDescriptions.NodeInfo
@@ -249,6 +242,9 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
249242
mu.Lock()
250243
defer mu.Unlock()
251244
for _, vol := range volumeStats.Result.VolumeStats {
245+
if ok, _ := regexp.MatchString(`snapshot-clone-src-*|replica-vol-*`, c.volumeNamesByID[vol.VolumeID]); ok {
246+
continue
247+
}
252248
ch <- prometheus.MustNewConstMetric(
253249
MetricDescriptions.VolumeActualIOPS,
254250
prometheus.GaugeValue,
@@ -298,20 +294,6 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
298294
strconv.Itoa(vol.VolumeID),
299295
c.volumeNamesByID[vol.VolumeID])
300296

301-
ch <- prometheus.MustNewConstMetric(
302-
MetricDescriptions.VolumeLastSampleReadBytes,
303-
prometheus.GaugeValue,
304-
vol.ReadBytesLastSample,
305-
strconv.Itoa(vol.VolumeID),
306-
c.volumeNamesByID[vol.VolumeID])
307-
308-
ch <- prometheus.MustNewConstMetric(
309-
MetricDescriptions.VolumeReadLatencySeconds,
310-
prometheus.GaugeValue,
311-
MicrosecondsToSeconds(vol.ReadLatencyUSec),
312-
strconv.Itoa(vol.VolumeID),
313-
c.volumeNamesByID[vol.VolumeID])
314-
315297
ch <- prometheus.MustNewConstMetric(
316298
MetricDescriptions.VolumeReadLatencySecondsTotal,
317299
prometheus.CounterValue,
@@ -326,13 +308,6 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
326308
strconv.Itoa(vol.VolumeID),
327309
c.volumeNamesByID[vol.VolumeID])
328310

329-
ch <- prometheus.MustNewConstMetric(
330-
MetricDescriptions.VolumeLastSampleReadOps,
331-
prometheus.GaugeValue,
332-
vol.ReadOpsLastSample,
333-
strconv.Itoa(vol.VolumeID),
334-
c.volumeNamesByID[vol.VolumeID])
335-
336311
ch <- prometheus.MustNewConstMetric(
337312
MetricDescriptions.VolumeThrottle,
338313
prometheus.GaugeValue,
@@ -375,20 +350,6 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
375350
strconv.Itoa(vol.VolumeID),
376351
c.volumeNamesByID[vol.VolumeID])
377352

378-
ch <- prometheus.MustNewConstMetric(
379-
MetricDescriptions.VolumeLastSampleWriteBytes,
380-
prometheus.GaugeValue,
381-
vol.WriteBytesLastSample,
382-
strconv.Itoa(vol.VolumeID),
383-
c.volumeNamesByID[vol.VolumeID])
384-
385-
ch <- prometheus.MustNewConstMetric(
386-
MetricDescriptions.VolumeWriteLatencySeconds,
387-
prometheus.GaugeValue,
388-
MicrosecondsToSeconds(vol.WriteLatencyUSec),
389-
strconv.Itoa(vol.VolumeID),
390-
c.volumeNamesByID[vol.VolumeID])
391-
392353
ch <- prometheus.MustNewConstMetric(
393354
MetricDescriptions.VolumeWriteLatencyTotal,
394355
prometheus.CounterValue,
@@ -403,13 +364,6 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
403364
strconv.Itoa(vol.VolumeID),
404365
c.volumeNamesByID[vol.VolumeID])
405366

406-
ch <- prometheus.MustNewConstMetric(
407-
MetricDescriptions.VolumeWriteOpsLastSample,
408-
prometheus.GaugeValue,
409-
vol.WriteOpsLastSample,
410-
strconv.Itoa(vol.VolumeID),
411-
c.volumeNamesByID[vol.VolumeID])
412-
413367
ch <- prometheus.MustNewConstMetric(
414368
MetricDescriptions.VolumeStatsZeroBlocks,
415369
prometheus.GaugeValue,
@@ -712,14 +666,6 @@ func (c *SolidfireCollector) collectClusterNodeStats(ctx context.Context, ch cha
712666
c.nodesNamesByID[stats.NodeID],
713667
)
714668

715-
ch <- prometheus.MustNewConstMetric(
716-
MetricDescriptions.NodeReadOpsTotal,
717-
prometheus.CounterValue,
718-
stats.ReadOps,
719-
strconv.Itoa(stats.NodeID),
720-
c.nodesNamesByID[stats.NodeID],
721-
)
722-
723669
ch <- prometheus.MustNewConstMetric(
724670
MetricDescriptions.NodeInterfaceInBytesTotal,
725671
prometheus.CounterValue,
@@ -754,13 +700,6 @@ func (c *SolidfireCollector) collectClusterNodeStats(ctx context.Context, ch cha
754700
c.nodesNamesByID[stats.NodeID],
755701
)
756702

757-
ch <- prometheus.MustNewConstMetric(
758-
MetricDescriptions.NodeWriteOpsTotal,
759-
prometheus.CounterValue,
760-
stats.WriteOps,
761-
strconv.Itoa(stats.NodeID),
762-
c.nodesNamesByID[stats.NodeID],
763-
)
764703
}
765704
return nil
766705
}
@@ -773,6 +712,9 @@ func (c *SolidfireCollector) collectVolumeQosHistograms(ctx context.Context, ch
773712
mu.Lock()
774713
defer mu.Unlock()
775714
for _, h := range VolumeQoSHistograms.Result.QosHistograms {
715+
if ok, _ := regexp.MatchString(`snapshot-clone-src-*|replica-vol-*`, c.volumeNamesByID[h.VolumeID]); ok {
716+
continue
717+
}
776718
// Below Min IOPS Percentage
777719
BelowMinIopsPercentages := map[float64]uint64{
778720
19: h.Histograms.BelowMinIopsPercentages.Bucket1To19,
@@ -1256,27 +1198,20 @@ func (c *SolidfireCollector) collectISCSISessions(ctx context.Context, ch chan<-
12561198
}
12571199
mu.Lock()
12581200
defer mu.Unlock()
1259-
sessions := make(map[int]map[int]float64)
1201+
sessions := make(map[int]float64)
12601202

12611203
for _, session := range ListISCSISessions.Result.Sessions {
1262-
if sessions[session.NodeID] == nil {
1263-
sessions[session.NodeID] = make(map[int]float64)
1264-
}
1265-
sessions[session.NodeID][session.VolumeID]++
1204+
sessions[session.NodeID]++
12661205
}
12671206

1268-
for node, v := range sessions {
1269-
for vol, val := range v {
1270-
ch <- prometheus.MustNewConstMetric(
1271-
MetricDescriptions.NodeISCSISessions,
1272-
prometheus.GaugeValue,
1273-
val,
1274-
strconv.Itoa(node),
1275-
c.nodesNamesByID[node],
1276-
strconv.Itoa(vol),
1277-
c.volumeNamesByID[vol],
1278-
)
1279-
}
1207+
for node, val := range sessions {
1208+
ch <- prometheus.MustNewConstMetric(
1209+
MetricDescriptions.NodeISCSISessions,
1210+
prometheus.GaugeValue,
1211+
val,
1212+
strconv.Itoa(node),
1213+
c.nodesNamesByID[node],
1214+
)
12801215
}
12811216
return nil
12821217
}

pkg/prom/metrics.go

Lines changed: 1 addition & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,9 @@ type Descriptions struct {
1313
VolumeAverageIOPSizeBytes *prometheus.Desc
1414
VolumeBurstIOPSCredit *prometheus.Desc
1515
VolumeClientQueueDepth *prometheus.Desc
16-
VolumeLastSampleReadBytes *prometheus.Desc
17-
VolumeLastSampleReadOps *prometheus.Desc
18-
VolumeLastSampleWriteBytes *prometheus.Desc
1916
VolumeLatencySeconds *prometheus.Desc
2017
VolumeNonZeroBlocks *prometheus.Desc
2118
VolumeReadBytesTotal *prometheus.Desc
22-
VolumeReadLatencySeconds *prometheus.Desc
2319
VolumeReadLatencySecondsTotal *prometheus.Desc
2420
VolumeReadOpsTotal *prometheus.Desc
2521
VolumeThrottle *prometheus.Desc
@@ -28,9 +24,7 @@ type Descriptions struct {
2824
VolumeSizeBytes *prometheus.Desc
2925
VolumeUtilization *prometheus.Desc
3026
VolumeWriteBytesTotal *prometheus.Desc
31-
VolumeWriteLatencySeconds *prometheus.Desc
3227
VolumeWriteLatencyTotal *prometheus.Desc
33-
VolumeWriteOpsLastSample *prometheus.Desc
3428
VolumeWriteOpsTotal *prometheus.Desc
3529
VolumeStatsZeroBlocks *prometheus.Desc
3630

@@ -82,12 +76,10 @@ type Descriptions struct {
8276
NodeInterfaceUtilizationPercentage *prometheus.Desc
8377
NodeLoadHistogram *prometheus.Desc
8478
NodeReadLatencyTotal *prometheus.Desc
85-
NodeReadOpsTotal *prometheus.Desc
8679
NodeSamples *prometheus.Desc
8780
NodeTotalMemoryBytes *prometheus.Desc
8881
NodeUsedMemoryBytes *prometheus.Desc
8982
NodeWriteLatencyTotal *prometheus.Desc
90-
NodeWriteOpsTotal *prometheus.Desc
9183

9284
// ListAllNodes
9385
NodeInfo *prometheus.Desc
@@ -211,20 +203,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
211203
nil,
212204
)
213205

214-
d.VolumeLastSampleReadBytes = prometheus.NewDesc(
215-
prometheus.BuildFQName(namespace, "", "volume_last_sample_read_bytes"),
216-
"The total number of bytes read from the volume during the last sample period.",
217-
[]string{"volume_id", "volume_name"},
218-
nil,
219-
)
220-
221-
d.VolumeReadLatencySeconds = prometheus.NewDesc(
222-
prometheus.BuildFQName(namespace, "", "volume_read_latency_seconds"),
223-
"The average time, in seconds, to complete read operations to the volume in the last 500 milliseconds.",
224-
[]string{"volume_id", "volume_name"},
225-
nil,
226-
)
227-
228206
d.VolumeReadLatencySecondsTotal = prometheus.NewDesc(
229207
prometheus.BuildFQName(namespace, "", "volume_read_latency_seconds_total"),
230208
"The total time spent performing read operations from the volume",
@@ -239,13 +217,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
239217
nil,
240218
)
241219

242-
d.VolumeLastSampleReadOps = prometheus.NewDesc(
243-
prometheus.BuildFQName(namespace, "", "volume_last_sample_read_ops"),
244-
"The total number of read operations during the last sample period",
245-
[]string{"volume_id", "volume_name"},
246-
nil,
247-
)
248-
249220
d.VolumeThrottle = prometheus.NewDesc(
250221
prometheus.BuildFQName(namespace, "", "volume_throttle"),
251222
"A floating value between 0 and 1 that represents how much the system is throttling clients below their maxIOPS because of rereplication of data, transient errors, and snapshots taken.",
@@ -288,20 +259,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
288259
nil,
289260
)
290261

291-
d.VolumeLastSampleWriteBytes = prometheus.NewDesc(
292-
prometheus.BuildFQName(namespace, "", "volume_last_sample_write_bytes"),
293-
"The total number of bytes written to the volume during the last sample period.",
294-
[]string{"volume_id", "volume_name"},
295-
nil,
296-
)
297-
298-
d.VolumeWriteLatencySeconds = prometheus.NewDesc(
299-
prometheus.BuildFQName(namespace, "", "volume_write_latency_seconds"),
300-
"The average time, in seconds, to complete write operations to a volume in the last 500 milliseconds.",
301-
[]string{"volume_id", "volume_name"},
302-
nil,
303-
)
304-
305262
d.VolumeWriteLatencyTotal = prometheus.NewDesc(
306263
prometheus.BuildFQName(namespace, "", "volume_write_latency_seconds_total"),
307264
"The total time spent performing write operations to the volume",
@@ -316,13 +273,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
316273
nil,
317274
)
318275

319-
d.VolumeWriteOpsLastSample = prometheus.NewDesc(
320-
prometheus.BuildFQName(namespace, "", "volume_write_ops_last_sample"),
321-
"The total number of write operations during the last sample period.",
322-
[]string{"volume_id", "volume_name"},
323-
nil,
324-
)
325-
326276
d.VolumeStatsZeroBlocks = prometheus.NewDesc(
327277
prometheus.BuildFQName(namespace, "", "volume_zero_blocks"),
328278
"The total number of empty 4KiB blocks without data after the last round of garbage collection operation has completed.",
@@ -544,20 +494,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
544494
nil,
545495
)
546496

547-
d.NodeReadOpsTotal = prometheus.NewDesc(
548-
prometheus.BuildFQName(namespace, "", "node_read_ops_total"),
549-
"Total read operations to a node.", // undocumented metric
550-
[]string{"node_id", "node_name"},
551-
nil,
552-
)
553-
554-
d.NodeWriteOpsTotal = prometheus.NewDesc(
555-
prometheus.BuildFQName(namespace, "", "node_write_ops_total"),
556-
"Total write operations to a node", // undocumented metric
557-
[]string{"node_id", "node_name"},
558-
nil,
559-
)
560-
561497
d.NodeTotalMemoryBytes = prometheus.NewDesc(
562498
prometheus.BuildFQName(namespace, "", "node_total_memory_bytes"),
563499
"Total node memory in bytes.",
@@ -932,7 +868,7 @@ func NewMetricDescriptions(namespace string) *Descriptions {
932868
d.NodeISCSISessions = prometheus.NewDesc(
933869
prometheus.BuildFQName(namespace, "", "node_iscsi_sessions"),
934870
"The total number of iscsi sessions per node and volume",
935-
[]string{"node_id", "node_name", "volume_id", "volume_name"},
871+
[]string{"node_id", "node_name"},
936872
nil,
937873
)
938874

0 commit comments

Comments
 (0)