Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Reduce number of allocations in histogram metric #317

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
coverage/
Gemfile.lock
pkg/
spec/benchmarks/*.json
2 changes: 1 addition & 1 deletion lib/prometheus/client/histogram.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def type
# https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations
# for details.
def observe(value, labels: {})
bucket = buckets.find {|upper_limit| upper_limit >= value }
bucket = buckets.bsearch { |upper_limit| upper_limit >= value }
bucket = "+Inf" if bucket.nil?

base_label_set = label_set_for(labels)
Expand Down
165 changes: 165 additions & 0 deletions lib/prometheus/client/histogram_fixed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# frozen_string_literal: true

require 'prometheus/client/metric'

module Prometheus
  module Client
    # A histogram samples observations (usually things like request durations
    # or response sizes) and counts them in configurable buckets. It also
    # provides a total count and sum of all observed values.
    #
    # Allocation-reduced variant of Histogram: bucket label strings and
    # validated label sets are computed once and cached, so #observe does
    # not re-allocate them on every call.
    class HistogramFixed < Metric
      # DEFAULT_BUCKETS are the default Histogram buckets. The default buckets
      # are tailored to broadly measure the response time (in seconds) of a
      # network service. (From DefBuckets client_golang)
      DEFAULT_BUCKETS = [
        0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10
      ].freeze
      # `le` label value for the implicit catch-all bucket.
      INF = '+Inf'
      # Pseudo `le` value under which the running sum of observations is stored.
      SUM = 'sum'

      # buckets: the numeric upper limits; bucket_strings: their cached String forms.
      attr_reader :buckets, :bucket_strings

      # Offer a way to manually specify buckets.
      #
      # @param buckets [Array<Numeric>] upper limits, must be sorted ascending
      # @raise [ArgumentError] if the buckets are not sorted
      def initialize(name,
                     docstring:,
                     labels: [],
                     preset_labels: {},
                     buckets: DEFAULT_BUCKETS,
                     store_settings: {})
        raise ArgumentError, 'Unsorted buckets, typo?' unless sorted?(buckets)

        @buckets = buckets
        @bucket_strings = buckets.map(&:to_s) # This is used to avoid calling `to_s` multiple times

        super(name,
              docstring: docstring,
              labels: labels,
              preset_labels: preset_labels,
              store_settings: store_settings)
      end

      # Evenly spaced bucket limits: start, start+width, ... (count values).
      def self.linear_buckets(start:, width:, count:)
        count.times.map { |idx| start.to_f + idx * width }
      end

      # Geometrically spaced bucket limits: start, start*factor, ... (count values).
      def self.exponential_buckets(start:, factor: 2, count:)
        count.times.map { |idx| start.to_f * factor ** idx }
      end

      # Returns a copy of this metric with `labels` merged into its preset labels.
      def with_labels(labels)
        new_metric = self.class.new(name,
                                    docstring: docstring,
                                    labels: @labels,
                                    preset_labels: preset_labels.merge(labels),
                                    buckets: @buckets,
                                    store_settings: @store_settings)

        # The new metric needs to use the same store as the "main" declared one, otherwise
        # any observations on that copy with the pre-set labels won't actually be exported.
        new_metric.replace_internal_store(@store)

        new_metric
      end

      def type
        :histogram
      end

      # Records a given value. The recorded value is usually positive
      # or zero. A negative value is accepted but prevents current
      # versions of Prometheus from properly detecting counter resets
      # in the sum of observations. See
      # https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations
      # for details.
      def observe(value, labels: {})
        base_label_set = label_set_for(labels) # Potentially can raise, so it should be first
        # bsearch_index is O(log n) over the sorted buckets; nil means the value
        # is larger than every bucket and falls into +Inf.
        bucket_idx = buckets.bsearch_index { |upper_limit| upper_limit >= value }
        bucket_str = bucket_idx == nil ? INF : bucket_strings[bucket_idx]

        # This is basically faster than doing `.merge`
        bucket_label_set = base_label_set.dup
        bucket_label_set[:le] = bucket_str

        # base_label_set carries le: SUM, so the second increment updates the sum.
        @store.synchronize do
          @store.increment(labels: bucket_label_set, by: 1)
          @store.increment(labels: base_label_set, by: value)
        end
      end

      # Returns a hash with all the buckets plus +Inf (count) plus Sum for the given label set
      def get(labels: {})
        base_label_set = label_set_for(labels)

        all_buckets = buckets + [INF, SUM]

        # NOTE(review): assumes @store.get returns a number (not nil) for unset
        # series; accumulate_buckets! would fail on nil — confirm store contract.
        @store.synchronize do
          all_buckets.each_with_object(Hash.new(0.0)) do |upper_limit, acc|
            acc[upper_limit.to_s] = @store.get(labels: base_label_set.merge(le: upper_limit.to_s))
          end.tap do |acc|
            accumulate_buckets!(acc)
          end
        end
      end

      # Returns all label sets with their values expressed as hashes with their buckets
      def values
        values = @store.all_values
        # Template row with every bucket preset to 0.0; dup'd per label set below.
        default_buckets = Hash.new(0.0)
        bucket_strings.each { |b| default_buckets[b] = 0.0 }

        # Group stored entries by their label set minus :le (Hash#except, Ruby 3.0+),
        # filing each value under its own `le` key ("sum" included).
        result = values.each_with_object({}) do |(label_set, v), acc|
          actual_label_set = label_set.except(:le)
          acc[actual_label_set] ||= default_buckets.dup
          acc[actual_label_set][label_set[:le]] = v
        end

        result.each_value { |v| accumulate_buckets!(v) }
      end

      # Eagerly creates all series (every bucket, +Inf and sum) for a label set at 0.
      def init_label_set(labels)
        base_label_set = label_set_for(labels)

        @store.synchronize do
          (buckets + [INF, SUM]).each do |bucket|
            @store.set(labels: base_label_set.merge(le: bucket.to_s), val: 0)
          end
        end
      end

      private

      # Modifies the passed in parameter: converts per-bucket counts into
      # cumulative counts (each bucket absorbs all lower buckets), finishing
      # with +Inf as the total count.
      def accumulate_buckets!(h)
        accumulator = 0

        bucket_strings.each do |upper_limit|
          accumulator = (h[upper_limit] += accumulator)
        end

        h[INF] += accumulator
      end

      # :le is reserved because the histogram itself uses it for buckets.
      RESERVED_LABELS = [:le].freeze
      private_constant :RESERVED_LABELS
      def reserved_labels
        RESERVED_LABELS
      end

      def sorted?(bucket)
        # This is faster than using `each_cons` and `all?`
        bucket == bucket.sort
      end

      # Validates and caches the label set for a given labels hash; the cached
      # set already carries le: SUM so #observe can increment the sum directly.
      # NOTE(review): the memo hash is keyed by the caller's labels hash and is
      # never evicted — unbounded label cardinality would grow it forever, and a
      # caller mutating its hash after the first call would corrupt the key.
      def label_set_for(labels)
        @label_set_for ||= Hash.new do |hash, key|
          _labels = key.transform_values(&:to_s)
          _labels = @validator.validate_labelset_new!(preset_labels.merge(_labels))
          _labels[:le] = SUM # We can cache this, because it's always the same
          hash[key] = _labels
        end

        @label_set_for[labels]
      end
    end
  end
end
15 changes: 15 additions & 0 deletions lib/prometheus/client/label_set_validator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ def validate_labelset!(labelset)
" keys expected: #{expected_labels}"
end

# Fast-path label-set validation: returns +labelset+ unchanged when its key
# signature matches +expected_labels+, otherwise raises InvalidLabelSetError.
# Avoids the allocations of the original validate_labelset! by sorting the
# freshly-allocated keys array in place.
def validate_labelset_new!(labelset)
  given_keys = labelset.keys
  begin
    # Hash#keys hands us a brand-new array, so sorting it in place is safe.
    given_keys.sort!
    return labelset if given_keys == expected_labels
  rescue ArgumentError
    # Keys mixing Strings and Symbols are unsortable; that is just another
    # shape of invalid label set, so fall through to the error below.
  end

  raise InvalidLabelSetError, "labels must have the same signature " \
    "(keys given: #{labelset.keys} vs." \
    " keys expected: #{expected_labels}"
end

private

def keys_match?(labelset)
Expand Down
1 change: 1 addition & 0 deletions prometheus-client.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ Gem::Specification.new do |s|
s.add_development_dependency 'benchmark-ips'
s.add_development_dependency 'concurrent-ruby'
s.add_development_dependency 'timecop'
s.add_development_dependency 'vernier'
end
185 changes: 185 additions & 0 deletions spec/benchmarks/histogram.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# frozen_string_literal: true

require 'benchmark'
require 'benchmark/ips'
require 'vernier'

require 'prometheus/client'
require 'prometheus/client/data_stores/single_threaded'
require 'prometheus/client/histogram'
require 'prometheus/client/histogram_fixed'

# NoopStore doesn't work here, because it doesn't implement `get` and `all_values` methods
Prometheus::Client.config.data_store = Prometheus::Client::DataStores::SingleThreaded.new # Simple data storage

# Deliberately huge, fine-grained bucket list (~270 upper limits) used to
# stress-test per-observation cost and #values allocation behaviour.
BUCKETS = [
  0.00001, 0.000015, 0.00002, 0.000025, 0.00003, 0.000035, 0.00004, 0.000045, 0.00005, 0.000055, 0.00006, 0.000065, 0.00007, 0.000075, 0.00008, 0.000085,
  0.00009, 0.000095, 0.0001, 0.000101, 0.000102, 0.000103, 0.000104, 0.000105, 0.000106, 0.000107, 0.000108, 0.000109, 0.00011, 0.000111, 0.000112, 0.000113,
  0.000114, 0.000115, 0.000116, 0.000117, 0.000118, 0.000119, 0.00012, 0.000121, 0.000122, 0.000123, 0.000124, 0.000125, 0.000126, 0.000127, 0.000128,
  0.000129, 0.00013, 0.000131, 0.000132, 0.000133, 0.000134, 0.000135, 0.000136, 0.000137, 0.000138, 0.000139, 0.00014, 0.000141, 0.000142, 0.000143, 0.000144,
  0.000145, 0.000146, 0.000147, 0.000148, 0.000149, 0.00015, 0.000151, 0.000152, 0.000153, 0.000154, 0.000155, 0.000156, 0.000157, 0.000158, 0.000159, 0.00016,
  0.000161, 0.000162, 0.000163, 0.000164, 0.000165, 0.000166, 0.000167, 0.000168, 0.000169, 0.00017, 0.000171, 0.000172, 0.000173, 0.000174, 0.000175,
  0.000176, 0.000177, 0.000178, 0.000179, 0.00018, 0.000181, 0.000182, 0.000183, 0.000184, 0.000185, 0.000186, 0.000187, 0.000188, 0.000189, 0.00019, 0.000191,
  0.000192, 0.000193, 0.000194, 0.000195, 0.000196, 0.000197, 0.000198, 0.000199, 0.0002, 0.00021, 0.00022, 0.00023, 0.00024, 0.00025, 0.00026,
  0.00027, 0.00028, 0.00029, 0.0003, 0.00031, 0.00032, 0.00033, 0.00034, 0.00035, 0.00036, 0.00037, 0.00038, 0.00039, 0.0004, 0.00041, 0.00042,
  0.00043, 0.00044, 0.00045, 0.00046, 0.00047, 0.00048, 0.00049, 0.0005, 0.00051, 0.00052, 0.00053, 0.00054, 0.00055, 0.00056, 0.00057, 0.00058,
  0.00059, 0.0006, 0.00061, 0.00062, 0.00063, 0.00064, 0.00065, 0.00066, 0.00067, 0.00068, 0.00069, 0.0007, 0.00071, 0.00072, 0.00073, 0.00074,
  0.00075, 0.00076, 0.00077, 0.00078, 0.00079, 0.0008, 0.00081, 0.00082, 0.00083, 0.00084, 0.00085, 0.00086, 0.00087, 0.00088, 0.00089, 0.0009,
  0.00091, 0.00092, 0.00093, 0.00094, 0.00095, 0.00096, 0.00097, 0.00098, 0.00099, 0.001, 0.0015, 0.002, 0.0025, 0.003, 0.0035, 0.004, 0.0045, 0.005,
  0.0055, 0.006, 0.0065, 0.007, 0.0075, 0.008, 0.0085, 0.009, 0.0095, 0.01, 0.015, 0.02, 0.025, 0.03, 0.035, 0.04, 0.045, 0.05, 0.055, 0.06, 0.065, 0.07,
  0.075, 0.08, 0.085, 0.09, 0.095, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.5, 2.0, 2.5,
  3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5, 11.0, 11.5, 12.0, 12.5, 13.0, 13.5, 14.0, 14.5, 15.0, 15.5, 16.0, 16.5, 17.0, 17.5
].freeze

# Runs the given block and returns the number of Ruby objects it allocated,
# measured as the delta of GC.stat's :total_allocated_objects counter.
def allocations
  before = GC.stat(:total_allocated_objects)
  yield
  GC.stat(:total_allocated_objects) - before
end

# Setup 4 test cases, each pairing the old Histogram with the new
# HistogramFixed so they can be benchmarked head-to-head:
# 1. Default buckets + no labels
# 2. Large buckets + no labels
# 3. Default buckets + 2 labels
# 4. Large buckets + 2 labels
# NOTE(review): the 'lables' typo below lives in metric docstrings and hash
# keys used later in this file — renaming must be done consistently, so it is
# left untouched here.
TEST_CASES = {
  'default buckets + no lables' => [
    Prometheus::Client::Histogram.new(
      :default_buckets_old,
      docstring: 'default buckets + no lables'
    ),
    Prometheus::Client::HistogramFixed.new(
      :default_buckets_new,
      docstring: 'default buckets + no lables'
    )
  ],
  'large buckets + no lables' => [
    Prometheus::Client::Histogram.new(
      :large_buckets_old,
      docstring: 'large buckets + no lables',
      buckets: BUCKETS
    ),
    Prometheus::Client::HistogramFixed.new(
      :large_buckets_new,
      docstring: 'large buckets + no lables',
      buckets: BUCKETS
    )
  ],
  'default buckets + labels' => [
    Prometheus::Client::Histogram.new(
      :default_buckets_with_labels_old,
      docstring: 'default buckets + labels',
      labels: [:label1, :label2]
    ),
    Prometheus::Client::HistogramFixed.new(
      :default_buckets_with_labels_new,
      docstring: 'default buckets + labels',
      labels: [:label1, :label2]
    )
  ],
  'large buckets + labels' => [
    Prometheus::Client::Histogram.new(
      :large_buckets_with_labels_old,
      docstring: 'large buckets + labels',
      buckets: BUCKETS,
      labels: [:label1, :label2]
    ),
    Prometheus::Client::HistogramFixed.new(
      :large_buckets_with_labels_new,
      docstring: 'large buckets + labels',
      buckets: BUCKETS,
      labels: [:label1, :label2]
    )
  ],
}.freeze

# Pool of label values sampled at random for the labelled benchmark cases.
labels = [1,2,3,4,5,6,7,8,9]

# For each test case: benchmark #observe (old vs. new) and #values (old vs. new).
TEST_CASES.each do |name, (old, new)|
  # Only the "+ labels" cases pass a labels: hash to #observe.
  with_labels = name.include?('+ labels')
  Benchmark.ips do |bm|
    bm.report("#{name} -> Observe old") do
      if with_labels
        old.observe(rand(BUCKETS.last + 10), labels: { label1: labels.sample, label2: labels.sample })
      else
        old.observe(rand(BUCKETS.last + 10))
      end
    end
    bm.report("#{name} -> Observe new") do
      if with_labels
        new.observe(rand(BUCKETS.last + 10), labels: { label1: labels.sample, label2: labels.sample })
      else
        new.observe(rand(BUCKETS.last + 10))
      end
    end

    bm.compare!
  end

  Benchmark.ips do |bm|
    bm.report("#{name} -> Values old") { old.values }
    bm.report("#{name} -> Values new") { new.values }

    bm.compare!
  end
end

# Sample usage of profiler
val = rand(BUCKETS.last)
l = { label1: 1, label2: 2 }

# Vernier.profile(mode: :wall, out: 'x.json', interval: 1, allocation_interval: 1) do
#   100000.times { large_buckets_new.observe(val, labels: l) }
# end

# Allocation comparison on the worst case (large buckets + labels),
# reusing the metrics already registered in TEST_CASES above.
old, new = TEST_CASES['large buckets + labels']

puts "Old#observe allocations -> #{allocations { 1000.times { old.observe(val, labels: l) }}}"
puts "New#observe allocations -> #{allocations { 1000.times { new.observe(val, labels: l) }}}"

puts "Old#values allocations -> #{allocations { 1000.times { old.values }}}"
puts "New#values allocations -> #{allocations { 1000.times { new.values }}}"


# Results:
# 1. Default buckets + no labels
# #observe is almost the same, but #values is 2.15x faster
#
# default buckets + no lables -> Observe new: 492718.9 i/s
# default buckets + no lables -> Observe old: 475856.7 i/s - same-ish: difference falls within error
# default buckets + no lables -> Values new: 98723.1 i/s
# default buckets + no lables -> Values old: 45867.1 i/s - 2.15x slower
#
# 2. Large buckets + no labels
# #observe is almost the same, but #values is 2.93x faster
#
# large buckets + no lables -> Observe new: 441325.9 i/s
# large buckets + no lables -> Observe old: 437752.4 i/s - same-ish: difference falls within error
# large buckets + no lables -> Values new: 4792.0 i/s
# large buckets + no lables -> Values old: 1636.8 i/s - 2.93x slower
#
# 3. Default buckets + 2 labels
# #observe is 1.44x faster, #values is 2.70x faster
#
# default buckets + labels -> Observe new: 450357.3 i/s
# default buckets + labels -> Observe old: 311747.3 i/s - 1.44x slower
# default buckets + labels -> Values new: 1633.8 i/s
# default buckets + labels -> Values old: 604.2 i/s - 2.70x slower
#
# 4. Large buckets + 2 labels
# #observe is 1.41x faster, #values is 9.57x faster
#
# large buckets + labels -> Observe new: 392597.2 i/s
# large buckets + labels -> Observe old: 277499.9 i/s - 1.41x slower
# large buckets + labels -> Values new: 247.6 i/s
# large buckets + labels -> Values old: 25.9 i/s - 9.57x slower
#
# 5. Allocations for 1000 observations for #observe method
# Old allocations -> 11001 - 11 allocations per observation
# New allocations -> 1000 - 1 allocation per observation
# Last place left `bucket_label_set = base_label_set.dup` in #observe method
#
# 6. Allocations for 1000 observations for #values method
# Old#values allocations -> 96150000
# New#values allocations -> 3325000
# almost 30x less allocations
Loading