package limits

import (
-	"sync"
-
	"github.com/efficientgo/core/errors"
+	"github.com/thanos-io/promql-engine/query"
	"go.uber.org/atomic"
)

type Limits struct {
	maxSamples int

-	curSamplesPerTimestamp sync.Map
+	start      int64
+	step       int64
+	stepsBatch int64
+
+	samplesPerTimestamp []*atomic.Int64
+	periodPerTimestamp  []*atomic.Int64
}

-func NewLimits(maxSamples int) *Limits {
-	return &Limits{
+// NewLimits returns a pointer to a Limits struct. It can be used to
+// track samples that enter the engine at some timestamp and limit them
+// to a maximum number. Since the engine processes "stepsBatch" timestamps
+// in parallel, the resulting memory overhead will be "O(stepsBatch*maxSamples)".
+func NewLimits(maxSamples int, opts *query.Options) *Limits {
+	step := opts.Step.Milliseconds()
+	if opts.NumSteps() == 1 {
+		step = 1
+	}
+	start := opts.Start.UnixMilli()
+	stepsBatch := opts.StepsBatch
+
+	res := &Limits{
		maxSamples: maxSamples,
+
+		start:      start,
+		step:       step,
+		stepsBatch: stepsBatch,
+
+		periodPerTimestamp:  make([]*atomic.Int64, stepsBatch),
+		samplesPerTimestamp: make([]*atomic.Int64, stepsBatch),
	}
+
+	for i := int64(0); i < stepsBatch; i++ {
+		res.periodPerTimestamp[i] = atomic.NewInt64(0)
+		res.samplesPerTimestamp[i] = atomic.NewInt64(0)
+	}
+
+	return res
}

+// AccountSamplesForTimestamp keeps track of the samples used for timestamp t.
+// It will return an error if a step wants to use more samples than the configured
+// maxSamples value.
func (l *Limits) AccountSamplesForTimestamp(t int64, n int) error {
	if l.maxSamples == 0 {
		return nil
	}
-	v, _ := l.curSamplesPerTimestamp.LoadOrStore(t, atomic.NewInt64(0))
-	av := v.(*atomic.Int64)
+	// TODO: properly this method would need a lock, but so far it seems to work well enough.
+
+	idx := (t - l.start) / l.step
+	idxmod := idx % l.stepsBatch
+
+	// This assumes that if we process "stepsBatch+1" we have already processed "1".
+	// This is accurate so long as we don't move the coalesce operator higher up the execution
+	// tree. It allows us to account only for the last "stepsBatch" timestamps and keep a
+	// constant memory overhead.
+	if period := idx / l.stepsBatch; period > l.periodPerTimestamp[idxmod].Load() {
+		l.periodPerTimestamp[idxmod].Store(period)
+		l.samplesPerTimestamp[idxmod].Store(0)
+	}

-	if cur := av.Load(); cur+int64(n) > int64(l.maxSamples) {
+	if cur := l.samplesPerTimestamp[idxmod].Load(); cur+int64(n) > int64(l.maxSamples) {
		return errors.New("query processing would load too many samples into memory in query execution")
	}
+	l.samplesPerTimestamp[idxmod].Add(int64(n))

-	av.Add(int64(n))
	return nil
}
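
For readers skimming the change, here is a minimal, self-contained sketch of the ring-buffer accounting it introduces (this is not part of the diff; all names and values are illustrative): a timestamp maps to one of stepsBatch slots via (t-start)/step, and a slot's sample count is reset whenever a later batch ("period") reaches it, which keeps the bookkeeping at constant memory:

package main

import "fmt"

func main() {
	// Illustrative values, not taken from any engine configuration.
	const (
		start      = int64(0)      // query start in milliseconds
		step       = int64(30_000) // 30s resolution step in milliseconds
		stepsBatch = int64(4)      // number of steps processed in parallel
	)

	period := make([]int64, stepsBatch)  // highest batch seen per slot
	samples := make([]int64, stepsBatch) // samples accounted per slot

	account := func(t, n int64) {
		idx := (t - start) / step
		slot := idx % stepsBatch
		// A later batch reuses the slot, so reset its counter first.
		if p := idx / stepsBatch; p > period[slot] {
			period[slot] = p
			samples[slot] = 0
		}
		samples[slot] += n
	}

	// Eight consecutive steps fold onto four slots: step 4 reuses slot 0, and so on.
	for ts := int64(0); ts < 8*step; ts += step {
		account(ts, 10)
	}
	fmt.Println(samples) // [10 10 10 10]: each slot only holds its latest period
}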