Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 296f943

Browse files
authored
More efficient Merge implementation. (#486)
Avoid a tree of merge objects, which can result in what I suspect is n^2 calls to Seek when using Without.

With 100k metrics, and a regex of ^$ in BenchmarkHeadPostingForMatchers:

Before: BenchmarkHeadPostingForMatchers-8 1 51633185216 ns/op 29745528 B/op 200357 allocs/op
After: BenchmarkHeadPostingForMatchers-8 10 108924996 ns/op 25715025 B/op 101748 allocs/op

Signed-off-by: Brian Brazil <[email protected]>
1 parent b2d7bbd commit 296f943

File tree

2 files changed

+20
-75
lines changed

2 files changed

+20
-75
lines changed

index/postings.go

Lines changed: 17 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -366,80 +366,25 @@ func Merge(its ...Postings) Postings {
366366
if len(its) == 1 {
367367
return its[0]
368368
}
369-
l := len(its) / 2
370-
return newMergedPostings(Merge(its[:l]...), Merge(its[l:]...))
371-
}
372-
373-
type mergedPostings struct {
374-
a, b Postings
375-
initialized bool
376-
aok, bok bool
377-
cur uint64
378-
}
379-
380-
func newMergedPostings(a, b Postings) *mergedPostings {
381-
return &mergedPostings{a: a, b: b}
382-
}
383-
384-
func (it *mergedPostings) At() uint64 {
385-
return it.cur
386-
}
387-
388-
func (it *mergedPostings) Next() bool {
389-
if !it.initialized {
390-
it.aok = it.a.Next()
391-
it.bok = it.b.Next()
392-
it.initialized = true
393-
}
394-
395-
if !it.aok && !it.bok {
396-
return false
397-
}
398-
399-
if !it.aok {
400-
it.cur = it.b.At()
401-
it.bok = it.b.Next()
402-
return true
403-
}
404-
if !it.bok {
405-
it.cur = it.a.At()
406-
it.aok = it.a.Next()
407-
return true
408-
}
409-
410-
acur, bcur := it.a.At(), it.b.At()
411-
412-
if acur < bcur {
413-
it.cur = acur
414-
it.aok = it.a.Next()
415-
} else if acur > bcur {
416-
it.cur = bcur
417-
it.bok = it.b.Next()
418-
} else {
419-
it.cur = acur
420-
it.aok = it.a.Next()
421-
it.bok = it.b.Next()
422-
}
423-
return true
424-
}
425-
426-
func (it *mergedPostings) Seek(id uint64) bool {
427-
if it.cur >= id {
428-
return true
369+
// All the uses of this function immediately expand it, so
370+
// collect everything in a map. This is more efficient
371+
// when there's 100ks of postings, compared to
372+
// having a tree of merge objects.
373+
pm := make(map[uint64]struct{}, len(its))
374+
for _, it := range its {
375+
for it.Next() {
376+
pm[it.At()] = struct{}{}
377+
}
378+
if it.Err() != nil {
379+
return ErrPostings(it.Err())
380+
}
429381
}
430-
431-
it.aok = it.a.Seek(id)
432-
it.bok = it.b.Seek(id)
433-
it.initialized = true
434-
435-
return it.Next()
436-
}
437-
438-
func (it *mergedPostings) Err() error {
439-
if it.a.Err() != nil {
440-
return it.a.Err()
382+
pl := make([]uint64, 0, len(pm))
383+
for p := range pm {
384+
pl = append(pl, p)
441385
}
442-
return it.b.Err()
386+
sort.Slice(pl, func(i, j int) bool { return pl[i] < pl[j] })
387+
return newListPostings(pl)
443388
}
444389

445390
// Without returns a new postings list that contains all elements from the full list that

index/postings_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ func TestMergedPostings(t *testing.T) {
233233
a := newListPostings(c.a)
234234
b := newListPostings(c.b)
235235

236-
res, err := ExpandPostings(newMergedPostings(a, b))
236+
res, err := ExpandPostings(Merge(a, b))
237237
testutil.Ok(t, err)
238238
testutil.Equals(t, c.res, res)
239239
}
@@ -286,7 +286,7 @@ func TestMergedPostingsSeek(t *testing.T) {
286286
a := newListPostings(c.a)
287287
b := newListPostings(c.b)
288288

289-
p := newMergedPostings(a, b)
289+
p := Merge(a, b)
290290

291291
testutil.Equals(t, c.success, p.Seek(c.seek))
292292

@@ -546,7 +546,7 @@ func TestIntersectWithMerge(t *testing.T) {
546546
// https://github.com/prometheus/prometheus/issues/2616
547547
a := newListPostings([]uint64{21, 22, 23, 24, 25, 30})
548548

549-
b := newMergedPostings(
549+
b := Merge(
550550
newListPostings([]uint64{10, 20, 30}),
551551
newListPostings([]uint64{15, 26, 30}),
552552
)

0 commit comments

Comments
 (0)