Commit c40b88b

Introduce getUnionedWritableContainer to minimize allocations for in-place Or().
This removes double allocations for roaring array containers that have the copy-on-write (COW) flag set. Previously such a container would first be cloned, and then the additional terms would be added, necessitating a second slice allocation.
1 parent cd6117d commit c40b88b
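
For context, a minimal sketch of the scenario this commit targets, assuming the usual github.com/RoaringBitmap/roaring import path: a bitmap deserialized via FromBuffer keeps its containers flagged copy-on-write, so an in-place Or() previously cloned each touched container and then grew the clone, paying for two slice allocations.

package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	// Serialize a bitmap, then map it back with FromBuffer so its
	// containers are backed by the buffer and flagged copy-on-write.
	src := roaring.BitmapOf(1, 2, 3, 100000)
	buf, err := src.ToBytes()
	if err != nil {
		panic(err)
	}
	left := roaring.New()
	if _, err := left.FromBuffer(buf); err != nil {
		panic(err)
	}

	// In-place union: every touched copy-on-write container must be
	// materialized before it can be modified. With this commit that
	// materialization happens via or() instead of clone-then-ior(),
	// avoiding the second slice allocation.
	left.Or(roaring.BitmapOf(4, 5, 200000))
	fmt.Println(left.GetCardinality()) // 7
}

The benchmark added below, BenchmarkUnionInPlaceCopyOnWrite, measures exactly this FromBuffer-then-Or pattern.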

File tree

3 files changed (+82, -1 lines)

benchmark_test.go

Lines changed: 70 additions & 0 deletions
@@ -3,6 +3,7 @@ package roaring
 import (
 	"bytes"
 	"fmt"
+	"github.com/stretchr/testify/require"
 	"math/rand"
 	"runtime"
 	"testing"
@@ -235,6 +236,75 @@ func BenchmarkUnionRoaring(b *testing.B) {
 	}
 }
 
+// BenchmarkUnionInPlaceCopyOnWrite tests the performance of bitmap.Or()
+// when the bitmap was generated via FromBuffer.
+// In this case all left containers need to be copied in order to be updated.
+// The nested for-loops test a number of different scenarios
+// with respect to the ranges and densities of bitmaps.
+func BenchmarkUnionInPlaceCopyOnWrite(b *testing.B) {
+	//uint32s to maintain 1.12 compatibility, which requires unsigned shifts.
+	startingContainerPower := uint32(4)
+	finalContainerPower := uint32(10)
+	containerIncrement := uint32(3)
+	startingItemsPower := uint32(3)
+	finalItemsPower := uint32(10)
+	itemsIncrement := uint32(7)
+	for leftContainerPower := startingContainerPower; leftContainerPower <= finalContainerPower; leftContainerPower += containerIncrement {
+		for rightContainerPower := startingContainerPower; rightContainerPower <= finalContainerPower; rightContainerPower += containerIncrement {
+			for leftItemsPerContainerPower := startingItemsPower; leftItemsPerContainerPower <= finalItemsPower; leftItemsPerContainerPower += itemsIncrement {
+				for rightItemsPerContainerPower := startingItemsPower; rightItemsPerContainerPower <= finalItemsPower; rightItemsPerContainerPower += itemsIncrement {
+					b.Run(fmt.Sprintf("%d-%d-%d-%d", leftContainerPower, rightContainerPower, leftItemsPerContainerPower, rightItemsPerContainerPower),
+						func(b *testing.B) {
+							leftMax := (1 << 16) << leftContainerPower
+							rightMax := (1 << 16) << rightContainerPower
+							leftItems := 1 << (leftContainerPower + leftItemsPerContainerPower)
+							rightItems := 1 << (rightContainerPower + rightItemsPerContainerPower)
+							left := make([][]byte, 10)
+							right := make([]*Bitmap, 10)
+							for i := 0; i < 10; i++ {
+								right[i] = NewBitmap()
+								left[i] = generateRandomBitmap(b, leftMax, leftItems)
+								_, err := right[i].FromBuffer(generateRandomBitmap(b, rightMax, rightItems))
+								require.NoError(b, err)
+							}
+							// This tests a destructive operation, Or() so have to have a fresh bitmap per test.
+							targetLefts := make([]*Bitmap, b.N)
+							for i := 0; i < b.N; i++ {
+								targetLefts[i] = NewBitmap()
+								_, err := targetLefts[i].FromBuffer(left[i%10])
+								require.NoError(b, err)
+							}
+							runActualBenchmark(b, targetLefts, right)
+						})
+				}
+			}
+		}
+	}
+}
+
+// runActualBenchmark is broken out primarily so you can profile the tests,
+// as otherwise the generation overwhelms the actual test.
+func runActualBenchmark(b *testing.B, targetLefts []*Bitmap, right []*Bitmap) uint64 {
+	b.ResetTimer()
+	b.ReportAllocs()
+	total := uint64(0)
+	for i := 0; i < b.N; i++ {
+		targetLefts[i].Or(right[i%10])
+		total += targetLefts[i].GetCardinality()
+	}
+	return total
+}
+
+func generateRandomBitmap(b *testing.B, max, terms int) []byte {
+	bitmap := NewBitmap()
+	for i := 0; i < terms; i++ {
+		bitmap.Add(uint32(rand.Intn(max)))
+	}
+	result, err := bitmap.ToBytes()
+	require.NoError(b, err)
+	return result
+}
+
 // go test -bench BenchmarkSize -run -
 func BenchmarkSizeBitset(b *testing.B) {
 	b.StopTimer()
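
Following the pattern of the existing "go test -bench BenchmarkSize -run -" comment above, the new benchmark can presumably be exercised on its own with a command along these lines (standard go test flags, nothing added by this commit):

go test -bench BenchmarkUnionInPlaceCopyOnWrite -run -

Because runActualBenchmark calls b.ReportAllocs(), the per-operation allocation counts that motivate this change appear directly in the benchmark output.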

roaring.go

Lines changed: 1 addition & 1 deletion
@@ -1006,7 +1006,7 @@ main:
 			}
 			s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
 		} else {
-			rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getWritableContainerAtIndex(pos1).ior(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
+			rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getUnionedWritableContainer(pos1, x2.highlowcontainer.getContainerAtIndex(pos2)), false)
 			pos1++
 			pos2++
 			if (pos1 == length1) || (pos2 == length2) {

roaringarray.go

Lines changed: 11 additions & 0 deletions
@@ -328,6 +328,17 @@ func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
 	return c
 }
 
+// getUnionedWritableContainer switches behavior for in-place Or
+// depending on whether the container requires a copy on write.
+// If it does using the non-inplace or() method leads to fewer allocations.
+func (ra *roaringArray) getUnionedWritableContainer(pos int, other container) container {
+	if ra.needCopyOnWrite[pos] {
+		return ra.getContainerAtIndex(pos).or(other)
+	}
+	return ra.getContainerAtIndex(pos).ior(other)
+
+}
+
 func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
 	if ra.needCopyOnWrite[i] {
 		ra.containers[i] = ra.containers[i].clone()
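
To make the comment on getUnionedWritableContainer concrete, here is a hypothetical, self-contained sketch of the allocation-counting argument. It models a container as a plain []uint16 slice; cloneThenGrow and unionOnce are illustrative names invented for this sketch rather than roaring APIs, and the sorting/deduplication a real union performs is omitted.

package main

import "fmt"

// cloneThenGrow mirrors the old path: getWritableContainerAtIndex clones the
// copy-on-write container (one allocation), after which ior() appends the new
// terms, typically forcing the fresh clone to be reallocated (a second allocation).
func cloneThenGrow(a, b []uint16) []uint16 {
	clone := append([]uint16(nil), a...) // allocation 1: the defensive copy
	return append(clone, b...)           // usually allocation 2: the union outgrows the clone
}

// unionOnce mirrors the new path for copy-on-write containers: or() builds the
// union into a result sized up front, so a single allocation suffices.
func unionOnce(a, b []uint16) []uint16 {
	out := make([]uint16, 0, len(a)+len(b)) // the only allocation
	out = append(out, a...)
	return append(out, b...)
}

func main() {
	a := []uint16{1, 2, 3}
	b := []uint16{10, 11, 12}
	fmt.Println(cloneThenGrow(a, b)) // [1 2 3 10 11 12]
	fmt.Println(unionOnce(a, b))     // [1 2 3 10 11 12]
}

In the real code this is exactly the trade-off getUnionedWritableContainer encodes: when needCopyOnWrite is set, or() builds the result container directly, and ior() is used only when the existing container can be mutated in place.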
