Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 95f3c79

Browse files
committed
add 64bit support for roaringBitmapPostings
Signed-off-by: naivewong <[email protected]>
1 parent 430064a commit 95f3c79

File tree

3 files changed

+192
-42
lines changed

3 files changed

+192
-42
lines changed

index/index.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,9 @@ func (w *Writer) WritePostings(name, value string, it Postings) error {
541541
width = 1
542542
}
543543
w.buf2.PutByte(byte(width))
544+
for i := 0; i < 8 - width; i++ {
545+
w.buf2.PutByte(0)
546+
}
544547
for i := 0; i < len(refs); i++ {
545548
for j := width - 1; j >= 0; j-- {
546549
w.buf2.B = append(w.buf2.B, byte(((refs[i]-refs[0])>>(8*uint(j))&0xff)))

index/postings.go

Lines changed: 108 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -754,33 +754,27 @@ type baseDeltaPostings struct {
754754
base uint64
755755
size int
756756
idx int
757+
i int
757758
cur uint64
758759
mask uint64
759760
prel int
760761
}
761762

762763
func newBaseDeltaPostings(bstream []byte, base uint64, width int, size int) *baseDeltaPostings {
763-
return &baseDeltaPostings{bs: bstream, width: width, base: base, size: size, cur: uint64(base), mask: (uint64(1) << (uint64(width) << 3)) - 1, prel: 8 - width}
764+
return &baseDeltaPostings{bs: bstream, width: width, base: base, size: size, idx: 8 - width, cur: uint64(base), mask: (uint64(1) << (uint64(width) << 3)) - 1, prel: 8 - width}
764765
}
765766

766767
// At returns the current posting, i.e. the value stored in cur by the most
// recent successful Next or Seek.
func (it *baseDeltaPostings) At() uint64 {
	return it.cur
}
769770

770771
func (it *baseDeltaPostings) Next() bool {
771-
if it.idx >= it.size*it.width {
772+
if it.i >= it.size {
772773
return false
773774
}
774-
if it.idx-it.prel >= 0 {
775-
it.cur = binary.BigEndian.Uint64(it.bs[it.idx-it.prel:])&it.mask + it.base
776-
} else {
777-
it.cur = 0
778-
for i := 0; i < it.width; i++ {
779-
it.cur = (it.cur << 8) | uint64(it.bs[it.idx+i])
780-
}
781-
it.cur += it.base
782-
}
775+
it.cur = binary.BigEndian.Uint64(it.bs[it.idx-it.prel:])&it.mask + it.base
783776
it.idx += it.width
777+
it.i += 1
784778
return true
785779
}
786780

@@ -789,8 +783,7 @@ func (it *baseDeltaPostings) Seek(x uint64) bool {
789783
return true
790784
}
791785

792-
num := it.size - it.idx/it.width
793-
// Do binary search between current position and end.
786+
num := it.size - it.i
794787
x -= it.base
795788
i := sort.Search(num, func(i int) bool {
796789
return binary.BigEndian.Uint64(it.bs[it.idx+i*it.width-it.prel:])&it.mask >= x
@@ -799,18 +792,18 @@ func (it *baseDeltaPostings) Seek(x uint64) bool {
799792
it.idx += i * it.width
800793
it.cur = it.base + (binary.BigEndian.Uint64(it.bs[it.idx-it.prel:])&it.mask)
801794
it.idx += it.width
795+
it.i += i + 1
802796
return true
803797
}
804-
it.idx += i*it.width
805798
return false
806799
}
807800

808801
// Err always returns nil: this iterator never records an error.
func (it *baseDeltaPostings) Err() error {
	return nil
}
811804

812-
// Delta block dimensions. deltaBlockSize is always 2^deltaBlockBits; deriving
// it from the bit count keeps the two values from drifting apart.
const (
	deltaBlockBits = 5
	deltaBlockSize = 1 << deltaBlockBits // 32
)
814807

815808
// Block format(delta is to the previous value).
816809
// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
@@ -1363,15 +1356,15 @@ func writeBitmapPostings(e *encoding.Encbuf, arr []uint32) {
13631356
}
13641357

13651358
// Lookup data shared by the roaring bitmap postings code. All four variables
// are populated by init.
var (
	rbpMasks      []byte // Single-bit masks 0x80 .. 0x01, indexed by bit position counted from the most significant bit.
	rbpValueMask  uint64 // Low bitmapBits bits set; 64-bit so it can be applied to 64-bit postings directly.
	rbpValueSize  int    // bitmapBits >> 3.
	rbpBitmapSize int    // 1 << (bitmapBits - 3).
)
13691362

13701363
func init() {
13711364
for i := 7; i >= 0; i-- {
13721365
rbpMasks = append(rbpMasks, byte(1<<uint(i)))
13731366
}
1374-
rbpValueMask = uint32((1 << uint(bitmapBits)) - 1)
1367+
rbpValueMask = (uint64(1) << uint(bitmapBits)) - 1
13751368
rbpBitmapSize = 1 << (bitmapBits - 3)
13761369
rbpValueSize = bitmapBits >> 3
13771370
}
@@ -1392,7 +1385,7 @@ type roaringBitmapPostings struct {
13921385
idx1 int // The offset in the bitmap in current block in bytes.
13931386
idx2 int // The offset in the current byte in the bitmap ([0,8)).
13941387
footerAddr int
1395-
key uint32
1388+
key uint64
13961389
numBlock int
13971390
blockIdx int
13981391
blockType byte
@@ -1419,7 +1412,7 @@ func (it *roaringBitmapPostings) Next() bool {
14191412
if it.inside { // Already entered the block.
14201413
if it.blockType == 0 { // Type array.
14211414
if it.idx < it.nextBlock {
1422-
it.cur = uint64(it.key) | uint64(it.bs[it.idx])
1415+
it.cur = it.key | uint64(it.bs[it.idx])
14231416
it.idx += 1
14241417
return true
14251418
}
@@ -1429,7 +1422,7 @@ func (it *roaringBitmapPostings) Next() bool {
14291422
}
14301423
for it.idx1 < rbpBitmapSize {
14311424
if it.bs[it.idx+it.idx1]&rbpMasks[it.idx2] != 0 {
1432-
it.cur = uint64(it.key) | uint64((it.idx1<<3)+it.idx2)
1425+
it.cur = it.key | uint64((it.idx1<<3)+it.idx2)
14331426
it.idx2 += 1
14341427
if it.idx2 == 8 {
14351428
it.idx1 += 1
@@ -1454,7 +1447,7 @@ func (it *roaringBitmapPostings) Next() bool {
14541447
} else { // Not yet entered the block.
14551448
if it.idx < it.footerAddr {
14561449
val, size := binary.Uvarint(it.bs[it.idx:])
1457-
it.key = uint32(val) << bitmapBits
1450+
it.key = val << bitmapBits
14581451
it.idx += size
14591452
it.blockType = it.bs[it.idx]
14601453
it.idx += 1
@@ -1478,7 +1471,7 @@ func (it *roaringBitmapPostings) Next() bool {
14781471
}
14791472

14801473
func (it *roaringBitmapPostings) seekInBlock(x uint64) bool {
1481-
curVal := byte(uint32(x) & rbpValueMask)
1474+
curVal := byte(x & rbpValueMask)
14821475
if it.blockType == 0 {
14831476
// If encoding with array, binary search.
14841477
num := (it.nextBlock - it.idx)
@@ -1492,15 +1485,15 @@ func (it *roaringBitmapPostings) seekInBlock(x uint64) bool {
14921485
return it.Next()
14931486
}
14941487

1495-
it.cur = uint64(it.key) | uint64(it.bs[it.idx+j])
1488+
it.cur = it.key | uint64(it.bs[it.idx+j])
14961489
it.idx += j + 1
14971490
return true
14981491
} else {
14991492
// If encoding with bitmap, go to the exact location of value of x.
15001493
it.idx1 = int(curVal >> 3)
15011494
it.idx2 = int(curVal % 8)
15021495
if it.bs[it.idx+it.idx1]&rbpMasks[it.idx2] != 0 { // Found x.
1503-
it.cur = uint64(it.key) | uint64(it.idx1*8+it.idx2)
1496+
it.cur = it.key | uint64(it.idx1*8+it.idx2)
15041497
it.idx2 += 1
15051498
if it.idx2 == 8 {
15061499
it.idx1 += 1
@@ -1522,7 +1515,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15221515
if it.cur >= x {
15231516
return true
15241517
}
1525-
curKey := uint32(x) >> bitmapBits
1518+
curKey := x >> bitmapBits
15261519
if it.inside && it.key>>bitmapBits == curKey {
15271520
// Fast path.
15281521
return it.seekInBlock(x)
@@ -1533,7 +1526,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15331526
// off := it.readBytes(it.footerAddr+1+(it.blockIdx+i)*it.width)
15341527
off := int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+i)*it.width-4+it.width:]) & it.addrMask)
15351528
k, _ := binary.Uvarint(it.bs[off:])
1536-
return uint32(k) >= curKey
1529+
return k >= curKey
15371530
// return binary.BigEndian.Uint32(it.bs[off:]) > curKey
15381531
})
15391532
if i == it.numBlock-it.blockIdx {
@@ -1553,7 +1546,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15531546

15541547
val, size := binary.Uvarint(it.bs[it.idx:])
15551548
// If the key of current block doesn't match, directly go to the next block.
1556-
if uint32(val) != curKey {
1549+
if val != curKey {
15571550
if it.blockIdx == it.numBlock-1 {
15581551
it.idx = it.footerAddr
15591552
return false
@@ -1564,7 +1557,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15641557
it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+it.blockIdx*it.width-4+it.width:]) & it.addrMask)
15651558
// it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+it.blockIdx*4:]))
15661559
val, size := binary.Uvarint(it.bs[it.idx:])
1567-
it.key = uint32(val) << bitmapBits
1560+
it.key = val << bitmapBits
15681561
it.idx += size
15691562
it.blockType = it.bs[it.idx]
15701563
it.idx += 1
@@ -1582,7 +1575,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15821575
return it.Next()
15831576
}
15841577
}
1585-
it.key = uint32(val) << bitmapBits
1578+
it.key = val << bitmapBits
15861579
it.idx += size
15871580
it.blockType = it.bs[it.idx]
15881581
it.idx += 1
@@ -1662,10 +1655,10 @@ func (it *roaringBitmapPostings) readBits(offset int) uint64 {
16621655
return u
16631656
}
16641657

1665-
func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []int, c []byte, key uint32, thres int, bitmapSize int, valueSize int) {
1666-
var offset int // The starting offset of the bitmap of each block.
1667-
var idx1 int // The offset in the bitmap in current block in bytes.
1668-
var idx2 int // The offset in the current byte in the bitmap ([0,8)).
1658+
func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []uint32, key uint32, thres int, bitmapSize int, valueSize int) {
1659+
var offset int // The starting offset of the bitmap of each block.
1660+
var idx1 uint32 // The offset in the bitmap in current block in bytes.
1661+
var idx2 uint32 // The offset in the current byte in the bitmap ([0,8)).
16691662
e.PutUvarint32(key)
16701663
if len(vals) > thres {
16711664
e.PutByte(byte(1))
@@ -1676,19 +1669,48 @@ func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []int, c []byte, key uint3
16761669
for _, val := range vals {
16771670
idx1 = val >> 3
16781671
idx2 = val % 8
1679-
e.B[offset+idx1] |= 1 << uint(7-idx2)
1672+
e.B[uint32(offset)+idx1] |= 1 << uint(7-idx2)
16801673
}
16811674
} else {
1675+
c := make([]byte, 4)
16821676
e.PutByte(byte(0))
16831677
for _, val := range vals {
1684-
binary.BigEndian.PutUint32(c[:], uint32(val))
1678+
binary.BigEndian.PutUint32(c[:], val)
16851679
for i := 4 - valueSize; i < 4; i++ {
16861680
e.PutByte(c[i])
16871681
}
16881682
}
16891683
}
16901684
}
16911685

1686+
func writeRoaringBitmapBlock64(e *encoding.Encbuf, vals []uint64, key uint64, thres int, bitmapSize int, valueSize int) {
1687+
var offset int // The starting offset of the bitmap of each block.
1688+
var idx1 uint64 // The offset in the bitmap in current block in bytes.
1689+
var idx2 uint64 // The offset in the current byte in the bitmap ([0,8)).
1690+
e.PutUvarint64(key)
1691+
if len(vals) > thres {
1692+
e.PutByte(byte(1))
1693+
offset = len(e.Get())
1694+
for i := 0; i < bitmapSize; i++ {
1695+
e.PutByte(byte(0))
1696+
}
1697+
for _, val := range vals {
1698+
idx1 = val >> 3
1699+
idx2 = val % 8
1700+
e.B[uint64(offset)+idx1] |= 1 << uint(7-idx2)
1701+
}
1702+
} else {
1703+
c := make([]byte, 8)
1704+
e.PutByte(byte(0))
1705+
for _, val := range vals {
1706+
binary.BigEndian.PutUint64(c[:], val)
1707+
for i := 8 - valueSize; i < 8; i++ {
1708+
e.PutByte(c[i])
1709+
}
1710+
}
1711+
}
1712+
}
1713+
16921714
func putBytes(e *encoding.Encbuf, val uint32, width int) {
16931715
for i := width - 1; i >= 0; i-- {
16941716
e.PutByte(byte((val >> (8 * uint(i)) & 0xff)))
@@ -1705,8 +1727,7 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17051727
var curVal uint32
17061728
var idx int // Index of current element in arr.
17071729
var startingOffs []uint32 // The starting offsets of each block.
1708-
var vals []int // The converted values in the current block.
1709-
c := make([]byte, 4)
1730+
var vals []uint32 // The converted values in the current block.
17101731
startOff := len(e.Get())
17111732
e.PutBE32(0) // Footer starting offset.
17121733
for idx < len(arr) {
@@ -1716,22 +1737,22 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17161737
// Move to next block.
17171738
if idx != 0 {
17181739
startingOffs = append(startingOffs, uint32(len(e.B)))
1719-
writeRoaringBitmapBlock(e, vals, c, key, thres, bitmapSize, valueSize)
1740+
writeRoaringBitmapBlock(e, vals, key, thres, bitmapSize, valueSize)
17201741
vals = vals[:0]
17211742
}
17221743
key = curKey
17231744
}
1724-
vals = append(vals, int(curVal))
1745+
vals = append(vals, curVal)
17251746
idx += 1
17261747
}
17271748
startingOffs = append(startingOffs, uint32(len(e.B)))
1728-
writeRoaringBitmapBlock(e, vals, c, key, thres, bitmapSize, valueSize)
1749+
writeRoaringBitmapBlock(e, vals, key, thres, bitmapSize, valueSize)
17291750

17301751
// Put footer starting offset.
17311752
binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-4-startOff))
17321753
width := bits.Len32(startingOffs[len(startingOffs)-1] - 4 - uint32(startOff))
17331754
if width == 0 {
1734-
// key 0 will result in o width.
1755+
// key 0 will result in 0 width.
17351756
width += 1
17361757
}
17371758
// e.PutBE32(uint32(len(startingOffs))) // Number of blocks.
@@ -1749,3 +1770,48 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17491770
// e.PutBE32(off - 4 - uint32(startOff))
17501771
// }
17511772
}
1773+
1774+
func writeRoaringBitmapPostings64(e *encoding.Encbuf, arr []uint64) {
1775+
key := uint64(0xffffffffffffffff) // The initial key should be unique.
1776+
bitmapSize := 1 << (bitmapBits - 3) // Bitmap size in bytes.
1777+
valueSize := bitmapBits >> 3 // The size of the element in array in bytes.
1778+
thres := (1 << bitmapBits) / bitmapBits // Threshold of number of elements in the block for choosing encoding type.
1779+
mask := (uint64(1) << uint(bitmapBits)) - 1 // Mask for the elements in the block.
1780+
var curKey uint64
1781+
var curVal uint64
1782+
var idx int // Index of current element in arr.
1783+
var startingOffs []uint32 // The starting offsets of each block.
1784+
var vals []uint64 // The converted values in the current block.
1785+
startOff := len(e.Get())
1786+
e.PutBE32(0) // Footer starting offset.
1787+
for idx < len(arr) {
1788+
curKey = arr[idx] >> bitmapBits // Key of block.
1789+
curVal = arr[idx] & mask // Value inside block.
1790+
if curKey != key {
1791+
// Move to next block.
1792+
if idx != 0 {
1793+
startingOffs = append(startingOffs, uint32(len(e.B)))
1794+
writeRoaringBitmapBlock64(e, vals, key, thres, bitmapSize, valueSize)
1795+
vals = vals[:0]
1796+
}
1797+
key = curKey
1798+
}
1799+
vals = append(vals, curVal)
1800+
idx += 1
1801+
}
1802+
startingOffs = append(startingOffs, uint32(len(e.B)))
1803+
writeRoaringBitmapBlock64(e, vals, key, thres, bitmapSize, valueSize)
1804+
1805+
// Put footer starting offset.
1806+
binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-4-startOff))
1807+
width := bits.Len32(startingOffs[len(startingOffs)-1] - 4 - uint32(startOff))
1808+
if width == 0 {
1809+
// key 0 will result in 0 width.
1810+
width += 1
1811+
}
1812+
1813+
e.PutByte(byte((width + 7) / 8))
1814+
for _, off := range startingOffs {
1815+
putBytes(e, off-4-uint32(startOff), (width+7)/8)
1816+
}
1817+
}

0 commit comments

Comments
 (0)