Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit c7505ed

Browse files
committed
add 64bit support for roaringBitmapPostings
Signed-off-by: naivewong <[email protected]>
1 parent 430064a commit c7505ed

File tree

2 files changed

+195
-29
lines changed

2 files changed

+195
-29
lines changed

index/postings.go

Lines changed: 120 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -792,16 +792,34 @@ func (it *baseDeltaPostings) Seek(x uint64) bool {
792792
num := it.size - it.idx/it.width
793793
// Do binary search between current position and end.
794794
x -= it.base
795+
idx1 := it.idx-it.prel
795796
i := sort.Search(num, func(i int) bool {
796-
return binary.BigEndian.Uint64(it.bs[it.idx+i*it.width-it.prel:])&it.mask >= x
797+
idx2 := idx1 + i*it.width
798+
if idx2 >= 0 {
799+
return binary.BigEndian.Uint64(it.bs[idx2:])&it.mask >= x
800+
} else {
801+
temp := uint64(0)
802+
for i := idx2 + it.prel; i < idx2 + it.prel + 8; i++ {
803+
temp = (temp << 8) | uint64(it.bs[i])
804+
}
805+
return temp >= x
806+
}
797807
})
798808
if i < num {
799809
it.idx += i * it.width
800-
it.cur = it.base + (binary.BigEndian.Uint64(it.bs[it.idx-it.prel:])&it.mask)
810+
idx1 = it.idx - it.prel
811+
if idx1 >= 0 {
812+
it.cur = it.base + (binary.BigEndian.Uint64(it.bs[idx1:])&it.mask)
813+
} else {
814+
it.cur = 0
815+
for j := it.idx; j < it.idx + it.width; j++ {
816+
it.cur = (it.cur << 8) | uint64(it.bs[j])
817+
}
818+
it.cur += it.base
819+
}
801820
it.idx += it.width
802821
return true
803822
}
804-
it.idx += i*it.width
805823
return false
806824
}
807825

@@ -1363,15 +1381,15 @@ func writeBitmapPostings(e *encoding.Encbuf, arr []uint32) {
13631381
}
13641382

13651383
var rbpMasks []byte
1366-
var rbpValueMask uint32
1384+
var rbpValueMask uint64
13671385
var rbpValueSize int
13681386
var rbpBitmapSize int
13691387

13701388
func init() {
13711389
for i := 7; i >= 0; i-- {
13721390
rbpMasks = append(rbpMasks, byte(1<<uint(i)))
13731391
}
1374-
rbpValueMask = uint32((1 << uint(bitmapBits)) - 1)
1392+
rbpValueMask = (uint64(1) << uint(bitmapBits)) - 1
13751393
rbpBitmapSize = 1 << (bitmapBits - 3)
13761394
rbpValueSize = bitmapBits >> 3
13771395
}
@@ -1392,7 +1410,7 @@ type roaringBitmapPostings struct {
13921410
idx1 int // The offset in the bitmap in current block in bytes.
13931411
idx2 int // The offset in the current byte in the bitmap ([0,8)).
13941412
footerAddr int
1395-
key uint32
1413+
key uint64
13961414
numBlock int
13971415
blockIdx int
13981416
blockType byte
@@ -1419,7 +1437,7 @@ func (it *roaringBitmapPostings) Next() bool {
14191437
if it.inside { // Already entered the block.
14201438
if it.blockType == 0 { // Type array.
14211439
if it.idx < it.nextBlock {
1422-
it.cur = uint64(it.key) | uint64(it.bs[it.idx])
1440+
it.cur = it.key | uint64(it.bs[it.idx])
14231441
it.idx += 1
14241442
return true
14251443
}
@@ -1429,7 +1447,7 @@ func (it *roaringBitmapPostings) Next() bool {
14291447
}
14301448
for it.idx1 < rbpBitmapSize {
14311449
if it.bs[it.idx+it.idx1]&rbpMasks[it.idx2] != 0 {
1432-
it.cur = uint64(it.key) | uint64((it.idx1<<3)+it.idx2)
1450+
it.cur = it.key | uint64((it.idx1<<3)+it.idx2)
14331451
it.idx2 += 1
14341452
if it.idx2 == 8 {
14351453
it.idx1 += 1
@@ -1454,7 +1472,7 @@ func (it *roaringBitmapPostings) Next() bool {
14541472
} else { // Not yet entered the block.
14551473
if it.idx < it.footerAddr {
14561474
val, size := binary.Uvarint(it.bs[it.idx:])
1457-
it.key = uint32(val) << bitmapBits
1475+
it.key = val << bitmapBits
14581476
it.idx += size
14591477
it.blockType = it.bs[it.idx]
14601478
it.idx += 1
@@ -1478,7 +1496,7 @@ func (it *roaringBitmapPostings) Next() bool {
14781496
}
14791497

14801498
func (it *roaringBitmapPostings) seekInBlock(x uint64) bool {
1481-
curVal := byte(uint32(x) & rbpValueMask)
1499+
curVal := byte(x & rbpValueMask)
14821500
if it.blockType == 0 {
14831501
// If encoding with array, binary search.
14841502
num := (it.nextBlock - it.idx)
@@ -1492,15 +1510,15 @@ func (it *roaringBitmapPostings) seekInBlock(x uint64) bool {
14921510
return it.Next()
14931511
}
14941512

1495-
it.cur = uint64(it.key) | uint64(it.bs[it.idx+j])
1513+
it.cur = it.key | uint64(it.bs[it.idx+j])
14961514
it.idx += j + 1
14971515
return true
14981516
} else {
14991517
// If encoding with bitmap, go to the exact location of value of x.
15001518
it.idx1 = int(curVal >> 3)
15011519
it.idx2 = int(curVal % 8)
15021520
if it.bs[it.idx+it.idx1]&rbpMasks[it.idx2] != 0 { // Found x.
1503-
it.cur = uint64(it.key) | uint64(it.idx1*8+it.idx2)
1521+
it.cur = it.key | uint64(it.idx1*8+it.idx2)
15041522
it.idx2 += 1
15051523
if it.idx2 == 8 {
15061524
it.idx1 += 1
@@ -1522,7 +1540,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15221540
if it.cur >= x {
15231541
return true
15241542
}
1525-
curKey := uint32(x) >> bitmapBits
1543+
curKey := x >> bitmapBits
15261544
if it.inside && it.key>>bitmapBits == curKey {
15271545
// Fast path.
15281546
return it.seekInBlock(x)
@@ -1533,7 +1551,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15331551
// off := it.readBytes(it.footerAddr+1+(it.blockIdx+i)*it.width)
15341552
off := int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+i)*it.width-4+it.width:]) & it.addrMask)
15351553
k, _ := binary.Uvarint(it.bs[off:])
1536-
return uint32(k) >= curKey
1554+
return k >= curKey
15371555
// return binary.BigEndian.Uint32(it.bs[off:]) > curKey
15381556
})
15391557
if i == it.numBlock-it.blockIdx {
@@ -1553,7 +1571,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15531571

15541572
val, size := binary.Uvarint(it.bs[it.idx:])
15551573
// If the key of current block doesn't match, directly go to the next block.
1556-
if uint32(val) != curKey {
1574+
if val != curKey {
15571575
if it.blockIdx == it.numBlock-1 {
15581576
it.idx = it.footerAddr
15591577
return false
@@ -1564,7 +1582,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15641582
it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+it.blockIdx*it.width-4+it.width:]) & it.addrMask)
15651583
// it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+it.blockIdx*4:]))
15661584
val, size := binary.Uvarint(it.bs[it.idx:])
1567-
it.key = uint32(val) << bitmapBits
1585+
it.key = val << bitmapBits
15681586
it.idx += size
15691587
it.blockType = it.bs[it.idx]
15701588
it.idx += 1
@@ -1582,7 +1600,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15821600
return it.Next()
15831601
}
15841602
}
1585-
it.key = uint32(val) << bitmapBits
1603+
it.key = val << bitmapBits
15861604
it.idx += size
15871605
it.blockType = it.bs[it.idx]
15881606
it.idx += 1
@@ -1662,10 +1680,10 @@ func (it *roaringBitmapPostings) readBits(offset int) uint64 {
16621680
return u
16631681
}
16641682

1665-
func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []int, c []byte, key uint32, thres int, bitmapSize int, valueSize int) {
1666-
var offset int // The starting offset of the bitmap of each block.
1667-
var idx1 int // The offset in the bitmap in current block in bytes.
1668-
var idx2 int // The offset in the current byte in the bitmap ([0,8)).
1683+
func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []uint32, key uint32, thres int, bitmapSize int, valueSize int) {
1684+
var offset int // The starting offset of the bitmap of each block.
1685+
var idx1 uint32 // The offset in the bitmap in current block in bytes.
1686+
var idx2 uint32 // The offset in the current byte in the bitmap ([0,8)).
16691687
e.PutUvarint32(key)
16701688
if len(vals) > thres {
16711689
e.PutByte(byte(1))
@@ -1676,19 +1694,48 @@ func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []int, c []byte, key uint3
16761694
for _, val := range vals {
16771695
idx1 = val >> 3
16781696
idx2 = val % 8
1679-
e.B[offset+idx1] |= 1 << uint(7-idx2)
1697+
e.B[uint32(offset)+idx1] |= 1 << uint(7-idx2)
16801698
}
16811699
} else {
1700+
c := make([]byte, 4)
16821701
e.PutByte(byte(0))
16831702
for _, val := range vals {
1684-
binary.BigEndian.PutUint32(c[:], uint32(val))
1703+
binary.BigEndian.PutUint32(c[:], val)
16851704
for i := 4 - valueSize; i < 4; i++ {
16861705
e.PutByte(c[i])
16871706
}
16881707
}
16891708
}
16901709
}
16911710

1711+
func writeRoaringBitmapBlock64(e *encoding.Encbuf, vals []uint64, key uint64, thres int, bitmapSize int, valueSize int) {
1712+
var offset int // The starting offset of the bitmap of each block.
1713+
var idx1 uint64 // The offset in the bitmap in current block in bytes.
1714+
var idx2 uint64 // The offset in the current byte in the bitmap ([0,8)).
1715+
e.PutUvarint64(key)
1716+
if len(vals) > thres {
1717+
e.PutByte(byte(1))
1718+
offset = len(e.Get())
1719+
for i := 0; i < bitmapSize; i++ {
1720+
e.PutByte(byte(0))
1721+
}
1722+
for _, val := range vals {
1723+
idx1 = val >> 3
1724+
idx2 = val % 8
1725+
e.B[uint64(offset)+idx1] |= 1 << uint(7-idx2)
1726+
}
1727+
} else {
1728+
c := make([]byte, 8)
1729+
e.PutByte(byte(0))
1730+
for _, val := range vals {
1731+
binary.BigEndian.PutUint64(c[:], val)
1732+
for i := 8 - valueSize; i < 8; i++ {
1733+
e.PutByte(c[i])
1734+
}
1735+
}
1736+
}
1737+
}
1738+
16921739
func putBytes(e *encoding.Encbuf, val uint32, width int) {
16931740
for i := width - 1; i >= 0; i-- {
16941741
e.PutByte(byte((val >> (8 * uint(i)) & 0xff)))
@@ -1705,8 +1752,7 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17051752
var curVal uint32
17061753
var idx int // Index of current element in arr.
17071754
var startingOffs []uint32 // The starting offsets of each block.
1708-
var vals []int // The converted values in the current block.
1709-
c := make([]byte, 4)
1755+
var vals []uint32 // The converted values in the current block.
17101756
startOff := len(e.Get())
17111757
e.PutBE32(0) // Footer starting offset.
17121758
for idx < len(arr) {
@@ -1716,22 +1762,22 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17161762
// Move to next block.
17171763
if idx != 0 {
17181764
startingOffs = append(startingOffs, uint32(len(e.B)))
1719-
writeRoaringBitmapBlock(e, vals, c, key, thres, bitmapSize, valueSize)
1765+
writeRoaringBitmapBlock(e, vals, key, thres, bitmapSize, valueSize)
17201766
vals = vals[:0]
17211767
}
17221768
key = curKey
17231769
}
1724-
vals = append(vals, int(curVal))
1770+
vals = append(vals, curVal)
17251771
idx += 1
17261772
}
17271773
startingOffs = append(startingOffs, uint32(len(e.B)))
1728-
writeRoaringBitmapBlock(e, vals, c, key, thres, bitmapSize, valueSize)
1774+
writeRoaringBitmapBlock(e, vals, key, thres, bitmapSize, valueSize)
17291775

17301776
// Put footer starting offset.
17311777
binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-4-startOff))
17321778
width := bits.Len32(startingOffs[len(startingOffs)-1] - 4 - uint32(startOff))
17331779
if width == 0 {
1734-
// key 0 will result in o width.
1780+
// key 0 will result in 0 width.
17351781
width += 1
17361782
}
17371783
// e.PutBE32(uint32(len(startingOffs))) // Number of blocks.
@@ -1749,3 +1795,48 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17491795
// e.PutBE32(off - 4 - uint32(startOff))
17501796
// }
17511797
}
1798+
1799+
func writeRoaringBitmapPostings64(e *encoding.Encbuf, arr []uint64) {
1800+
key := uint64(0xffffffffffffffff) // The initial key should be unique.
1801+
bitmapSize := 1 << (bitmapBits - 3) // Bitmap size in bytes.
1802+
valueSize := bitmapBits >> 3 // The size of the element in array in bytes.
1803+
thres := (1 << bitmapBits) / bitmapBits // Threshold of number of elements in the block for choosing encoding type.
1804+
mask := (uint64(1) << uint(bitmapBits)) - 1 // Mask for the elements in the block.
1805+
var curKey uint64
1806+
var curVal uint64
1807+
var idx int // Index of current element in arr.
1808+
var startingOffs []uint32 // The starting offsets of each block.
1809+
var vals []uint64 // The converted values in the current block.
1810+
startOff := len(e.Get())
1811+
e.PutBE32(0) // Footer starting offset.
1812+
for idx < len(arr) {
1813+
curKey = arr[idx] >> bitmapBits // Key of block.
1814+
curVal = arr[idx] & mask // Value inside block.
1815+
if curKey != key {
1816+
// Move to next block.
1817+
if idx != 0 {
1818+
startingOffs = append(startingOffs, uint32(len(e.B)))
1819+
writeRoaringBitmapBlock64(e, vals, key, thres, bitmapSize, valueSize)
1820+
vals = vals[:0]
1821+
}
1822+
key = curKey
1823+
}
1824+
vals = append(vals, curVal)
1825+
idx += 1
1826+
}
1827+
startingOffs = append(startingOffs, uint32(len(e.B)))
1828+
writeRoaringBitmapBlock64(e, vals, key, thres, bitmapSize, valueSize)
1829+
1830+
// Put footer starting offset.
1831+
binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-4-startOff))
1832+
width := bits.Len32(startingOffs[len(startingOffs)-1] - 4 - uint32(startOff))
1833+
if width == 0 {
1834+
// key 0 will result in 0 width.
1835+
width += 1
1836+
}
1837+
1838+
e.PutByte(byte((width + 7) / 8))
1839+
for _, off := range startingOffs {
1840+
putBytes(e, off-4-uint32(startOff), (width+7)/8)
1841+
}
1842+
}

index/postings_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,81 @@ func TestRoaringBitmapPostings(t *testing.T) {
10941094
})
10951095
}
10961096

1097+
func TestRoaringBitmapPostings64(t *testing.T) {
1098+
num := 1000
1099+
// mock a list as postings
1100+
ls := make([]uint64, num)
1101+
ls[0] = 2
1102+
for i := 1; i < num; i++ {
1103+
ls[i] = ls[i-1] + uint64(rand.Int63n(15)) + 2
1104+
// ls[i] = ls[i-1] + 10
1105+
}
1106+
1107+
buf := encoding.Encbuf{}
1108+
writeRoaringBitmapPostings64(&buf, ls)
1109+
// t.Log("len", len(buf.Get()))
1110+
1111+
t.Run("Iteration", func(t *testing.T) {
1112+
rbp := newRoaringBitmapPostings(buf.Get())
1113+
for i := 0; i < num; i++ {
1114+
testutil.Assert(t, rbp.Next() == true, "")
1115+
// t.Log("ls[i] =", ls[i], "rbp.At() =", rbp.At())
1116+
testutil.Equals(t, ls[i], rbp.At())
1117+
}
1118+
1119+
testutil.Assert(t, rbp.Next() == false, "")
1120+
testutil.Assert(t, rbp.Err() == nil, "")
1121+
})
1122+
1123+
t.Run("Seek", func(t *testing.T) {
1124+
table := []struct {
1125+
seek uint64
1126+
val uint64
1127+
found bool
1128+
}{
1129+
{
1130+
ls[0] - 1, ls[0], true,
1131+
},
1132+
{
1133+
ls[4], ls[4], true,
1134+
},
1135+
{
1136+
ls[500] - 1, ls[500], true,
1137+
},
1138+
{
1139+
ls[600] + 1, ls[601], true,
1140+
},
1141+
{
1142+
ls[600] + 1, ls[601], true,
1143+
},
1144+
{
1145+
ls[600] + 1, ls[601], true,
1146+
},
1147+
{
1148+
ls[0], ls[601], true,
1149+
},
1150+
{
1151+
ls[600], ls[601], true,
1152+
},
1153+
{
1154+
ls[999], ls[999], true,
1155+
},
1156+
{
1157+
ls[999] + 10, ls[999], false,
1158+
},
1159+
}
1160+
1161+
rbp := newRoaringBitmapPostings(buf.Get())
1162+
1163+
for _, v := range table {
1164+
// t.Log("i", i)
1165+
testutil.Equals(t, v.found, rbp.Seek(v.seek))
1166+
testutil.Equals(t, v.val, rbp.At())
1167+
testutil.Assert(t, rbp.Err() == nil, "")
1168+
}
1169+
})
1170+
}
1171+
10971172
func BenchmarkPostings(b *testing.B) {
10981173
num := 100000
10991174
// mock a list as postings

0 commit comments

Comments
 (0)