Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit c4b6f0f

Browse files
committed
add 64bit support for roaringBitmapPostings
Signed-off-by: naivewong <[email protected]>
1 parent 430064a commit c4b6f0f

File tree

2 files changed

+185
-27
lines changed

2 files changed

+185
-27
lines changed

index/postings.go

Lines changed: 110 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -792,8 +792,18 @@ func (it *baseDeltaPostings) Seek(x uint64) bool {
792792
num := it.size - it.idx/it.width
793793
// Do binary search between current position and end.
794794
x -= it.base
795+
idx1 := it.idx-it.prel
795796
i := sort.Search(num, func(i int) bool {
796-
return binary.BigEndian.Uint64(it.bs[it.idx+i*it.width-it.prel:])&it.mask >= x
797+
idx2 := idx1 + i*it.width
798+
if idx2 >= 0 {
799+
return binary.BigEndian.Uint64(it.bs[idx2:])&it.mask >= x
800+
} else {
801+
temp := uint64(0)
802+
for i := idx2 + it.prel; i < idx2 + it.prel + 8; i++ {
803+
temp = (temp << 8) | uint64(it.bs[i])
804+
}
805+
return temp >= x
806+
}
797807
})
798808
if i < num {
799809
it.idx += i * it.width
@@ -1363,15 +1373,15 @@ func writeBitmapPostings(e *encoding.Encbuf, arr []uint32) {
13631373
}
13641374

13651375
var rbpMasks []byte
1366-
var rbpValueMask uint32
1376+
var rbpValueMask uint64
13671377
var rbpValueSize int
13681378
var rbpBitmapSize int
13691379

13701380
func init() {
13711381
for i := 7; i >= 0; i-- {
13721382
rbpMasks = append(rbpMasks, byte(1<<uint(i)))
13731383
}
1374-
rbpValueMask = uint32((1 << uint(bitmapBits)) - 1)
1384+
rbpValueMask = (uint64(1) << uint(bitmapBits)) - 1
13751385
rbpBitmapSize = 1 << (bitmapBits - 3)
13761386
rbpValueSize = bitmapBits >> 3
13771387
}
@@ -1392,7 +1402,7 @@ type roaringBitmapPostings struct {
13921402
idx1 int // The offset in the bitmap in current block in bytes.
13931403
idx2 int // The offset in the current byte in the bitmap ([0,8)).
13941404
footerAddr int
1395-
key uint32
1405+
key uint64
13961406
numBlock int
13971407
blockIdx int
13981408
blockType byte
@@ -1419,7 +1429,7 @@ func (it *roaringBitmapPostings) Next() bool {
14191429
if it.inside { // Already entered the block.
14201430
if it.blockType == 0 { // Type array.
14211431
if it.idx < it.nextBlock {
1422-
it.cur = uint64(it.key) | uint64(it.bs[it.idx])
1432+
it.cur = it.key | uint64(it.bs[it.idx])
14231433
it.idx += 1
14241434
return true
14251435
}
@@ -1429,7 +1439,7 @@ func (it *roaringBitmapPostings) Next() bool {
14291439
}
14301440
for it.idx1 < rbpBitmapSize {
14311441
if it.bs[it.idx+it.idx1]&rbpMasks[it.idx2] != 0 {
1432-
it.cur = uint64(it.key) | uint64((it.idx1<<3)+it.idx2)
1442+
it.cur = it.key | uint64((it.idx1<<3)+it.idx2)
14331443
it.idx2 += 1
14341444
if it.idx2 == 8 {
14351445
it.idx1 += 1
@@ -1454,7 +1464,7 @@ func (it *roaringBitmapPostings) Next() bool {
14541464
} else { // Not yet entered the block.
14551465
if it.idx < it.footerAddr {
14561466
val, size := binary.Uvarint(it.bs[it.idx:])
1457-
it.key = uint32(val) << bitmapBits
1467+
it.key = val << bitmapBits
14581468
it.idx += size
14591469
it.blockType = it.bs[it.idx]
14601470
it.idx += 1
@@ -1478,7 +1488,7 @@ func (it *roaringBitmapPostings) Next() bool {
14781488
}
14791489

14801490
func (it *roaringBitmapPostings) seekInBlock(x uint64) bool {
1481-
curVal := byte(uint32(x) & rbpValueMask)
1491+
curVal := byte(x & rbpValueMask)
14821492
if it.blockType == 0 {
14831493
// If encoding with array, binary search.
14841494
num := (it.nextBlock - it.idx)
@@ -1492,15 +1502,15 @@ func (it *roaringBitmapPostings) seekInBlock(x uint64) bool {
14921502
return it.Next()
14931503
}
14941504

1495-
it.cur = uint64(it.key) | uint64(it.bs[it.idx+j])
1505+
it.cur = it.key | uint64(it.bs[it.idx+j])
14961506
it.idx += j + 1
14971507
return true
14981508
} else {
14991509
// If encoding with bitmap, go to the exact location of value of x.
15001510
it.idx1 = int(curVal >> 3)
15011511
it.idx2 = int(curVal % 8)
15021512
if it.bs[it.idx+it.idx1]&rbpMasks[it.idx2] != 0 { // Found x.
1503-
it.cur = uint64(it.key) | uint64(it.idx1*8+it.idx2)
1513+
it.cur = it.key | uint64(it.idx1*8+it.idx2)
15041514
it.idx2 += 1
15051515
if it.idx2 == 8 {
15061516
it.idx1 += 1
@@ -1522,7 +1532,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15221532
if it.cur >= x {
15231533
return true
15241534
}
1525-
curKey := uint32(x) >> bitmapBits
1535+
curKey := x >> bitmapBits
15261536
if it.inside && it.key>>bitmapBits == curKey {
15271537
// Fast path.
15281538
return it.seekInBlock(x)
@@ -1533,7 +1543,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15331543
// off := it.readBytes(it.footerAddr+1+(it.blockIdx+i)*it.width)
15341544
off := int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+i)*it.width-4+it.width:]) & it.addrMask)
15351545
k, _ := binary.Uvarint(it.bs[off:])
1536-
return uint32(k) >= curKey
1546+
return k >= curKey
15371547
// return binary.BigEndian.Uint32(it.bs[off:]) > curKey
15381548
})
15391549
if i == it.numBlock-it.blockIdx {
@@ -1553,7 +1563,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15531563

15541564
val, size := binary.Uvarint(it.bs[it.idx:])
15551565
// If the key of current block doesn't match, directly go to the next block.
1556-
if uint32(val) != curKey {
1566+
if val != curKey {
15571567
if it.blockIdx == it.numBlock-1 {
15581568
it.idx = it.footerAddr
15591569
return false
@@ -1564,7 +1574,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15641574
it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+it.blockIdx*it.width-4+it.width:]) & it.addrMask)
15651575
// it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+it.blockIdx*4:]))
15661576
val, size := binary.Uvarint(it.bs[it.idx:])
1567-
it.key = uint32(val) << bitmapBits
1577+
it.key = val << bitmapBits
15681578
it.idx += size
15691579
it.blockType = it.bs[it.idx]
15701580
it.idx += 1
@@ -1582,7 +1592,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15821592
return it.Next()
15831593
}
15841594
}
1585-
it.key = uint32(val) << bitmapBits
1595+
it.key = val << bitmapBits
15861596
it.idx += size
15871597
it.blockType = it.bs[it.idx]
15881598
it.idx += 1
@@ -1662,10 +1672,10 @@ func (it *roaringBitmapPostings) readBits(offset int) uint64 {
16621672
return u
16631673
}
16641674

1665-
func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []int, c []byte, key uint32, thres int, bitmapSize int, valueSize int) {
1666-
var offset int // The starting offset of the bitmap of each block.
1667-
var idx1 int // The offset in the bitmap in current block in bytes.
1668-
var idx2 int // The offset in the current byte in the bitmap ([0,8)).
1675+
func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []uint32, key uint32, thres int, bitmapSize int, valueSize int) {
1676+
var offset int // The starting offset of the bitmap of each block.
1677+
var idx1 uint32 // The offset in the bitmap in current block in bytes.
1678+
var idx2 uint32 // The offset in the current byte in the bitmap ([0,8)).
16691679
e.PutUvarint32(key)
16701680
if len(vals) > thres {
16711681
e.PutByte(byte(1))
@@ -1676,19 +1686,48 @@ func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []int, c []byte, key uint3
16761686
for _, val := range vals {
16771687
idx1 = val >> 3
16781688
idx2 = val % 8
1679-
e.B[offset+idx1] |= 1 << uint(7-idx2)
1689+
e.B[uint32(offset)+idx1] |= 1 << uint(7-idx2)
16801690
}
16811691
} else {
1692+
c := make([]byte, 4)
16821693
e.PutByte(byte(0))
16831694
for _, val := range vals {
1684-
binary.BigEndian.PutUint32(c[:], uint32(val))
1695+
binary.BigEndian.PutUint32(c[:], val)
16851696
for i := 4 - valueSize; i < 4; i++ {
16861697
e.PutByte(c[i])
16871698
}
16881699
}
16891700
}
16901701
}
16911702

1703+
func writeRoaringBitmapBlock64(e *encoding.Encbuf, vals []uint64, key uint64, thres int, bitmapSize int, valueSize int) {
1704+
var offset int // The starting offset of the bitmap of each block.
1705+
var idx1 uint64 // The offset in the bitmap in current block in bytes.
1706+
var idx2 uint64 // The offset in the current byte in the bitmap ([0,8)).
1707+
e.PutUvarint64(key)
1708+
if len(vals) > thres {
1709+
e.PutByte(byte(1))
1710+
offset = len(e.Get())
1711+
for i := 0; i < bitmapSize; i++ {
1712+
e.PutByte(byte(0))
1713+
}
1714+
for _, val := range vals {
1715+
idx1 = val >> 3
1716+
idx2 = val % 8
1717+
e.B[uint64(offset)+idx1] |= 1 << uint(7-idx2)
1718+
}
1719+
} else {
1720+
c := make([]byte, 8)
1721+
e.PutByte(byte(0))
1722+
for _, val := range vals {
1723+
binary.BigEndian.PutUint64(c[:], val)
1724+
for i := 8 - valueSize; i < 8; i++ {
1725+
e.PutByte(c[i])
1726+
}
1727+
}
1728+
}
1729+
}
1730+
16921731
func putBytes(e *encoding.Encbuf, val uint32, width int) {
16931732
for i := width - 1; i >= 0; i-- {
16941733
e.PutByte(byte((val >> (8 * uint(i)) & 0xff)))
@@ -1705,8 +1744,7 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17051744
var curVal uint32
17061745
var idx int // Index of current element in arr.
17071746
var startingOffs []uint32 // The starting offsets of each block.
1708-
var vals []int // The converted values in the current block.
1709-
c := make([]byte, 4)
1747+
var vals []uint32 // The converted values in the current block.
17101748
startOff := len(e.Get())
17111749
e.PutBE32(0) // Footer starting offset.
17121750
for idx < len(arr) {
@@ -1716,22 +1754,22 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17161754
// Move to next block.
17171755
if idx != 0 {
17181756
startingOffs = append(startingOffs, uint32(len(e.B)))
1719-
writeRoaringBitmapBlock(e, vals, c, key, thres, bitmapSize, valueSize)
1757+
writeRoaringBitmapBlock(e, vals, key, thres, bitmapSize, valueSize)
17201758
vals = vals[:0]
17211759
}
17221760
key = curKey
17231761
}
1724-
vals = append(vals, int(curVal))
1762+
vals = append(vals, curVal)
17251763
idx += 1
17261764
}
17271765
startingOffs = append(startingOffs, uint32(len(e.B)))
1728-
writeRoaringBitmapBlock(e, vals, c, key, thres, bitmapSize, valueSize)
1766+
writeRoaringBitmapBlock(e, vals, key, thres, bitmapSize, valueSize)
17291767

17301768
// Put footer starting offset.
17311769
binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-4-startOff))
17321770
width := bits.Len32(startingOffs[len(startingOffs)-1] - 4 - uint32(startOff))
17331771
if width == 0 {
1734-
// key 0 will result in o width.
1772+
// key 0 will result in 0 width.
17351773
width += 1
17361774
}
17371775
// e.PutBE32(uint32(len(startingOffs))) // Number of blocks.
@@ -1749,3 +1787,48 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17491787
// e.PutBE32(off - 4 - uint32(startOff))
17501788
// }
17511789
}
1790+
1791+
func writeRoaringBitmapPostings64(e *encoding.Encbuf, arr []uint64) {
1792+
key := uint64(0xffffffffffffffff) // The initial key should be unique.
1793+
bitmapSize := 1 << (bitmapBits - 3) // Bitmap size in bytes.
1794+
valueSize := bitmapBits >> 3 // The size of the element in array in bytes.
1795+
thres := (1 << bitmapBits) / bitmapBits // Threshold of number of elements in the block for choosing encoding type.
1796+
mask := (uint64(1) << uint(bitmapBits)) - 1 // Mask for the elements in the block.
1797+
var curKey uint64
1798+
var curVal uint64
1799+
var idx int // Index of current element in arr.
1800+
var startingOffs []uint32 // The starting offsets of each block.
1801+
var vals []uint64 // The converted values in the current block.
1802+
startOff := len(e.Get())
1803+
e.PutBE32(0) // Footer starting offset.
1804+
for idx < len(arr) {
1805+
curKey = arr[idx] >> bitmapBits // Key of block.
1806+
curVal = arr[idx] & mask // Value inside block.
1807+
if curKey != key {
1808+
// Move to next block.
1809+
if idx != 0 {
1810+
startingOffs = append(startingOffs, uint32(len(e.B)))
1811+
writeRoaringBitmapBlock64(e, vals, key, thres, bitmapSize, valueSize)
1812+
vals = vals[:0]
1813+
}
1814+
key = curKey
1815+
}
1816+
vals = append(vals, curVal)
1817+
idx += 1
1818+
}
1819+
startingOffs = append(startingOffs, uint32(len(e.B)))
1820+
writeRoaringBitmapBlock64(e, vals, key, thres, bitmapSize, valueSize)
1821+
1822+
// Put footer starting offset.
1823+
binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-4-startOff))
1824+
width := bits.Len32(startingOffs[len(startingOffs)-1] - 4 - uint32(startOff))
1825+
if width == 0 {
1826+
// key 0 will result in 0 width.
1827+
width += 1
1828+
}
1829+
1830+
e.PutByte(byte((width + 7) / 8))
1831+
for _, off := range startingOffs {
1832+
putBytes(e, off-4-uint32(startOff), (width+7)/8)
1833+
}
1834+
}

index/postings_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,81 @@ func TestRoaringBitmapPostings(t *testing.T) {
10941094
})
10951095
}
10961096

1097+
func TestRoaringBitmapPostings64(t *testing.T) {
1098+
num := 1000
1099+
// mock a list as postings
1100+
ls := make([]uint64, num)
1101+
ls[0] = 2
1102+
for i := 1; i < num; i++ {
1103+
ls[i] = ls[i-1] + uint64(rand.Int63n(15)) + 2
1104+
// ls[i] = ls[i-1] + 10
1105+
}
1106+
1107+
buf := encoding.Encbuf{}
1108+
writeRoaringBitmapPostings64(&buf, ls)
1109+
// t.Log("len", len(buf.Get()))
1110+
1111+
t.Run("Iteration", func(t *testing.T) {
1112+
rbp := newRoaringBitmapPostings(buf.Get())
1113+
for i := 0; i < num; i++ {
1114+
testutil.Assert(t, rbp.Next() == true, "")
1115+
// t.Log("ls[i] =", ls[i], "rbp.At() =", rbp.At())
1116+
testutil.Equals(t, ls[i], rbp.At())
1117+
}
1118+
1119+
testutil.Assert(t, rbp.Next() == false, "")
1120+
testutil.Assert(t, rbp.Err() == nil, "")
1121+
})
1122+
1123+
t.Run("Seek", func(t *testing.T) {
1124+
table := []struct {
1125+
seek uint64
1126+
val uint64
1127+
found bool
1128+
}{
1129+
{
1130+
ls[0] - 1, ls[0], true,
1131+
},
1132+
{
1133+
ls[4], ls[4], true,
1134+
},
1135+
{
1136+
ls[500] - 1, ls[500], true,
1137+
},
1138+
{
1139+
ls[600] + 1, ls[601], true,
1140+
},
1141+
{
1142+
ls[600] + 1, ls[601], true,
1143+
},
1144+
{
1145+
ls[600] + 1, ls[601], true,
1146+
},
1147+
{
1148+
ls[0], ls[601], true,
1149+
},
1150+
{
1151+
ls[600], ls[601], true,
1152+
},
1153+
{
1154+
ls[999], ls[999], true,
1155+
},
1156+
{
1157+
ls[999] + 10, ls[999], false,
1158+
},
1159+
}
1160+
1161+
rbp := newRoaringBitmapPostings(buf.Get())
1162+
1163+
for _, v := range table {
1164+
// t.Log("i", i)
1165+
testutil.Equals(t, v.found, rbp.Seek(v.seek))
1166+
testutil.Equals(t, v.val, rbp.At())
1167+
testutil.Assert(t, rbp.Err() == nil, "")
1168+
}
1169+
})
1170+
}
1171+
10971172
func BenchmarkPostings(b *testing.B) {
10981173
num := 100000
10991174
// mock a list as postings

0 commit comments

Comments
 (0)