Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit e11da7c

Browse files
committed
add roaringBitmapPostings
Signed-off-by: naivewong <[email protected]>
1 parent 7cfcf3d commit e11da7c

File tree

3 files changed

+373
-5
lines changed

3 files changed

+373
-5
lines changed

index/index.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
package index
1515

1616
import (
17+
"fmt"
1718
"bufio"
1819
"encoding/binary"
1920
"hash"
@@ -543,7 +544,22 @@ func (w *Writer) WritePostings(name, value string, it Postings) error {
543544
writeBaseDeltaBlockPostings(&w.buf2, refs)
544545
case 5:
545546
writeBitmapPostings(&w.buf2, refs)
546-
}
547+
case 6:
548+
writeRoaringBitmapPostings(&w.buf2, refs)
549+
}
550+
551+
// if name == "albxj" && value == "e" {
552+
// fmt.Println("---------")
553+
// rbp := newRoaringBitmapPostings(w.buf2.Get()[4:])
554+
// var temp []uint64
555+
// for rbp.Next() {
556+
// temp = append(temp, rbp.At())
557+
// }
558+
// fmt.Println("len", len(w.buf2.Get()))
559+
// fmt.Println("refs", refs)
560+
// fmt.Println("temp", temp)
561+
// fmt.Println("---------")
562+
// }
547563

548564
w.uint32s = refs
549565

@@ -916,10 +932,12 @@ func (r *Reader) Series(id uint64, lbls *labels.Labels, chks *[]chunks.Meta) err
916932
func (r *Reader) Postings(name, value string) (Postings, error) {
917933
e, ok := r.postings[name]
918934
if !ok {
935+
fmt.Println("cannot get name")
919936
return EmptyPostings(), nil
920937
}
921938
off, ok := e[value]
922939
if !ok {
940+
fmt.Println("cannot get value")
923941
return EmptyPostings(), nil
924942
}
925943
d := encoding.NewDecbufAt(r.b, int(off), castagnoliTable)
@@ -1066,6 +1084,10 @@ func (dec *Decoder) Postings(b []byte) (int, Postings, error) {
10661084
case 5:
10671085
l := d.Get()
10681086
return n, newBitmapPostings(l), d.Err()
1087+
case 6:
1088+
l := d.Get()
1089+
// fmt.Println("newRoaringBitmapPostings")
1090+
return n, newRoaringBitmapPostings(l), d.Err()
10691091
default:
10701092
return n, EmptyPostings(), d.Err()
10711093
}

index/postings.go

Lines changed: 238 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -692,8 +692,8 @@ func (it *bigEndianPostings) Err() error {
692692
return nil
693693
}
694694

695-
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock, 5 is bitmapPostings.
696-
const postingsType = 5
695+
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock, 5 is bitmapPostings, 6 is roaringBitmapPostings.
696+
const postingsType = 6
697697

698698
type bitSlice struct {
699699
bstream []byte
@@ -1245,3 +1245,239 @@ func writeBitmapPostings(e *encoding.Encbuf, arr []uint32) {
12451245
e.B[offset+idx1] |= 1 << uint(7-idx2)
12461246
}
12471247
}
1248+
1249+
// roaringBitmap block format, type 0 = array, type 1 = bitmap.
1250+
// ┌──────────┬──────────┬────────┐
1251+
// │ key <4b> │ type<1b> │ bitmap │
1252+
// └──────────┴──────────┴────────┘
1253+
type roaringBitmapPostings struct {
1254+
bs []byte
1255+
cur uint64
1256+
inside bool
1257+
idx int
1258+
idx1 int
1259+
idx2 int
1260+
footerAddr int
1261+
bitmapSize int
1262+
valueSize int
1263+
key uint32
1264+
numBlock int
1265+
blockIdx int
1266+
blockType byte
1267+
nextBlock int
1268+
}
1269+
1270+
func newRoaringBitmapPostings(bstream []byte) *roaringBitmapPostings {
1271+
if len(bstream) <= 4 {
1272+
return nil
1273+
}
1274+
x := binary.BigEndian.Uint32(bstream)
1275+
return &roaringBitmapPostings{bs: bstream[4:], bitmapSize: 1 << (bitmapBits - 3), valueSize: bitmapBits >> 3, numBlock: (len(bstream) - int(x)) / 4 - 1, footerAddr: int(x)}
1276+
}
1277+
1278+
func (it *roaringBitmapPostings) At() uint64 {
1279+
return it.cur
1280+
}
1281+
1282+
func (it *roaringBitmapPostings) Next() bool {
1283+
if it.inside {
1284+
if it.blockType == 0 {
1285+
if it.idx < it.nextBlock {
1286+
it.cur = 0
1287+
for i := 0; i < it.valueSize; i++ {
1288+
it.cur = (it.cur<<8) + uint64(it.bs[it.idx+i])
1289+
}
1290+
it.idx += it.valueSize
1291+
it.cur += uint64(it.key)
1292+
return true
1293+
}
1294+
} else {
1295+
for it.idx1 < it.bitmapSize {
1296+
if it.bs[it.idx+it.idx1] == byte(0) {
1297+
it.idx1 += 1
1298+
continue
1299+
}
1300+
for it.idx1 < it.bitmapSize {
1301+
if it.bs[it.idx+it.idx1]&(1<<uint(7-it.idx2)) != byte(0) {
1302+
it.cur = uint64(it.key) + uint64(it.idx1*8+it.idx2)
1303+
it.idx2 += 1
1304+
if it.idx2 == 8 {
1305+
it.idx1 += 1
1306+
it.idx2 = 0
1307+
}
1308+
return true
1309+
} else {
1310+
it.idx2 += 1
1311+
if it.idx2 == 8 {
1312+
it.idx1 += 1
1313+
it.idx2 = 0
1314+
}
1315+
}
1316+
}
1317+
}
1318+
it.idx += it.bitmapSize
1319+
it.idx1 = 0
1320+
it.idx2 = 0
1321+
}
1322+
it.blockIdx += 1
1323+
it.inside = false
1324+
return it.Next()
1325+
} else {
1326+
if it.idx < it.footerAddr {
1327+
it.key = binary.BigEndian.Uint32(it.bs[it.idx:]) << bitmapBits
1328+
it.blockType = it.bs[it.idx+4]
1329+
it.idx += 5
1330+
it.inside = true
1331+
if it.blockIdx != it.numBlock - 1 {
1332+
it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+(it.blockIdx+1)*4:]))
1333+
} else {
1334+
it.nextBlock = it.footerAddr
1335+
}
1336+
return it.Next()
1337+
} else {
1338+
return false
1339+
}
1340+
}
1341+
}
1342+
1343+
func (it *roaringBitmapPostings) Seek(x uint64) bool {
1344+
if it.cur >= x {
1345+
return true
1346+
}
1347+
curKey := uint32(x) >> bitmapBits
1348+
i := sort.Search(it.numBlock-it.blockIdx, func(i int) bool {
1349+
off := int(binary.BigEndian.Uint32(it.bs[it.footerAddr+4*(it.blockIdx+i):]))
1350+
return binary.BigEndian.Uint32(it.bs[off:]) > curKey
1351+
})
1352+
if i > 0 {
1353+
i -= 1
1354+
if i > 0 {
1355+
it.idx1 = 0
1356+
it.idx2 = 0
1357+
it.inside = false
1358+
it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+4*(it.blockIdx+i):]))
1359+
}
1360+
}
1361+
it.blockIdx += i
1362+
if it.Next() {
1363+
if it.cur >= x {
1364+
return true
1365+
}
1366+
if it.blockType == 0 {
1367+
// If encoding with array, binary search.
1368+
num := (it.nextBlock - it.idx) / it.valueSize
1369+
j := sort.Search(num, func(i int) bool {
1370+
var temp uint64
1371+
for j := 0; j < it.valueSize; j++ {
1372+
temp = (temp<<8) + uint64(it.bs[it.idx+j+i*it.valueSize])
1373+
}
1374+
temp += uint64(it.key)
1375+
return temp >= x
1376+
})
1377+
it.cur = 0
1378+
for i := 0; i < it.valueSize; i++ {
1379+
it.cur = (it.cur<<8) + uint64(it.bs[it.idx+i+j*it.valueSize])
1380+
}
1381+
it.cur += uint64(it.key)
1382+
it.idx += (j + 1) * it.valueSize
1383+
if j == num {
1384+
// The first element in next block should be >= x.
1385+
return it.Next()
1386+
}
1387+
return true
1388+
} else {
1389+
// If encoding with bitmap, loop next.
1390+
for it.Next() {
1391+
if it.cur >= x {
1392+
return true
1393+
}
1394+
}
1395+
return false
1396+
}
1397+
} else {
1398+
return false
1399+
}
1400+
}
1401+
1402+
func (it *roaringBitmapPostings) Err() error {
1403+
return nil
1404+
}
1405+
1406+
func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
1407+
key := uint32(0xffffffff)
1408+
bitmapSize := 1 << (bitmapBits - 3)
1409+
valueSize := bitmapBits >> 3
1410+
thres := (1<<bitmapBits)/bitmapBits
1411+
mask := uint32((1 << uint(bitmapBits)) - 1)
1412+
var curKey uint32
1413+
var curVal uint32
1414+
var offset int // The starting offset of the bitmap of each block.
1415+
var idx int
1416+
var idx1 int
1417+
var idx2 int
1418+
var startingOffs []uint32
1419+
var vals []int
1420+
c := make([]byte, 4)
1421+
startOff := len(e.Get())
1422+
e.PutBE32(0) // Footer starting offset.
1423+
for idx < len(arr) {
1424+
curKey = arr[idx] >> bitmapBits
1425+
curVal = arr[idx] & mask
1426+
if curKey != key {
1427+
if idx != 0 {
1428+
startingOffs = append(startingOffs, uint32(len(e.B)))
1429+
e.PutBE32(uint32(key))
1430+
if len(vals) > thres {
1431+
e.PutByte(byte(1))
1432+
offset = len(e.Get())
1433+
for i := 0; i < bitmapSize; i++ {
1434+
e.PutByte(byte(0))
1435+
}
1436+
for _, val := range vals {
1437+
idx1 = val >> 3
1438+
idx2 = val % 8
1439+
e.B[offset+idx1] |= 1 << uint(7-idx2)
1440+
}
1441+
} else {
1442+
e.PutByte(byte(0))
1443+
for _, val := range vals {
1444+
binary.BigEndian.PutUint32(c[:], uint32(val))
1445+
for i := 4 - valueSize; i < 4; i++ {
1446+
e.PutByte(c[i])
1447+
}
1448+
}
1449+
}
1450+
vals = vals[:0]
1451+
}
1452+
key = curKey
1453+
}
1454+
vals = append(vals, int(curVal))
1455+
idx += 1
1456+
}
1457+
startingOffs = append(startingOffs, uint32(len(e.B)))
1458+
e.PutBE32(uint32(key))
1459+
if len(vals) > thres {
1460+
e.PutByte(byte(1))
1461+
offset = len(e.Get())
1462+
for i := 0; i < bitmapSize; i++ {
1463+
e.PutByte(byte(0))
1464+
}
1465+
for _, val := range vals {
1466+
idx1 = val >> 3
1467+
idx2 = val % 8
1468+
e.B[offset+idx1] |= 1 << uint(7-idx2)
1469+
}
1470+
} else {
1471+
e.PutByte(byte(0))
1472+
for _, val := range vals {
1473+
binary.BigEndian.PutUint32(c[:], uint32(val))
1474+
for i := 4 - valueSize; i < 4; i++ {
1475+
e.PutByte(c[i])
1476+
}
1477+
}
1478+
}
1479+
binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-4-startOff))
1480+
for _, off := range startingOffs {
1481+
e.PutBE32(off-4-uint32(startOff))
1482+
}
1483+
}

0 commit comments

Comments
 (0)