Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit ef22dcd

Browse files
committed
add bitmapPostings
Signed-off-by: naivewong <[email protected]>
1 parent bf6c0ae commit ef22dcd

File tree

4 files changed

+241
-2
lines changed

4 files changed

+241
-2
lines changed

index/index.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,8 @@ func (w *Writer) WritePostings(name, value string, it Postings) error {
541541
writeDeltaBlockPostings(&w.buf2, refs)
542542
case 4:
543543
writeBaseDeltaBlockPostings(&w.buf2, refs)
544+
case 5:
545+
writeBitmapPostings(&w.buf2, refs)
544546
}
545547

546548
w.uint32s = refs
@@ -1061,6 +1063,9 @@ func (dec *Decoder) Postings(b []byte) (int, Postings, error) {
10611063
case 4:
10621064
l := d.Get()
10631065
return n, newBaseDeltaBlockPostings(l, n), d.Err()
1066+
case 5:
1067+
l := d.Get()
1068+
return n, newBitmapPostings(l), d.Err()
10641069
default:
10651070
return n, EmptyPostings(), d.Err()
10661071
}

index/index_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"github.com/prometheus/tsdb/encoding"
2828
"github.com/prometheus/tsdb/labels"
2929
"github.com/prometheus/tsdb/testutil"
30+
"github.com/prometheus/tsdb/fileutil"
3031
)
3132

3233
type series struct {
@@ -338,6 +339,12 @@ func TestPersistence_index_e2e(t *testing.T) {
338339
err = iw.Close()
339340
testutil.Ok(t, err)
340341

342+
f, err := fileutil.OpenMmapFile(filepath.Join(dir, indexFilename))
343+
testutil.Ok(t, err)
344+
toc, err := NewTOCFromByteSlice(realByteSlice(f.Bytes()))
345+
testutil.Ok(t, err)
346+
t.Log("size of postings =", toc.LabelIndicesTable - toc.Postings)
347+
341348
ir, err := NewFileReader(filepath.Join(dir, indexFilename))
342349
testutil.Ok(t, err)
343350

index/postings.go

Lines changed: 123 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -692,8 +692,8 @@ func (it *bigEndianPostings) Err() error {
692692
return nil
693693
}
694694

695-
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock.
696-
const postingsType = 4
695+
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock, 5 is bitmapPostings.
696+
const postingsType = 5
697697

698698
type bitSlice struct {
699699
bstream []byte
@@ -1124,3 +1124,124 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
11241124
e.B = e.B[:len(e.B)-(len(e.B)-startLen)%deltaBlockSize]
11251125
}
11261126
}
1127+
1128+
// bitmapBits is the number of low-order bits of a posting that select its
// position inside a block's bitmap; the remaining high-order bits form the
// block key.
// 8bits -> 256/8=32bytes, 12bits -> 4096/8=512bytes, 16bits -> 65536/8=8192bytes.
const bitmapBits = 8

// bitmapPostings iterates over postings stored as a sequence of fixed-size
// blocks. Each block is a 4-byte big-endian key followed by a bitmap of
// bitmapSize bytes. Bit j (counted from the most significant bit) of bitmap
// byte i stands for the posting key<<bitmapBits + i*8 + j.
//
// Bitmap block format.
// ┌──────────┬────────┐
// │ key <4b> │ bitmap │
// └──────────┴────────┘
type bitmapPostings struct {
	bs         []byte // Remaining stream; it always starts at the header of the current block.
	cur        uint64 // Value most recently produced by Next or Seek.
	inside     bool   // Whether key has been decoded for the block at the head of bs.
	idx1       int    // Byte offset inside the current bitmap of the next bit to test.
	idx2       int    // Bit offset inside that byte, 0 being the most significant bit.
	bitmapSize int    // Size of one bitmap in bytes.
	key        uint32 // Key of the current block.
}

// newBitmapPostings returns an iterator over the bitmap blocks in bstream.
// The slice is read in place and is not copied.
func newBitmapPostings(bstream []byte) *bitmapPostings {
	return &bitmapPostings{bs: bstream, bitmapSize: 1 << (bitmapBits - 3)}
}

// At returns the value the iterator is currently positioned at. It is 0 until
// the first successful call to Next or Seek.
func (it *bitmapPostings) At() uint64 {
	return it.cur
}

// Next advances the iterator to the next set bit in the stream and reports
// whether one was found. It returns false once fewer bytes than one full
// block (key plus bitmap) remain.
func (it *bitmapPostings) Next() bool {
	// Iterate rather than recurse so that a stream containing many blocks
	// without any set bit cannot grow the stack.
	for {
		if !it.inside {
			// A block is only usable if both its key and its whole bitmap are present.
			if len(it.bs)-4 < it.bitmapSize {
				return false
			}
			it.key = binary.BigEndian.Uint32(it.bs)
			it.inside = true
		}

		bitmap := it.bs[4 : 4+it.bitmapSize]
		for it.idx1 < it.bitmapSize {
			// Mask off the bits in front of idx2: they were already visited.
			// This lets a byte with nothing left in it be skipped as a whole.
			rest := bitmap[it.idx1] & (0xff >> uint(it.idx2))
			if rest == 0 {
				it.idx1++
				it.idx2 = 0
				continue
			}
			// rest is non-zero, so a set bit exists at or after idx2.
			for rest&(1<<uint(7-it.idx2)) == 0 {
				it.idx2++
			}
			// Widen the key before shifting so that large keys do not wrap
			// around in 32 bits.
			it.cur = uint64(it.key)<<bitmapBits + uint64(it.idx1*8+it.idx2)

			// Step past the bit that was just returned.
			it.idx2++
			if it.idx2 == 8 {
				it.idx1++
				it.idx2 = 0
			}
			return true
		}

		// The current bitmap is exhausted: drop the block and try the next one.
		it.bs = it.bs[it.bitmapSize+4:]
		it.inside = false
		it.idx1 = 0
	}
}

// Seek advances the iterator to the first value that is greater than or equal
// to x and reports whether such a value exists.
//
// If the current value is already >= x the iterator is left untouched and
// true is returned. Since the current value starts out as 0, Seek(0) on a
// fresh iterator returns true without positioning it on a stored value.
func (it *bitmapPostings) Seek(x uint64) bool {
	if it.cur >= x {
		return true
	}

	blockSize := it.bitmapSize + 4
	// Keys are compared in 64 bits so that an x beyond the uint32 range is
	// not truncated to an unrelated, smaller key.
	wantKey := x >> bitmapBits

	// i is the index of the first remaining block whose key is greater than
	// wantKey, so block i-1 is the last one that may still contain x.
	i := sort.Search(len(it.bs)/blockSize, func(i int) bool {
		return uint64(binary.BigEndian.Uint32(it.bs[i*blockSize:])) > wantKey
	})
	if i > 1 {
		// Skip directly to block i-1 and restart the scan at its first bit.
		// When i-1 is the current block the position inside it is kept.
		it.bs = it.bs[(i-1)*blockSize:]
		it.inside = false
		it.idx1 = 0
		it.idx2 = 0
	}

	for it.Next() {
		if it.At() >= x {
			return true
		}
	}
	return false
}

// Err always returns nil; the iterator does not record decoding errors.
func (it *bitmapPostings) Err() error {
	return nil
}
1222+
1223+
func writeBitmapPostings(e *encoding.Encbuf, arr []uint32) {
1224+
key := uint32(0xffffffff)
1225+
bitmapSize := 1 << (bitmapBits - 3)
1226+
mask := uint32((1 << uint(bitmapBits)) - 1)
1227+
var curKey uint32
1228+
var curVal uint32
1229+
var offset int // The starting offset of the bitmap of each block.
1230+
var idx1 int
1231+
var idx2 int
1232+
for _, val := range arr {
1233+
curKey = val >> bitmapBits
1234+
curVal = val & mask
1235+
idx1 = int(curVal) >> 3
1236+
idx2 = int(curVal) % 8
1237+
if curKey != key {
1238+
key = curKey
1239+
e.PutBE32(uint32(key))
1240+
offset = len(e.Get())
1241+
for i := 0; i < bitmapSize; i++ {
1242+
e.PutByte(byte(0))
1243+
}
1244+
}
1245+
e.B[offset+idx1] |= 1 << uint(7 - idx2)
1246+
}
1247+
}

index/postings_test.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,80 @@ func TestBaseDeltaBlockPostings(t *testing.T) {
945945
})
946946
}
947947

948+
func TestBitmapPostings(t *testing.T) {
949+
num := 1000
950+
// mock a list as postings
951+
ls := make([]uint32, num)
952+
ls[0] = 2
953+
for i := 1; i < num; i++ {
954+
ls[i] = ls[i-1] + uint32(rand.Int31n(25)) + 2
955+
// ls[i] = ls[i-1] + 2
956+
}
957+
958+
buf := encoding.Encbuf{}
959+
writeBitmapPostings(&buf, ls)
960+
// t.Log("len", len(buf.Get()))
961+
962+
t.Run("Iteration", func(t *testing.T) {
963+
bp := newBitmapPostings(buf.Get())
964+
for i := 0; i < num; i++ {
965+
testutil.Assert(t, bp.Next() == true, "")
966+
// t.Log("ls[i] =", ls[i], "bp.At() =", bp.At())
967+
testutil.Equals(t, uint64(ls[i]), bp.At())
968+
}
969+
970+
testutil.Assert(t, bp.Next() == false, "")
971+
testutil.Assert(t, bp.Err() == nil, "")
972+
})
973+
974+
t.Run("Seek", func(t *testing.T) {
975+
table := []struct {
976+
seek uint32
977+
val uint32
978+
found bool
979+
}{
980+
{
981+
ls[0] - 1, ls[0], true,
982+
},
983+
{
984+
ls[4], ls[4], true,
985+
},
986+
{
987+
ls[500] - 1, ls[500], true,
988+
},
989+
{
990+
ls[600] + 1, ls[601], true,
991+
},
992+
{
993+
ls[600] + 1, ls[601], true,
994+
},
995+
{
996+
ls[600] + 1, ls[601], true,
997+
},
998+
{
999+
ls[0], ls[601], true,
1000+
},
1001+
{
1002+
ls[600], ls[601], true,
1003+
},
1004+
{
1005+
ls[999], ls[999], true,
1006+
},
1007+
{
1008+
ls[999] + 10, ls[999], false,
1009+
},
1010+
}
1011+
1012+
bp := newBitmapPostings(buf.Get())
1013+
1014+
for _, v := range table {
1015+
testutil.Equals(t, v.found, bp.Seek(uint64(v.seek)))
1016+
testutil.Equals(t, uint64(v.val), bp.At())
1017+
testutil.Assert(t, bp.Err() == nil, "")
1018+
}
1019+
})
1020+
}
1021+
9481022
func BenchmarkPostings(b *testing.B) {
9491023
num := 100000
9501024
// mock a list as postings
@@ -977,6 +1051,11 @@ func BenchmarkPostings(b *testing.B) {
9771051
writeBaseDeltaBlockPostings(&bufBDB, ls)
9781052
// b.Log(len(bufBDB.Get()))
9791053

1054+
// bitmapPostings.
1055+
bufBM := encoding.Encbuf{}
1056+
writeBitmapPostings(&bufBM, ls)
1057+
// b.Log("bitmapPostings size", bitmapBits, "bits =", len(bufBM.Get()))
1058+
9801059
table := []struct {
9811060
seek uint32
9821061
val uint32
@@ -1070,6 +1149,20 @@ func BenchmarkPostings(b *testing.B) {
10701149
testutil.Assert(bench, bdbp.Err() == nil, "")
10711150
}
10721151
})
1152+
b.Run("bitmapPostingsIteration", func(bench *testing.B) {
1153+
bench.ResetTimer()
1154+
bench.ReportAllocs()
1155+
for j := 0; j < bench.N; j++ {
1156+
bm := newBitmapPostings(bufBM.Get())
1157+
1158+
for i := 0; i < num; i++ {
1159+
testutil.Assert(bench, bm.Next() == true, "")
1160+
testutil.Equals(bench, uint64(ls[i]), bm.At())
1161+
}
1162+
testutil.Assert(bench, bm.Next() == false, "")
1163+
testutil.Assert(bench, bm.Err() == nil, "")
1164+
}
1165+
})
10731166

10741167
b.Run("bigEndianSeek", func(bench *testing.B) {
10751168
bench.ResetTimer()
@@ -1123,6 +1216,19 @@ func BenchmarkPostings(b *testing.B) {
11231216
}
11241217
}
11251218
})
1219+
b.Run("bitmapPostingsSeek", func(bench *testing.B) {
1220+
bench.ResetTimer()
1221+
bench.ReportAllocs()
1222+
for j := 0; j < bench.N; j++ {
1223+
bm := newBitmapPostings(bufBM.Get())
1224+
1225+
for _, v := range table {
1226+
testutil.Equals(bench, v.found, bm.Seek(uint64(v.seek)))
1227+
testutil.Equals(bench, uint64(v.val), bm.At())
1228+
testutil.Assert(bench, bm.Err() == nil, "")
1229+
}
1230+
}
1231+
})
11261232
}
11271233

11281234
func TestIntersectWithMerge(t *testing.T) {

0 commit comments

Comments
 (0)