Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit b3f2b5e

Browse files
committed
improve baseDeltaBlockPostings
Signed-off-by: naivewong <[email protected]>
1 parent ce55654 commit b3f2b5e

File tree

3 files changed

+157
-120
lines changed

3 files changed

+157
-120
lines changed

index/index.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1082,7 +1082,7 @@ func (dec *Decoder) Postings(b []byte) (int, Postings, error) {
10821082
return n, newDeltaBlockPostings(l, n), d.Err()
10831083
case 4:
10841084
l := d.Get()
1085-
return n, newBaseDeltaBlockPostings(l, n), d.Err()
1085+
return n, newBaseDeltaBlockPostings(l), d.Err()
10861086
case 5:
10871087
l := d.Get()
10881088
return n, newBitmapPostings(l), d.Err()

index/postings.go

Lines changed: 126 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
package index
1515

1616
import (
17+
// "time"
18+
// "fmt"
1719
"container/heap"
1820
"encoding/binary"
1921
"math/bits"
@@ -693,7 +695,7 @@ func (it *bigEndianPostings) Err() error {
693695
}
694696

695697
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock, 5 is bitmapPostings, 6 is roaringBitmapPostings.
696-
const postingsType = 2
698+
const postingsType = 4
697699

698700
type bitSlice struct {
699701
bstream []byte
@@ -812,7 +814,8 @@ func (it *baseDeltaPostings) Err() error {
812814
return nil
813815
}
814816

815-
const deltaBlockSize = 256
817+
const deltaBlockSize = 32
818+
const deltaBlockBits = 5
816819

817820
// Block format(delta is to the previous value).
818821
// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
@@ -970,71 +973,55 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
970973
}
971974

972975
// Block format(delta is to the base).
973-
// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
974-
// │ base <uvarint> │ idx <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bits> │ ... │ delta n <bits> │
975-
// └────────────────┴───────────────┴─────────────────┴────────────┴────────────────┴─────┴────────────────┘
976+
// ┌────────────────┬─────────────────┬────────────┬─────────────────┬─────┬─────────────────┐
977+
// │ base <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bytes> │ ... │ delta n <bytes> │
978+
// └────────────────┴─────────────────┴────────────┴─────────────────┴─────┴─────────────────┘
976979
type baseDeltaBlockPostings struct {
977980
bs bitSlice
978-
size int
979981
count int // count in current block.
980982
idxBlock int
981983
idx int
982984
offset int // offset in bit.
983985
cur uint64
984986
base uint64
987+
mask uint32
988+
prel int
985989
}
986990

987-
func newBaseDeltaBlockPostings(bstream []byte, size int) *baseDeltaBlockPostings {
988-
return &baseDeltaBlockPostings{bs: bitSlice{bstream: bstream}, size: size}
989-
}
990-
991-
func (it *baseDeltaBlockPostings) GetOff() int {
992-
return it.offset
993-
}
994-
func (it *baseDeltaBlockPostings) GetWidth() int {
995-
return it.bs.width
991+
func newBaseDeltaBlockPostings(bstream []byte) *baseDeltaBlockPostings {
992+
return &baseDeltaBlockPostings{bs: bitSlice{bstream: bstream}}
996993
}
997994

998995
func (it *baseDeltaBlockPostings) At() uint64 {
999996
return it.cur
1000997
}
1001998

1002999
func (it *baseDeltaBlockPostings) Next() bool {
1003-
if it.offset >= len(it.bs.bstream)<<3 || it.idx >= it.size {
1000+
if it.offset >= len(it.bs.bstream) {
10041001
return false
10051002
}
1006-
if it.offset%(deltaBlockSize<<3) == 0 {
1007-
val, n := binary.Uvarint(it.bs.bstream[it.offset>>3:])
1008-
if n < 1 {
1009-
return false
1010-
}
1003+
if it.offset%deltaBlockSize == 0 {
1004+
val, n := binary.Uvarint(it.bs.bstream[it.offset:])
10111005
it.cur = val
10121006
it.base = val
1013-
it.offset += n << 3
1014-
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
1015-
if n < 1 {
1016-
return false
1017-
}
1018-
it.idx = int(val) + 1
1019-
it.offset += n << 3
1020-
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
1021-
if n < 1 {
1022-
return false
1023-
}
1007+
it.offset += n
1008+
1009+
val, n = binary.Uvarint(it.bs.bstream[it.offset:])
10241010
it.count = int(val)
1025-
it.offset += n << 3
1026-
it.bs.width = int(it.bs.bstream[it.offset>>3])
1027-
it.offset += 8
1011+
it.offset += n
1012+
it.bs.width = int(it.bs.bstream[it.offset])
1013+
it.mask = (uint32(1) << uint(8 * it.bs.width)) - 1
1014+
it.prel = 4 - it.bs.width
1015+
it.offset += 1
10281016
it.idxBlock = 1
10291017
return true
10301018
}
10311019

1032-
it.cur = it.bs.readBits(it.offset) + it.base
1020+
it.cur = uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base
10331021
it.offset += it.bs.width
1034-
it.idx += 1
10351022
it.idxBlock += 1
10361023
if it.idxBlock == it.count {
1037-
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
1024+
it.offset = (((it.offset-1)>>deltaBlockBits) + 1) << deltaBlockBits
10381025
}
10391026
return true
10401027
}
@@ -1043,69 +1030,109 @@ func (it *baseDeltaBlockPostings) Seek(x uint64) bool {
10431030
if it.cur >= x {
10441031
return true
10451032
}
1046-
1047-
startOff := (it.offset - 1) / (deltaBlockSize << 3) * deltaBlockSize
1048-
num := (len(it.bs.bstream)-1)/deltaBlockSize - (it.offset-1)/(deltaBlockSize<<3) + 1
1049-
// Do binary search between current position and end.
1050-
i := sort.Search(num, func(i int) bool {
1051-
val, _ := binary.Uvarint(it.bs.bstream[startOff+i*deltaBlockSize:])
1052-
return val > x
1053-
})
1054-
if i > 0 {
1055-
// Go to the previous block because the previous block
1056-
// may contain the first value >= x.
1057-
i -= 1
1033+
if it.offset >= len(it.bs.bstream) {
1034+
return false
1035+
}
1036+
startOff := (((it.offset)>>deltaBlockBits)+1)<<deltaBlockBits
1037+
num := (len(it.bs.bstream)>>deltaBlockBits) - (startOff>>deltaBlockBits) + 1
1038+
if num > 0 {
1039+
// Fast path to check if the binary search among blocks is needed.
1040+
val, _ := binary.Uvarint(it.bs.bstream[startOff:])
1041+
if val <= x {
1042+
// Do binary search between current position and end.
1043+
i := sort.Search(num, func(i int) bool {
1044+
val, _ := binary.Uvarint(it.bs.bstream[startOff+(i<<deltaBlockBits):])
1045+
return val > x
1046+
})
1047+
if i > 0 {
1048+
// Go to the previous block because the previous block
1049+
// may contain the first value >= x.
1050+
i -= 1
1051+
}
1052+
it.offset = startOff + (i<<deltaBlockBits)
1053+
1054+
// Read base, count, and width.
1055+
val, n := binary.Uvarint(it.bs.bstream[it.offset:])
1056+
it.cur = val
1057+
it.base = val
1058+
it.offset += n
1059+
val, n = binary.Uvarint(it.bs.bstream[it.offset:])
1060+
it.count = int(val)
1061+
it.offset += n
1062+
it.bs.width = int(it.bs.bstream[it.offset])
1063+
it.mask = (uint32(1) << uint(8 * it.bs.width)) - 1
1064+
it.prel = 4 - it.bs.width
1065+
it.offset += 1
1066+
it.idxBlock = 1
1067+
if x <= it.base {
1068+
return true
1069+
} else {
1070+
temp := x - it.base
1071+
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
1072+
return uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset+i*it.bs.width-it.prel:])&it.mask) >= temp
1073+
})
1074+
if j < it.count-it.idxBlock {
1075+
it.offset += j * it.bs.width
1076+
it.cur = uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base
1077+
it.idxBlock += j + 1
1078+
if it.idxBlock == it.count {
1079+
// it.offset = startOff + ((i+1)<<deltaBlockBits)
1080+
it.offset = ((startOff>>deltaBlockBits)+i+1)<<deltaBlockBits
1081+
} else {
1082+
it.offset += it.bs.width
1083+
}
1084+
} else {
1085+
// it.offset = startOff + ((i+1)<<deltaBlockBits)
1086+
it.offset = ((startOff>>deltaBlockBits)+i+1)<<deltaBlockBits
1087+
return it.Next()
1088+
}
1089+
return true
1090+
}
1091+
}
10581092
}
10591093

1060-
if i == 0 && it.idx > 0 {
1094+
// Search in current block.
1095+
startOff -= deltaBlockSize
1096+
if it.offset == startOff {
1097+
// Read base, count, and width.
1098+
val, n := binary.Uvarint(it.bs.bstream[it.offset:])
1099+
it.cur = val
1100+
it.base = val
1101+
it.offset += n
1102+
val, n = binary.Uvarint(it.bs.bstream[it.offset:])
1103+
it.count = int(val)
1104+
it.offset += n
1105+
it.bs.width = int(it.bs.bstream[it.offset])
1106+
it.mask = (uint32(1) << uint(8 * it.bs.width)) - 1
1107+
it.prel = 4 - it.bs.width
1108+
it.offset += 1
1109+
it.idxBlock = 1
1110+
}
1111+
if x <= it.base {
1112+
return true
1113+
} else {
10611114
temp := x - it.base
10621115
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
1063-
return it.bs.readBits(it.offset+i*it.bs.width) >= temp
1116+
return uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset+i*it.bs.width-it.prel:])&it.mask) >= temp
10641117
})
1065-
10661118
if j < it.count-it.idxBlock {
10671119
it.offset += j * it.bs.width
1068-
it.cur = it.bs.readBits(it.offset) + it.base
1069-
it.offset += it.bs.width
1120+
it.cur = uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base
10701121
it.idxBlock += j + 1
1071-
it.idx += j + 1
10721122
if it.idxBlock == it.count {
1073-
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
1123+
// it.offset = startOff + deltaBlockSize
1124+
it.offset = ((startOff>>deltaBlockBits)+1)<<deltaBlockBits
1125+
} else {
1126+
it.offset += it.bs.width
10741127
}
10751128
} else {
1076-
it.offset = (startOff + (i+1)*deltaBlockSize) << 3
1129+
// it.offset = startOff + deltaBlockSize
1130+
it.offset = ((startOff>>deltaBlockBits)+1)<<deltaBlockBits
10771131
return it.Next()
10781132
}
10791133
return true
1080-
} else {
1081-
it.offset = (startOff + i*deltaBlockSize) << 3
1082-
1083-
// Read base, idx, and width.
1084-
it.Next()
1085-
if x <= it.base {
1086-
return true
1087-
} else {
1088-
temp := x - it.base
1089-
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
1090-
return it.bs.readBits(it.offset+i*it.bs.width) >= temp
1091-
})
1092-
1093-
if j < it.count-it.idxBlock {
1094-
it.offset += j * it.bs.width
1095-
it.cur = it.bs.readBits(it.offset) + it.base
1096-
it.offset += it.bs.width
1097-
it.idxBlock += j + 1
1098-
it.idx += j + 1
1099-
if it.idxBlock == it.count {
1100-
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
1101-
}
1102-
} else {
1103-
it.offset = (startOff + (i+1)*deltaBlockSize) << 3
1104-
return it.Next()
1105-
}
1106-
return true
1107-
}
11081134
}
1135+
11091136
}
11101137

11111138
func (it *baseDeltaBlockPostings) Err() error {
@@ -1121,16 +1148,18 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
11211148
var max int
11221149
for i < len(arr) {
11231150
e.PutUvarint32(arr[i]) // Put base.
1124-
e.PutUvarint64(uint64(i)) // Put idx.
1125-
remaining = (deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1) << 3
1151+
remaining = deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1
11261152
deltas = deltas[:0]
11271153
base = arr[i]
11281154
max = -1
11291155
i += 1
11301156
for i < len(arr) {
11311157
delta := arr[i] - base
1132-
cur := bits.Len32(delta)
1133-
if remaining-cur*(len(deltas)+1)-(((bits.Len(uint(len(deltas)))>>3)+1)<<3) >= 0 {
1158+
cur := (bits.Len32(delta) + 7) >> 3
1159+
if cur == 0 {
1160+
cur = 1
1161+
}
1162+
if remaining-cur*(len(deltas)+1)-((bits.Len(uint(len(deltas)))>>3)+1) >= 0 {
11341163
deltas = append(deltas, delta)
11351164
max = cur
11361165
} else {
@@ -1140,28 +1169,22 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
11401169
}
11411170
e.PutUvarint64(uint64(len(deltas) + 1))
11421171
e.PutByte(byte(max))
1143-
remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1) << 3
1172+
remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1)
11441173
for _, delta := range deltas {
1145-
e.PutBits(uint64(delta), max)
1174+
for j := max - 1; j >= 0; j-- {
1175+
e.B = append(e.B, byte((delta>>(8*uint(j))&0xff)))
1176+
}
11461177
remaining -= max
11471178
}
11481179

11491180
if i == len(arr) {
11501181
break
11511182
}
11521183

1153-
for remaining >= 64 {
1154-
e.PutBits(uint64(0), 64)
1155-
remaining -= 64
1156-
}
1157-
1158-
if remaining > 0 {
1159-
e.PutBits(uint64(0), remaining)
1184+
for remaining > 0 {
1185+
e.PutByte(0)
1186+
remaining -= 1
11601187
}
1161-
e.Count = 0
1162-
1163-
// There can be one more extra 0.
1164-
e.B = e.B[:len(e.B)-(len(e.B)-startLen)%deltaBlockSize]
11651188
}
11661189
}
11671190

0 commit comments

Comments
 (0)