Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit d130757

Browse files
committed
improve baseDeltaBlockPostings
Signed-off-by: naivewong <[email protected]>
1 parent ce55654 commit d130757

File tree

3 files changed

+158
-120
lines changed

3 files changed

+158
-120
lines changed

index/index.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1082,7 +1082,7 @@ func (dec *Decoder) Postings(b []byte) (int, Postings, error) {
10821082
return n, newDeltaBlockPostings(l, n), d.Err()
10831083
case 4:
10841084
l := d.Get()
1085-
return n, newBaseDeltaBlockPostings(l, n), d.Err()
1085+
return n, newBaseDeltaBlockPostings(l), d.Err()
10861086
case 5:
10871087
l := d.Get()
10881088
return n, newBitmapPostings(l), d.Err()

index/postings.go

Lines changed: 127 additions & 103 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
package index
1515

1616
import (
17+
// "time"
18+
// "fmt"
1719
"container/heap"
1820
"encoding/binary"
1921
"math/bits"
@@ -693,7 +695,7 @@ func (it *bigEndianPostings) Err() error {
693695
}
694696

695697
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock, 5 is bitmapPostings, 6 is roaringBitmapPostings.
696-
const postingsType = 2
698+
const postingsType = 4
697699

698700
type bitSlice struct {
699701
bstream []byte
@@ -812,7 +814,8 @@ func (it *baseDeltaPostings) Err() error {
812814
return nil
813815
}
814816

815-
const deltaBlockSize = 256
817+
const deltaBlockSize = 128
818+
const deltaBlockBits = 7
816819

817820
// Block format(delta is to the previous value).
818821
// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
@@ -970,71 +973,55 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
970973
}
971974

972975
// Block format (each delta is relative to the block's base).
973-
// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
974-
// │ base <uvarint> │ idx <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bits> │ ... │ delta n <bits> │
975-
// └────────────────┴───────────────┴─────────────────┴────────────┴────────────────┴─────┴────────────────┘
976+
// ┌────────────────┬─────────────────┬────────────┬─────────────────┬─────┬─────────────────┐
977+
// │ base <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bytes> │ ... │ delta n <bytes> │
978+
// └────────────────┴─────────────────┴────────────┴─────────────────┴─────┴─────────────────┘
976979
type baseDeltaBlockPostings struct {
977980
bs bitSlice
978-
size int
979981
count int // count in current block.
980982
idxBlock int
981983
idx int
982984
offset int // offset in bytes.
983985
cur uint64
984986
base uint64
987+
mask uint32
988+
prel int
985989
}
986990

987-
func newBaseDeltaBlockPostings(bstream []byte, size int) *baseDeltaBlockPostings {
988-
return &baseDeltaBlockPostings{bs: bitSlice{bstream: bstream}, size: size}
989-
}
990-
991-
func (it *baseDeltaBlockPostings) GetOff() int {
992-
return it.offset
993-
}
994-
func (it *baseDeltaBlockPostings) GetWidth() int {
995-
return it.bs.width
991+
func newBaseDeltaBlockPostings(bstream []byte) *baseDeltaBlockPostings {
992+
return &baseDeltaBlockPostings{bs: bitSlice{bstream: bstream}}
996993
}
997994

998995
func (it *baseDeltaBlockPostings) At() uint64 {
999996
return it.cur
1000997
}
1001998

1002999
func (it *baseDeltaBlockPostings) Next() bool {
1003-
if it.offset >= len(it.bs.bstream)<<3 || it.idx >= it.size {
1000+
if it.offset >= len(it.bs.bstream) {
10041001
return false
10051002
}
1006-
if it.offset%(deltaBlockSize<<3) == 0 {
1007-
val, n := binary.Uvarint(it.bs.bstream[it.offset>>3:])
1008-
if n < 1 {
1009-
return false
1010-
}
1003+
if it.offset%deltaBlockSize == 0 {
1004+
val, n := binary.Uvarint(it.bs.bstream[it.offset:])
10111005
it.cur = val
10121006
it.base = val
1013-
it.offset += n << 3
1014-
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
1015-
if n < 1 {
1016-
return false
1017-
}
1018-
it.idx = int(val) + 1
1019-
it.offset += n << 3
1020-
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
1021-
if n < 1 {
1022-
return false
1023-
}
1007+
it.offset += n
1008+
1009+
val, n = binary.Uvarint(it.bs.bstream[it.offset:])
10241010
it.count = int(val)
1025-
it.offset += n << 3
1026-
it.bs.width = int(it.bs.bstream[it.offset>>3])
1027-
it.offset += 8
1011+
it.offset += n
1012+
it.bs.width = int(it.bs.bstream[it.offset])
1013+
it.mask = (uint32(1) << uint(8 * it.bs.width)) - 1
1014+
it.prel = 4 - it.bs.width
1015+
it.offset += 1
10281016
it.idxBlock = 1
10291017
return true
10301018
}
10311019

1032-
it.cur = it.bs.readBits(it.offset) + it.base
1020+
it.cur = uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base
10331021
it.offset += it.bs.width
1034-
it.idx += 1
10351022
it.idxBlock += 1
10361023
if it.idxBlock == it.count {
1037-
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
1024+
it.offset = (((it.offset-1)>>deltaBlockBits) + 1) << deltaBlockBits
10381025
}
10391026
return true
10401027
}
@@ -1043,69 +1030,110 @@ func (it *baseDeltaBlockPostings) Seek(x uint64) bool {
10431030
if it.cur >= x {
10441031
return true
10451032
}
1046-
1047-
startOff := (it.offset - 1) / (deltaBlockSize << 3) * deltaBlockSize
1048-
num := (len(it.bs.bstream)-1)/deltaBlockSize - (it.offset-1)/(deltaBlockSize<<3) + 1
1049-
// Do binary search between current position and end.
1050-
i := sort.Search(num, func(i int) bool {
1051-
val, _ := binary.Uvarint(it.bs.bstream[startOff+i*deltaBlockSize:])
1052-
return val > x
1053-
})
1054-
if i > 0 {
1055-
// Go to the previous block because the previous block
1056-
// may contain the first value >= x.
1057-
i -= 1
1033+
if it.offset >= len(it.bs.bstream) {
1034+
return false
1035+
}
1036+
startOff := (((it.offset)>>deltaBlockBits)+1)<<deltaBlockBits
1037+
num := (len(it.bs.bstream)>>deltaBlockBits) - startOff >> deltaBlockBits + 1
1038+
var i int
1039+
if num > 0 {
1040+
// Fast path to check if the binary search among blocks is needed.
1041+
val, _ := binary.Uvarint(it.bs.bstream[startOff+deltaBlockSize:])
1042+
if val <= x {
1043+
// Do binary search between current position and end.
1044+
i = sort.Search(num, func(i int) bool {
1045+
val, _ := binary.Uvarint(it.bs.bstream[startOff+(i<<deltaBlockBits):])
1046+
return val > x
1047+
})
1048+
if i > 0 {
1049+
// Go to the previous block because the previous block
1050+
// may contain the first value >= x.
1051+
i -= 1
1052+
}
1053+
it.offset = startOff + (i<<deltaBlockBits)
1054+
1055+
// Read base, count, and width.
1056+
val, n := binary.Uvarint(it.bs.bstream[it.offset:])
1057+
it.cur = val
1058+
it.base = val
1059+
it.offset += n
1060+
val, n = binary.Uvarint(it.bs.bstream[it.offset:])
1061+
it.count = int(val)
1062+
it.offset += n
1063+
it.bs.width = int(it.bs.bstream[it.offset])
1064+
it.mask = (uint32(1) << uint(8 * it.bs.width)) - 1
1065+
it.prel = 4 - it.bs.width
1066+
it.offset += 1
1067+
it.idxBlock = 1
1068+
if x <= it.base {
1069+
return true
1070+
} else {
1071+
temp := x - it.base
1072+
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
1073+
return uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset+i*it.bs.width-it.prel:])&it.mask) >= temp
1074+
})
1075+
if j < it.count-it.idxBlock {
1076+
it.offset += j * it.bs.width
1077+
it.cur = uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base
1078+
it.idxBlock += j + 1
1079+
if it.idxBlock == it.count {
1080+
// it.offset = startOff + ((i+1)<<deltaBlockBits)
1081+
it.offset = ((startOff>>deltaBlockBits)+i+1)<<deltaBlockBits
1082+
} else {
1083+
it.offset += it.bs.width
1084+
}
1085+
} else {
1086+
// it.offset = startOff + ((i+1)<<deltaBlockBits)
1087+
it.offset = ((startOff>>deltaBlockBits)+i+1)<<deltaBlockBits
1088+
return it.Next()
1089+
}
1090+
return true
1091+
}
1092+
}
10581093
}
10591094

1060-
if i == 0 && it.idx > 0 {
1095+
// Search in current block.
1096+
startOff -= deltaBlockSize
1097+
if it.offset == startOff {
1098+
// Read base, count, and width.
1099+
val, n := binary.Uvarint(it.bs.bstream[it.offset:])
1100+
it.cur = val
1101+
it.base = val
1102+
it.offset += n
1103+
val, n = binary.Uvarint(it.bs.bstream[it.offset:])
1104+
it.count = int(val)
1105+
it.offset += n
1106+
it.bs.width = int(it.bs.bstream[it.offset])
1107+
it.mask = (uint32(1) << uint(8 * it.bs.width)) - 1
1108+
it.prel = 4 - it.bs.width
1109+
it.offset += 1
1110+
it.idxBlock = 1
1111+
}
1112+
if x <= it.base {
1113+
return true
1114+
} else {
10611115
temp := x - it.base
10621116
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
1063-
return it.bs.readBits(it.offset+i*it.bs.width) >= temp
1117+
return uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset+i*it.bs.width-it.prel:])&it.mask) >= temp
10641118
})
1065-
10661119
if j < it.count-it.idxBlock {
10671120
it.offset += j * it.bs.width
1068-
it.cur = it.bs.readBits(it.offset) + it.base
1069-
it.offset += it.bs.width
1121+
it.cur = uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base
10701122
it.idxBlock += j + 1
1071-
it.idx += j + 1
10721123
if it.idxBlock == it.count {
1073-
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
1124+
// it.offset = startOff + deltaBlockSize
1125+
it.offset = ((startOff>>deltaBlockBits)+1)<<deltaBlockBits
1126+
} else {
1127+
it.offset += it.bs.width
10741128
}
10751129
} else {
1076-
it.offset = (startOff + (i+1)*deltaBlockSize) << 3
1130+
// it.offset = startOff + deltaBlockSize
1131+
it.offset = ((startOff>>deltaBlockBits)+1)<<deltaBlockBits
10771132
return it.Next()
10781133
}
10791134
return true
1080-
} else {
1081-
it.offset = (startOff + i*deltaBlockSize) << 3
1082-
1083-
// Read base, idx, and width.
1084-
it.Next()
1085-
if x <= it.base {
1086-
return true
1087-
} else {
1088-
temp := x - it.base
1089-
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
1090-
return it.bs.readBits(it.offset+i*it.bs.width) >= temp
1091-
})
1092-
1093-
if j < it.count-it.idxBlock {
1094-
it.offset += j * it.bs.width
1095-
it.cur = it.bs.readBits(it.offset) + it.base
1096-
it.offset += it.bs.width
1097-
it.idxBlock += j + 1
1098-
it.idx += j + 1
1099-
if it.idxBlock == it.count {
1100-
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
1101-
}
1102-
} else {
1103-
it.offset = (startOff + (i+1)*deltaBlockSize) << 3
1104-
return it.Next()
1105-
}
1106-
return true
1107-
}
11081135
}
1136+
11091137
}
11101138

11111139
func (it *baseDeltaBlockPostings) Err() error {
@@ -1121,16 +1149,18 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
11211149
var max int
11221150
for i < len(arr) {
11231151
e.PutUvarint32(arr[i]) // Put base.
1124-
e.PutUvarint64(uint64(i)) // Put idx.
1125-
remaining = (deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1) << 3
1152+
remaining = deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1
11261153
deltas = deltas[:0]
11271154
base = arr[i]
11281155
max = -1
11291156
i += 1
11301157
for i < len(arr) {
11311158
delta := arr[i] - base
1132-
cur := bits.Len32(delta)
1133-
if remaining-cur*(len(deltas)+1)-(((bits.Len(uint(len(deltas)))>>3)+1)<<3) >= 0 {
1159+
cur := (bits.Len32(delta) + 7) >> 3
1160+
if cur == 0 {
1161+
cur = 1
1162+
}
1163+
if remaining-cur*(len(deltas)+1)-((bits.Len(uint(len(deltas)))>>3)+1) >= 0 {
11341164
deltas = append(deltas, delta)
11351165
max = cur
11361166
} else {
@@ -1140,28 +1170,22 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
11401170
}
11411171
e.PutUvarint64(uint64(len(deltas) + 1))
11421172
e.PutByte(byte(max))
1143-
remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1) << 3
1173+
remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1)
11441174
for _, delta := range deltas {
1145-
e.PutBits(uint64(delta), max)
1175+
for j := max - 1; j >= 0; j-- {
1176+
e.B = append(e.B, byte((delta>>(8*uint(j))&0xff)))
1177+
}
11461178
remaining -= max
11471179
}
11481180

11491181
if i == len(arr) {
11501182
break
11511183
}
11521184

1153-
for remaining >= 64 {
1154-
e.PutBits(uint64(0), 64)
1155-
remaining -= 64
1156-
}
1157-
1158-
if remaining > 0 {
1159-
e.PutBits(uint64(0), remaining)
1185+
for remaining > 0 {
1186+
e.PutByte(0)
1187+
remaining -= 1
11601188
}
1161-
e.Count = 0
1162-
1163-
// There can be one more extra 0.
1164-
e.B = e.B[:len(e.B)-(len(e.B)-startLen)%deltaBlockSize]
11651189
}
11661190
}
11671191

0 commit comments

Comments
 (0)