Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit bf6c0ae

Browse files
committed
fix bug in Seek and add another idea
Signed-off-by: naivewong <[email protected]>
1 parent bad269a commit bf6c0ae

File tree

3 files changed

+320
-30
lines changed

3 files changed

+320
-30
lines changed

index/index.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,8 @@ func (w *Writer) WritePostings(name, value string, it Postings) error {
539539
}
540540
case 3:
541541
writeDeltaBlockPostings(&w.buf2, refs)
542+
case 4:
543+
writeBaseDeltaBlockPostings(&w.buf2, refs)
542544
}
543545

544546
w.uint32s = refs
@@ -1056,6 +1058,9 @@ func (dec *Decoder) Postings(b []byte) (int, Postings, error) {
10561058
case 3:
10571059
l := d.Get()
10581060
return n, newDeltaBlockPostings(l, n), d.Err()
1061+
case 4:
1062+
l := d.Get()
1063+
return n, newBaseDeltaBlockPostings(l, n), d.Err()
10591064
default:
10601065
return n, EmptyPostings(), d.Err()
10611066
}

index/postings.go

Lines changed: 208 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -692,8 +692,8 @@ func (it *bigEndianPostings) Err() error {
692692
return nil
693693
}
694694

695-
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock.
696-
const postingsType = 3
695+
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock.
696+
const postingsType = 4
697697

698698
type bitSlice struct {
699699
bstream []byte
@@ -778,8 +778,9 @@ func (it *baseDeltaPostings) Seek(x uint64) bool {
778778

779779
num := it.size - it.idx
780780
// Do binary search between current position and end.
781+
x -= uint64(it.base)
781782
i := sort.Search(num, func(i int) bool {
782-
return it.bs.readBits((i+it.idx)*it.bs.width) + uint64(it.base) >= x
783+
return it.bs.readBits((i+it.idx)*it.bs.width) >= x
783784
})
784785
if i < num {
785786
it.cur = it.bs.readBits((i+it.idx)*it.bs.width) + uint64(it.base)
@@ -794,14 +795,14 @@ func (it *baseDeltaPostings) Err() error {
794795
return nil
795796
}
796797

797-
const deltaBlockSize = 256
798+
const deltaBlockSize = 128
798799

799800
// Block format (each delta is relative to the previous value).
800-
// ┌────────────────┬───────────────┬────────────┬────────────────┬─────┬────────────────┐
801-
// │ base <uvarint> │ idx <uvarint> │ width <1b> │ delta 1 <bits> │ ... │ delta n <bits> │
802-
// └────────────────┴───────────────┴────────────┴────────────────┴─────┴────────────────┘
801+
// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
802+
// │ base <uvarint> │ idx <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bits> │ ... │ delta n <bits> │
803+
// └────────────────┴───────────────┴─────────────────┴────────────┴────────────────┴─────┴────────────────┘
803804
type deltaBlockPostings struct {
804-
bs bitSlice
805+
bs bitSlice
805806
size int
806807
count int // count in current block.
807808
idxBlock int
@@ -826,29 +827,29 @@ func (it *deltaBlockPostings) At() uint64 {
826827
}
827828

828829
func (it *deltaBlockPostings) Next() bool {
829-
if it.offset >= len(it.bs.bstream) * 8 || it.idx >= it.size {
830+
if it.offset >= len(it.bs.bstream) << 3 || it.idx >= it.size {
830831
return false
831832
}
832-
if it.offset % (deltaBlockSize * 8) == 0 {
833-
val, n := binary.Uvarint(it.bs.bstream[it.offset/8:])
833+
if it.offset % (deltaBlockSize << 3) == 0 {
834+
val, n := binary.Uvarint(it.bs.bstream[it.offset>>3:])
834835
if n < 1 {
835836
return false
836837
}
837838
it.cur = val
838-
it.offset += n * 8
839-
val, n = binary.Uvarint(it.bs.bstream[it.offset/8:])
839+
it.offset += n << 3
840+
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
840841
if n < 1 {
841842
return false
842843
}
843844
it.idx = int(val) + 1
844-
it.offset += n * 8
845-
val, n = binary.Uvarint(it.bs.bstream[it.offset/8:])
845+
it.offset += n << 3
846+
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
846847
if n < 1 {
847848
return false
848849
}
849850
it.count = int(val)
850-
it.offset += n * 8
851-
it.bs.width = int(it.bs.bstream[it.offset/8])
851+
it.offset += n << 3
852+
it.bs.width = int(it.bs.bstream[it.offset>>3])
852853
it.offset += 8
853854
it.idxBlock = 1
854855
return true
@@ -859,7 +860,7 @@ func (it *deltaBlockPostings) Next() bool {
859860
it.idx += 1
860861
it.idxBlock += 1
861862
if it.idxBlock == it.count {
862-
it.offset = ((it.offset-1) / (deltaBlockSize * 8) + 1) * deltaBlockSize * 8
863+
it.offset = ((it.offset-1) / (deltaBlockSize << 3) + 1) * deltaBlockSize << 3
863864
}
864865
return true
865866
}
@@ -869,8 +870,8 @@ func (it *deltaBlockPostings) Seek(x uint64) bool {
869870
return true
870871
}
871872

872-
startOff := it.offset / (deltaBlockSize * 8) * deltaBlockSize
873-
num := len(it.bs.bstream) / deltaBlockSize - it.offset / (deltaBlockSize * 8)
873+
startOff := (it.offset - 1) / (deltaBlockSize << 3) * deltaBlockSize
874+
num := (len(it.bs.bstream) - 1) / deltaBlockSize - (it.offset - 1) / (deltaBlockSize << 3) + 1
874875
// Do binary search between current position and end.
875876
i := sort.Search(num, func(i int) bool {
876877
val, _ := binary.Uvarint(it.bs.bstream[startOff+i*deltaBlockSize:])
@@ -881,7 +882,7 @@ func (it *deltaBlockPostings) Seek(x uint64) bool {
881882
// may contain the first value >= x.
882883
i -= 1
883884
}
884-
it.offset = (startOff + i * deltaBlockSize) * 8
885+
it.offset = (startOff + i * deltaBlockSize) << 3
885886
for it.Next() {
886887
if it.At() >= x {
887888
return true
@@ -897,21 +898,25 @@ func (it *deltaBlockPostings) Err() error {
897898
func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
898899
i := 0
899900
startLen := len(e.B)
901+
deltas := []uint32{}
902+
var remaining int
903+
var preVal uint32
904+
var max int
900905
for i < len(arr) {
901906
e.PutUvarint32(arr[i]) // Put base.
902907
e.PutUvarint64(uint64(i)) // Put idx.
903-
remaining := (deltaBlockSize - (len(e.B) - startLen) % deltaBlockSize - 1) * 8
904-
deltas := []uint64{}
905-
preVal := arr[i]
906-
max := -1
908+
remaining = (deltaBlockSize - (len(e.B) - startLen) % deltaBlockSize - 1) << 3
909+
deltas = deltas[:0]
910+
preVal = arr[i]
911+
max = -1
907912
i += 1
908913
for i < len(arr) {
909-
delta := uint64(arr[i] - preVal)
910-
cur := bits.Len64(delta)
914+
delta := arr[i] - preVal
915+
cur := bits.Len32(delta)
911916
if cur <= max {
912917
cur = max
913918
}
914-
if remaining - cur * (len(deltas) + 1) - (bits.Len(uint(len(deltas))) / 8 + 1) * 8 >= 0 {
919+
if remaining - cur * (len(deltas) + 1) - (((bits.Len(uint(len(deltas))) >> 3) + 1) << 3) >= 0 {
915920
deltas = append(deltas, delta)
916921
max = cur
917922
preVal = arr[i]
@@ -922,9 +927,182 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
922927
}
923928
e.PutUvarint64(uint64(len(deltas) + 1))
924929
e.PutByte(byte(max))
925-
remaining -= (bits.Len(uint(len(deltas))) / 8 + 1) * 8
930+
remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1) << 3
926931
for _, delta := range deltas {
927-
e.PutBits(delta, max)
932+
e.PutBits(uint64(delta), max)
933+
remaining -= max
934+
}
935+
936+
if i == len(arr) {
937+
break
938+
}
939+
940+
for remaining >= 64 {
941+
e.PutBits(uint64(0), 64)
942+
remaining -= 64
943+
}
944+
945+
if remaining > 0 {
946+
e.PutBits(uint64(0), remaining)
947+
}
948+
e.Count = 0
949+
950+
// There can be one more extra 0.
951+
e.B = e.B[:len(e.B)-(len(e.B)-startLen)%deltaBlockSize]
952+
}
953+
}
954+
955+
// Block format(delta is to the base).
956+
// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
957+
// │ base <uvarint> │ idx <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bits> │ ... │ delta n <bits> │
958+
// └────────────────┴───────────────┴─────────────────┴────────────┴────────────────┴─────┴────────────────┘
959+
type baseDeltaBlockPostings struct {
960+
bs bitSlice
961+
size int
962+
count int // count in current block.
963+
idxBlock int
964+
idx int
965+
offset int // offset in bit.
966+
cur uint64
967+
base uint64
968+
}
969+
970+
func newBaseDeltaBlockPostings(bstream []byte, size int) *baseDeltaBlockPostings {
971+
return &baseDeltaBlockPostings{bs: bitSlice{bstream: bstream}, size: size}
972+
}
973+
974+
func (it *baseDeltaBlockPostings) GetOff() int {
975+
return it.offset
976+
}
977+
func (it *baseDeltaBlockPostings) GetWidth() int {
978+
return it.bs.width
979+
}
980+
981+
func (it *baseDeltaBlockPostings) At() uint64 {
982+
return it.cur
983+
}
984+
985+
func (it *baseDeltaBlockPostings) Next() bool {
986+
if it.offset >= len(it.bs.bstream) << 3 || it.idx >= it.size {
987+
return false
988+
}
989+
if it.offset % (deltaBlockSize << 3) == 0 {
990+
val, n := binary.Uvarint(it.bs.bstream[it.offset>>3:])
991+
if n < 1 {
992+
return false
993+
}
994+
it.cur = val
995+
it.base = val
996+
it.offset += n << 3
997+
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
998+
if n < 1 {
999+
return false
1000+
}
1001+
it.idx = int(val) + 1
1002+
it.offset += n << 3
1003+
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
1004+
if n < 1 {
1005+
return false
1006+
}
1007+
it.count = int(val)
1008+
it.offset += n << 3
1009+
it.bs.width = int(it.bs.bstream[it.offset>>3])
1010+
it.offset += 8
1011+
it.idxBlock = 1
1012+
return true
1013+
}
1014+
1015+
it.cur = it.bs.readBits(it.offset) + it.base
1016+
it.offset += it.bs.width
1017+
it.idx += 1
1018+
it.idxBlock += 1
1019+
if it.idxBlock == it.count {
1020+
it.offset = ((it.offset-1) / (deltaBlockSize << 3) + 1) * deltaBlockSize << 3
1021+
}
1022+
return true
1023+
}
1024+
1025+
func (it *baseDeltaBlockPostings) Seek(x uint64) bool {
1026+
if it.cur >= x {
1027+
return true
1028+
}
1029+
1030+
startOff := (it.offset - 1) / (deltaBlockSize << 3) * deltaBlockSize
1031+
num := (len(it.bs.bstream) - 1) / deltaBlockSize - (it.offset - 1) / (deltaBlockSize << 3) + 1
1032+
// Do binary search between current position and end.
1033+
i := sort.Search(num, func(i int) bool {
1034+
val, _ := binary.Uvarint(it.bs.bstream[startOff+i*deltaBlockSize:])
1035+
return val > x
1036+
})
1037+
if i > 0 {
1038+
// Go to the previous block because the previous block
1039+
// may contain the first value >= x.
1040+
i -= 1
1041+
}
1042+
it.offset = (startOff + i * deltaBlockSize) << 3
1043+
1044+
// Read base, idx, and width.
1045+
it.Next()
1046+
if x <= it.base {
1047+
return true
1048+
} else {
1049+
temp := x - it.base
1050+
j := sort.Search(it.count - it.idxBlock, func(i int) bool {
1051+
return it.bs.readBits(it.offset + i * it.bs.width) >= temp
1052+
})
1053+
1054+
if j < it.count - it.idxBlock {
1055+
it.offset += j * it.bs.width
1056+
it.cur = it.bs.readBits(it.offset) + it.base
1057+
it.offset += it.bs.width
1058+
it.idxBlock += j + 1
1059+
it.idx += j + 1
1060+
if it.idxBlock == it.count {
1061+
it.offset = ((it.offset-1) / (deltaBlockSize << 3) + 1) * deltaBlockSize << 3
1062+
}
1063+
} else {
1064+
it.offset = (startOff + (i + 1) * deltaBlockSize) << 3
1065+
return it.Next()
1066+
}
1067+
return true
1068+
}
1069+
}
1070+
1071+
func (it *baseDeltaBlockPostings) Err() error {
1072+
return nil
1073+
}
1074+
1075+
func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
1076+
i := 0
1077+
startLen := len(e.B)
1078+
deltas := []uint32{}
1079+
var remaining int
1080+
var base uint32
1081+
var max int
1082+
for i < len(arr) {
1083+
e.PutUvarint32(arr[i]) // Put base.
1084+
e.PutUvarint64(uint64(i)) // Put idx.
1085+
remaining = (deltaBlockSize - (len(e.B) - startLen) % deltaBlockSize - 1) << 3
1086+
deltas = deltas[:0]
1087+
base = arr[i]
1088+
max = -1
1089+
i += 1
1090+
for i < len(arr) {
1091+
delta := arr[i] - base
1092+
cur := bits.Len32(delta)
1093+
if remaining - cur * (len(deltas) + 1) - (((bits.Len(uint(len(deltas))) >> 3) + 1) << 3) >= 0 {
1094+
deltas = append(deltas, delta)
1095+
max = cur
1096+
} else {
1097+
break
1098+
}
1099+
i += 1
1100+
}
1101+
e.PutUvarint64(uint64(len(deltas) + 1))
1102+
e.PutByte(byte(max))
1103+
remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1) << 3
1104+
for _, delta := range deltas {
1105+
e.PutBits(uint64(delta), max)
9281106
remaining -= max
9291107
}
9301108

0 commit comments

Comments
 (0)