1414package index
1515
1616import (
17+ // "time"
18+ // "fmt"
1719 "container/heap"
1820 "encoding/binary"
1921 "math/bits"
@@ -693,7 +695,7 @@ func (it *bigEndianPostings) Err() error {
693695}
694696
// postingsType identifies a postings encoding:
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock,
// 5 is bitmapPostings, 6 is roaringBitmapPostings.
const postingsType = 4
697699
698700type bitSlice struct {
699701 bstream []byte
@@ -812,7 +814,8 @@ func (it *baseDeltaPostings) Err() error {
812814 return nil
813815}
814816
const (
	// deltaBlockBits is log2 of the block size; block boundaries are
	// computed with shifts by this amount.
	deltaBlockBits = 5
	// deltaBlockSize is the block size. It is derived from deltaBlockBits
	// so the shift-based and modulo-based boundary computations always agree.
	deltaBlockSize = 1 << deltaBlockBits
)
816819
817820// Block format(delta is to the previous value).
818821// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
@@ -970,71 +973,55 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
970973}
971974
972975// Block format(delta is to the base).
973- // ┌────────────────┬───────────────┬───────────────── ┬────────────┬────────────────┬─────┬────────────────┐
974- // │ base <uvarint> │ idx <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bits > │ ... │ delta n <bits > │
975- // └────────────────┴───────────────┴───────────────── ┴────────────┴────────────────┴─────┴────────────────┘
976+ // ┌────────────────┬───────────────── ┬────────────┬───────────────── ┬─────┬─ ────────────────┐
977+ // │ base <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bytes > │ ... │ delta n <bytes > │
978+ // └────────────────┴───────────────── ┴────────────┴───────────────── ┴─────┴─ ────────────────┘
976979type baseDeltaBlockPostings struct {
977980 bs bitSlice
978- size int
979981 count int // count in current block.
980982 idxBlock int
981983 idx int
982984 offset int // offset in bit.
983985 cur uint64
984986 base uint64
987+ mask uint32
988+ prel int
985989}
986990
987- func newBaseDeltaBlockPostings (bstream []byte , size int ) * baseDeltaBlockPostings {
988- return & baseDeltaBlockPostings {bs : bitSlice {bstream : bstream }, size : size }
989- }
990-
991- func (it * baseDeltaBlockPostings ) GetOff () int {
992- return it .offset
993- }
994- func (it * baseDeltaBlockPostings ) GetWidth () int {
995- return it .bs .width
991+ func newBaseDeltaBlockPostings (bstream []byte ) * baseDeltaBlockPostings {
992+ return & baseDeltaBlockPostings {bs : bitSlice {bstream : bstream }}
996993}
997994
998995func (it * baseDeltaBlockPostings ) At () uint64 {
999996 return it .cur
1000997}
1001998
1002999func (it * baseDeltaBlockPostings ) Next () bool {
1003- if it .offset >= len (it .bs .bstream )<< 3 || it . idx >= it . size {
1000+ if it .offset >= len (it .bs .bstream ) {
10041001 return false
10051002 }
1006- if it .offset % (deltaBlockSize << 3 ) == 0 {
1007- val , n := binary .Uvarint (it .bs .bstream [it .offset >> 3 :])
1008- if n < 1 {
1009- return false
1010- }
1003+ if it .offset % deltaBlockSize == 0 {
1004+ val , n := binary .Uvarint (it .bs .bstream [it .offset :])
10111005 it .cur = val
10121006 it .base = val
1013- it .offset += n << 3
1014- val , n = binary .Uvarint (it .bs .bstream [it .offset >> 3 :])
1015- if n < 1 {
1016- return false
1017- }
1018- it .idx = int (val ) + 1
1019- it .offset += n << 3
1020- val , n = binary .Uvarint (it .bs .bstream [it .offset >> 3 :])
1021- if n < 1 {
1022- return false
1023- }
1007+ it .offset += n
1008+
1009+ val , n = binary .Uvarint (it .bs .bstream [it .offset :])
10241010 it .count = int (val )
1025- it .offset += n << 3
1026- it .bs .width = int (it .bs .bstream [it .offset >> 3 ])
1027- it .offset += 8
1011+ it .offset += n
1012+ it .bs .width = int (it .bs .bstream [it .offset ])
1013+ it .mask = (uint32 (1 ) << uint (8 * it .bs .width )) - 1
1014+ it .prel = 4 - it .bs .width
1015+ it .offset += 1
10281016 it .idxBlock = 1
10291017 return true
10301018 }
10311019
1032- it .cur = it .bs .readBits ( it .offset ) + it .base
1020+ it .cur = uint64 ( binary . BigEndian . Uint32 ( it .bs .bstream [ it .offset - it . prel :]) & it . mask ) + it .base
10331021 it .offset += it .bs .width
1034- it .idx += 1
10351022 it .idxBlock += 1
10361023 if it .idxBlock == it .count {
1037- it .offset = ((it .offset - 1 )/ ( deltaBlockSize << 3 ) + 1 ) * deltaBlockSize << 3
1024+ it .offset = ((( it .offset - 1 )>> deltaBlockBits ) + 1 ) << deltaBlockBits
10381025 }
10391026 return true
10401027}
@@ -1043,69 +1030,109 @@ func (it *baseDeltaBlockPostings) Seek(x uint64) bool {
10431030 if it .cur >= x {
10441031 return true
10451032 }
1046-
1047- startOff := (it .offset - 1 ) / (deltaBlockSize << 3 ) * deltaBlockSize
1048- num := (len (it .bs .bstream )- 1 )/ deltaBlockSize - (it .offset - 1 )/ (deltaBlockSize << 3 ) + 1
1049- // Do binary search between current position and end.
1050- i := sort .Search (num , func (i int ) bool {
1051- val , _ := binary .Uvarint (it .bs .bstream [startOff + i * deltaBlockSize :])
1052- return val > x
1053- })
1054- if i > 0 {
1055- // Go to the previous block because the previous block
1056- // may contain the first value >= x.
1057- i -= 1
1033+ if it .offset >= len (it .bs .bstream ) {
1034+ return false
1035+ }
1036+ startOff := (((it .offset )>> deltaBlockBits )+ 1 )<< deltaBlockBits
1037+ num := (len (it .bs .bstream )>> deltaBlockBits ) - (startOff >> deltaBlockBits ) + 1
1038+ if num > 0 {
1039+ // Fast path to check if the binary search among blocks is needed.
1040+ val , _ := binary .Uvarint (it .bs .bstream [startOff :])
1041+ if val <= x {
1042+ // Do binary search between current position and end.
1043+ i := sort .Search (num , func (i int ) bool {
1044+ val , _ := binary .Uvarint (it .bs .bstream [startOff + (i << deltaBlockBits ):])
1045+ return val > x
1046+ })
1047+ if i > 0 {
1048+ // Go to the previous block because the previous block
1049+ // may contain the first value >= x.
1050+ i -= 1
1051+ }
1052+ it .offset = startOff + (i << deltaBlockBits )
1053+
1054+ // Read base, and width.
1055+ val , n := binary .Uvarint (it .bs .bstream [it .offset :])
1056+ it .cur = val
1057+ it .base = val
1058+ it .offset += n
1059+ val , n = binary .Uvarint (it .bs .bstream [it .offset :])
1060+ it .count = int (val )
1061+ it .offset += n
1062+ it .bs .width = int (it .bs .bstream [it .offset ])
1063+ it .mask = (uint32 (1 ) << uint (8 * it .bs .width )) - 1
1064+ it .prel = 4 - it .bs .width
1065+ it .offset += 1
1066+ it .idxBlock = 1
1067+ if x <= it .base {
1068+ return true
1069+ } else {
1070+ temp := x - it .base
1071+ j := sort .Search (it .count - it .idxBlock , func (i int ) bool {
1072+ return uint64 (binary .BigEndian .Uint32 (it .bs .bstream [it .offset + i * it .bs .width - it .prel :])& it .mask ) >= temp
1073+ })
1074+ if j < it .count - it .idxBlock {
1075+ it .offset += j * it .bs .width
1076+ it .cur = uint64 (binary .BigEndian .Uint32 (it .bs .bstream [it .offset - it .prel :])& it .mask ) + it .base
1077+ it .idxBlock += j + 1
1078+ if it .idxBlock == it .count {
1079+ // it.offset = startOff + ((i+1)<<deltaBlockBits)
1080+ it .offset = ((startOff >> deltaBlockBits )+ i + 1 )<< deltaBlockBits
1081+ } else {
1082+ it .offset += it .bs .width
1083+ }
1084+ } else {
1085+ // it.offset = startOff + ((i+1)<<deltaBlockBits)
1086+ it .offset = ((startOff >> deltaBlockBits )+ i + 1 )<< deltaBlockBits
1087+ return it .Next ()
1088+ }
1089+ return true
1090+ }
1091+ }
10581092 }
10591093
1060- if i == 0 && it .idx > 0 {
1094+ // Search in current block.
1095+ startOff -= deltaBlockSize
1096+ if it .offset == startOff {
1097+ // Read base, and width.
1098+ val , n := binary .Uvarint (it .bs .bstream [it .offset :])
1099+ it .cur = val
1100+ it .base = val
1101+ it .offset += n
1102+ val , n = binary .Uvarint (it .bs .bstream [it .offset :])
1103+ it .count = int (val )
1104+ it .offset += n
1105+ it .bs .width = int (it .bs .bstream [it .offset ])
1106+ it .mask = (uint32 (1 ) << uint (8 * it .bs .width )) - 1
1107+ it .prel = 4 - it .bs .width
1108+ it .offset += 1
1109+ it .idxBlock = 1
1110+ }
1111+ if x <= it .base {
1112+ return true
1113+ } else {
10611114 temp := x - it .base
10621115 j := sort .Search (it .count - it .idxBlock , func (i int ) bool {
1063- return it .bs .readBits ( it .offset + i * it .bs .width ) >= temp
1116+ return uint64 ( binary . BigEndian . Uint32 ( it .bs .bstream [ it .offset + i * it .bs .width - it . prel :]) & it . mask ) >= temp
10641117 })
1065-
10661118 if j < it .count - it .idxBlock {
10671119 it .offset += j * it .bs .width
1068- it .cur = it .bs .readBits (it .offset ) + it .base
1069- it .offset += it .bs .width
1120+ it .cur = uint64 (binary .BigEndian .Uint32 (it .bs .bstream [it .offset - it .prel :])& it .mask ) + it .base
10701121 it .idxBlock += j + 1
1071- it .idx += j + 1
10721122 if it .idxBlock == it .count {
1073- it .offset = ((it .offset - 1 )/ (deltaBlockSize << 3 ) + 1 ) * deltaBlockSize << 3
1123+ // it.offset = startOff + deltaBlockSize
1124+ it .offset = ((startOff >> deltaBlockBits )+ 1 )<< deltaBlockBits
1125+ } else {
1126+ it .offset += it .bs .width
10741127 }
10751128 } else {
1076- it .offset = (startOff + (i + 1 )* deltaBlockSize ) << 3
1129+ // it.offset = startOff + deltaBlockSize
1130+ it .offset = ((startOff >> deltaBlockBits )+ 1 )<< deltaBlockBits
10771131 return it .Next ()
10781132 }
10791133 return true
1080- } else {
1081- it .offset = (startOff + i * deltaBlockSize ) << 3
1082-
1083- // Read base, idx, and width.
1084- it .Next ()
1085- if x <= it .base {
1086- return true
1087- } else {
1088- temp := x - it .base
1089- j := sort .Search (it .count - it .idxBlock , func (i int ) bool {
1090- return it .bs .readBits (it .offset + i * it .bs .width ) >= temp
1091- })
1092-
1093- if j < it .count - it .idxBlock {
1094- it .offset += j * it .bs .width
1095- it .cur = it .bs .readBits (it .offset ) + it .base
1096- it .offset += it .bs .width
1097- it .idxBlock += j + 1
1098- it .idx += j + 1
1099- if it .idxBlock == it .count {
1100- it .offset = ((it .offset - 1 )/ (deltaBlockSize << 3 ) + 1 ) * deltaBlockSize << 3
1101- }
1102- } else {
1103- it .offset = (startOff + (i + 1 )* deltaBlockSize ) << 3
1104- return it .Next ()
1105- }
1106- return true
1107- }
11081134 }
1135+
11091136}
11101137
11111138func (it * baseDeltaBlockPostings ) Err () error {
@@ -1121,16 +1148,18 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
11211148 var max int
11221149 for i < len (arr ) {
11231150 e .PutUvarint32 (arr [i ]) // Put base.
1124- e .PutUvarint64 (uint64 (i )) // Put idx.
1125- remaining = (deltaBlockSize - (len (e .B )- startLen )% deltaBlockSize - 1 ) << 3
1151+ remaining = deltaBlockSize - (len (e .B )- startLen )% deltaBlockSize - 1
11261152 deltas = deltas [:0 ]
11271153 base = arr [i ]
11281154 max = - 1
11291155 i += 1
11301156 for i < len (arr ) {
11311157 delta := arr [i ] - base
1132- cur := bits .Len32 (delta )
1133- if remaining - cur * (len (deltas )+ 1 )- (((bits .Len (uint (len (deltas )))>> 3 )+ 1 )<< 3 ) >= 0 {
1158+ cur := (bits .Len32 (delta ) + 7 ) >> 3
1159+ if cur == 0 {
1160+ cur = 1
1161+ }
1162+ if remaining - cur * (len (deltas )+ 1 )- ((bits .Len (uint (len (deltas )))>> 3 )+ 1 ) >= 0 {
11341163 deltas = append (deltas , delta )
11351164 max = cur
11361165 } else {
@@ -1140,28 +1169,22 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
11401169 }
11411170 e .PutUvarint64 (uint64 (len (deltas ) + 1 ))
11421171 e .PutByte (byte (max ))
1143- remaining -= ((bits .Len (uint (len (deltas ))) >> 3 ) + 1 ) << 3
1172+ remaining -= ((bits .Len (uint (len (deltas ))) >> 3 ) + 1 )
11441173 for _ , delta := range deltas {
1145- e .PutBits (uint64 (delta ), max )
1174+ for j := max - 1 ; j >= 0 ; j -- {
1175+ e .B = append (e .B , byte ((delta >> (8 * uint (j ))& 0xff )))
1176+ }
11461177 remaining -= max
11471178 }
11481179
11491180 if i == len (arr ) {
11501181 break
11511182 }
11521183
1153- for remaining >= 64 {
1154- e .PutBits (uint64 (0 ), 64 )
1155- remaining -= 64
1156- }
1157-
1158- if remaining > 0 {
1159- e .PutBits (uint64 (0 ), remaining )
1184+ for remaining > 0 {
1185+ e .PutByte (0 )
1186+ remaining -= 1
11601187 }
1161- e .Count = 0
1162-
1163- // There can be one more extra 0.
1164- e .B = e .B [:len (e .B )- (len (e .B )- startLen )% deltaBlockSize ]
11651188 }
11661189}
11671190
0 commit comments