@@ -692,8 +692,8 @@ func (it *bigEndianPostings) Err() error {
692692 return nil
693693}
694694
695- // 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock.
696- const postingsType = 4
695+ // 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock, 5 is bitmapPostings.
696+ const postingsType = 5
697697
698698type bitSlice struct {
699699 bstream []byte
@@ -731,8 +731,8 @@ func (bs *bitSlice) readBits(offset int) uint64 {
731731 return u
732732 }
733733
734- if nbits > int (8 - count ) {
735- u = (u << uint (8 - count )) | uint64 ((bs .bstream [idx ]<< count )>> count )
734+ if nbits > int (8 - count ) {
735+ u = (u << uint (8 - count )) | uint64 ((bs .bstream [idx ]<< count )>> count )
736736 nbits -= int (8 - count )
737737 idx += 1
738738
@@ -827,10 +827,10 @@ func (it *deltaBlockPostings) At() uint64 {
827827}
828828
829829func (it * deltaBlockPostings ) Next () bool {
830- if it .offset >= len (it .bs .bstream ) << 3 || it .idx >= it .size {
830+ if it .offset >= len (it .bs .bstream )<< 3 || it .idx >= it .size {
831831 return false
832832 }
833- if it .offset % (deltaBlockSize << 3 ) == 0 {
833+ if it .offset % (deltaBlockSize << 3 ) == 0 {
834834 val , n := binary .Uvarint (it .bs .bstream [it .offset >> 3 :])
835835 if n < 1 {
836836 return false
@@ -854,13 +854,13 @@ func (it *deltaBlockPostings) Next() bool {
854854 it .idxBlock = 1
855855 return true
856856 }
857-
857+
858858 it .cur = it .bs .readBits (it .offset ) + it .cur
859859 it .offset += it .bs .width
860860 it .idx += 1
861861 it .idxBlock += 1
862862 if it .idxBlock == it .count {
863- it .offset = ((it .offset - 1 ) / (deltaBlockSize << 3 ) + 1 ) * deltaBlockSize << 3
863+ it .offset = ((it .offset - 1 )/ (deltaBlockSize << 3 ) + 1 ) * deltaBlockSize << 3
864864 }
865865 return true
866866}
@@ -871,18 +871,18 @@ func (it *deltaBlockPostings) Seek(x uint64) bool {
871871 }
872872
873873 startOff := (it .offset - 1 ) / (deltaBlockSize << 3 ) * deltaBlockSize
874- num := (len (it .bs .bstream ) - 1 ) / deltaBlockSize - (it .offset - 1 ) / (deltaBlockSize << 3 ) + 1
874+ num := (len (it .bs .bstream )- 1 ) / deltaBlockSize - (it .offset - 1 ) / (deltaBlockSize << 3 ) + 1
875875 // Do binary search between current position and end.
876876 i := sort .Search (num , func (i int ) bool {
877877 val , _ := binary .Uvarint (it .bs .bstream [startOff + i * deltaBlockSize :])
878878 return val > x
879879 })
880880 if i > 0 {
881- // Go to the previous block because the previous block
881+ // Go to the previous block because the previous block
882882 // may contain the first value >= x.
883883 i -= 1
884884 }
885- it .offset = (startOff + i * deltaBlockSize ) << 3
885+ it .offset = (startOff + i * deltaBlockSize ) << 3
886886 for it .Next () {
887887 if it .At () >= x {
888888 return true
@@ -903,9 +903,9 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
903903 var preVal uint32
904904 var max int
905905 for i < len (arr ) {
906- e .PutUvarint32 (arr [i ]) // Put base.
906+ e .PutUvarint32 (arr [i ]) // Put base.
907907 e .PutUvarint64 (uint64 (i )) // Put idx.
908- remaining = (deltaBlockSize - (len (e .B ) - startLen ) % deltaBlockSize - 1 ) << 3
908+ remaining = (deltaBlockSize - (len (e .B )- startLen )% deltaBlockSize - 1 ) << 3
909909 deltas = deltas [:0 ]
910910 preVal = arr [i ]
911911 max = - 1
@@ -916,7 +916,7 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
916916 if cur <= max {
917917 cur = max
918918 }
919- if remaining - cur * (len (deltas ) + 1 ) - (((bits .Len (uint (len (deltas ))) >> 3 ) + 1 ) << 3 ) >= 0 {
919+ if remaining - cur * (len (deltas )+ 1 ) - (((bits .Len (uint (len (deltas )))>> 3 ) + 1 ) << 3 ) >= 0 {
920920 deltas = append (deltas , delta )
921921 max = cur
922922 preVal = arr [i ]
@@ -946,7 +946,7 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
946946 e .PutBits (uint64 (0 ), remaining )
947947 }
948948 e .Count = 0
949-
949+
950950 // There can be one more extra 0.
951951 e .B = e .B [:len (e .B )- (len (e .B )- startLen )% deltaBlockSize ]
952952 }
@@ -983,10 +983,10 @@ func (it *baseDeltaBlockPostings) At() uint64 {
983983}
984984
985985func (it * baseDeltaBlockPostings ) Next () bool {
986- if it .offset >= len (it .bs .bstream ) << 3 || it .idx >= it .size {
986+ if it .offset >= len (it .bs .bstream )<< 3 || it .idx >= it .size {
987987 return false
988988 }
989- if it .offset % (deltaBlockSize << 3 ) == 0 {
989+ if it .offset % (deltaBlockSize << 3 ) == 0 {
990990 val , n := binary .Uvarint (it .bs .bstream [it .offset >> 3 :])
991991 if n < 1 {
992992 return false
@@ -1011,13 +1011,13 @@ func (it *baseDeltaBlockPostings) Next() bool {
10111011 it .idxBlock = 1
10121012 return true
10131013 }
1014-
1014+
10151015 it .cur = it .bs .readBits (it .offset ) + it .base
10161016 it .offset += it .bs .width
10171017 it .idx += 1
10181018 it .idxBlock += 1
10191019 if it .idxBlock == it .count {
1020- it .offset = ((it .offset - 1 ) / (deltaBlockSize << 3 ) + 1 ) * deltaBlockSize << 3
1020+ it .offset = ((it .offset - 1 )/ (deltaBlockSize << 3 ) + 1 ) * deltaBlockSize << 3
10211021 }
10221022 return true
10231023}
@@ -1028,40 +1028,40 @@ func (it *baseDeltaBlockPostings) Seek(x uint64) bool {
10281028 }
10291029
10301030 startOff := (it .offset - 1 ) / (deltaBlockSize << 3 ) * deltaBlockSize
1031- num := (len (it .bs .bstream ) - 1 ) / deltaBlockSize - (it .offset - 1 ) / (deltaBlockSize << 3 ) + 1
1031+ num := (len (it .bs .bstream )- 1 ) / deltaBlockSize - (it .offset - 1 ) / (deltaBlockSize << 3 ) + 1
10321032 // Do binary search between current position and end.
10331033 i := sort .Search (num , func (i int ) bool {
10341034 val , _ := binary .Uvarint (it .bs .bstream [startOff + i * deltaBlockSize :])
10351035 return val > x
10361036 })
10371037 if i > 0 {
1038- // Go to the previous block because the previous block
1038+ // Go to the previous block because the previous block
10391039 // may contain the first value >= x.
10401040 i -= 1
10411041 }
1042- it .offset = (startOff + i * deltaBlockSize ) << 3
1043-
1042+ it .offset = (startOff + i * deltaBlockSize ) << 3
1043+
10441044 // Read base, idx, and width.
10451045 it .Next ()
10461046 if x <= it .base {
10471047 return true
10481048 } else {
10491049 temp := x - it .base
1050- j := sort .Search (it .count - it .idxBlock , func (i int ) bool {
1051- return it .bs .readBits (it .offset + i * it .bs .width ) >= temp
1050+ j := sort .Search (it .count - it .idxBlock , func (i int ) bool {
1051+ return it .bs .readBits (it .offset + i * it .bs .width ) >= temp
10521052 })
10531053
1054- if j < it .count - it .idxBlock {
1054+ if j < it .count - it .idxBlock {
10551055 it .offset += j * it .bs .width
10561056 it .cur = it .bs .readBits (it .offset ) + it .base
10571057 it .offset += it .bs .width
10581058 it .idxBlock += j + 1
10591059 it .idx += j + 1
10601060 if it .idxBlock == it .count {
1061- it .offset = ((it .offset - 1 ) / (deltaBlockSize << 3 ) + 1 ) * deltaBlockSize << 3
1061+ it .offset = ((it .offset - 1 )/ (deltaBlockSize << 3 ) + 1 ) * deltaBlockSize << 3
10621062 }
10631063 } else {
1064- it .offset = (startOff + (i + 1 ) * deltaBlockSize ) << 3
1064+ it .offset = (startOff + (i + 1 ) * deltaBlockSize ) << 3
10651065 return it .Next ()
10661066 }
10671067 return true
@@ -1080,17 +1080,17 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
10801080 var base uint32
10811081 var max int
10821082 for i < len (arr ) {
1083- e .PutUvarint32 (arr [i ]) // Put base.
1083+ e .PutUvarint32 (arr [i ]) // Put base.
10841084 e .PutUvarint64 (uint64 (i )) // Put idx.
1085- remaining = (deltaBlockSize - (len (e .B ) - startLen ) % deltaBlockSize - 1 ) << 3
1085+ remaining = (deltaBlockSize - (len (e .B )- startLen )% deltaBlockSize - 1 ) << 3
10861086 deltas = deltas [:0 ]
10871087 base = arr [i ]
10881088 max = - 1
10891089 i += 1
10901090 for i < len (arr ) {
10911091 delta := arr [i ] - base
10921092 cur := bits .Len32 (delta )
1093- if remaining - cur * (len (deltas ) + 1 ) - (((bits .Len (uint (len (deltas ))) >> 3 ) + 1 ) << 3 ) >= 0 {
1093+ if remaining - cur * (len (deltas )+ 1 ) - (((bits .Len (uint (len (deltas )))>> 3 ) + 1 ) << 3 ) >= 0 {
10941094 deltas = append (deltas , delta )
10951095 max = cur
10961096 } else {
@@ -1119,8 +1119,129 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
11191119 e .PutBits (uint64 (0 ), remaining )
11201120 }
11211121 e .Count = 0
1122-
1122+
11231123 // There can be one more extra 0.
11241124 e .B = e .B [:len (e .B )- (len (e .B )- startLen )% deltaBlockSize ]
11251125 }
11261126}
1127+
// bitmapBits is the number of low-order bits of a posting that select a bit
// inside a block's bitmap; the remaining high-order bits form the block key.
// One bitmap therefore occupies 2^bitmapBits bits:
// 8 bits -> 256/8 = 32 bytes, 12 bits -> 4096/8 = 512 bytes,
// 16 bits -> 65536/8 = 8192 bytes.
const bitmapBits = 8

// bitmapPostings iterates over postings stored as a sequence of fixed-size
// bitmap blocks. Each block starts with a 4-byte big-endian key followed by a
// bitmap of bitmapSize bytes. Bit j (counted from the most significant bit) of
// bitmap byte i represents the value key<<bitmapBits + i*8 + j.
//
// Bitmap block format.
// ┌──────────┬────────┐
// │ key <4b> │ bitmap │
// └──────────┴────────┘
type bitmapPostings struct {
	bs         []byte // Remaining stream; always starts at the current block.
	cur        uint64 // Value most recently produced by Next/Seek.
	inside     bool   // Whether key has been read for the block at bs[0:].
	idx1       int    // Index of the next bitmap byte to inspect.
	idx2       int    // Index (0 = MSB) of the next bit to inspect in that byte.
	bitmapSize int    // Size of one bitmap in bytes.
	key        uint32 // Key of the current block.
}

// newBitmapPostings returns an iterator over the bitmap blocks in bstream.
func newBitmapPostings(bstream []byte) *bitmapPostings {
	return &bitmapPostings{bs: bstream, bitmapSize: 1 << (bitmapBits - 3)}
}

// blockSize returns the encoded size of one block: 4-byte key plus bitmap.
func (it *bitmapPostings) blockSize() int {
	return it.bitmapSize + 4
}

// At returns the value the iterator currently points at.
func (it *bitmapPostings) At() uint64 {
	return it.cur
}

// Next advances to the next set bit in the stream and reports whether one was
// found. It returns false once fewer bytes than a full block remain.
func (it *bitmapPostings) Next() bool {
	for {
		if !it.inside {
			if len(it.bs) < it.blockSize() {
				return false
			}
			it.key = binary.BigEndian.Uint32(it.bs)
			it.inside = true
			it.idx1, it.idx2 = 0, 0
		}
		for it.idx1 < it.bitmapSize {
			// Hide the bits of this byte that were already consumed.
			rest := it.bs[4+it.idx1] & (byte(0xff) >> uint(it.idx2))
			if rest == 0 {
				it.idx1++
				it.idx2 = 0
				continue
			}
			bit := bits.LeadingZeros8(rest)
			// Widen before shifting so large keys cannot overflow 32 bits.
			it.cur = uint64(it.key)<<bitmapBits + uint64(it.idx1*8+bit)
			it.idx2 = bit + 1
			if it.idx2 == 8 {
				it.idx1++
				it.idx2 = 0
			}
			return true
		}
		// Current bitmap is exhausted; move on to the following block.
		it.bs = it.bs[it.blockSize():]
		it.inside = false
	}
}

// Seek advances the iterator to the first value >= x and reports whether such
// a value exists. If the current value is already >= x the iterator is left
// untouched; note that this also holds for a fresh iterator, whose current
// value is 0.
func (it *bitmapPostings) Seek(x uint64) bool {
	if it.cur >= x {
		return true
	}
	size := it.blockSize()
	// Compare keys in 64 bits so that an x beyond the 32-bit range is not
	// truncated into the key space of an unrelated block.
	wantKey := x >> bitmapBits
	i := sort.Search(len(it.bs)/size, func(i int) bool {
		return uint64(binary.BigEndian.Uint32(it.bs[i*size:])) > wantKey
	})
	// The block before the first one with a larger key may contain the first
	// value >= x. Index 0 is the block we are already in, so only jump when
	// the candidate lies further ahead.
	if i > 1 {
		it.bs = it.bs[(i-1)*size:]
		it.inside = false
		it.idx1, it.idx2 = 0, 0
	}
	for it.Next() {
		if it.cur >= x {
			return true
		}
	}
	return false
}

// Err always returns nil; iteration over a bitmap stream cannot fail.
func (it *bitmapPostings) Err() error {
	return nil
}
1222+
1223+ func writeBitmapPostings (e * encoding.Encbuf , arr []uint32 ) {
1224+ key := uint32 (0xffffffff )
1225+ bitmapSize := 1 << (bitmapBits - 3 )
1226+ mask := uint32 ((1 << uint (bitmapBits )) - 1 )
1227+ var curKey uint32
1228+ var curVal uint32
1229+ var offset int // The starting offset of the bitmap of each block.
1230+ var idx1 int
1231+ var idx2 int
1232+ for _ , val := range arr {
1233+ curKey = val >> bitmapBits
1234+ curVal = val & mask
1235+ idx1 = int (curVal ) >> 3
1236+ idx2 = int (curVal ) % 8
1237+ if curKey != key {
1238+ key = curKey
1239+ e .PutBE32 (uint32 (key ))
1240+ offset = len (e .Get ())
1241+ for i := 0 ; i < bitmapSize ; i ++ {
1242+ e .PutByte (byte (0 ))
1243+ }
1244+ }
1245+ e .B [offset + idx1 ] |= 1 << uint (7 - idx2 )
1246+ }
1247+ }
0 commit comments