@@ -754,33 +754,27 @@ type baseDeltaPostings struct {
754754 base uint64
755755 size int
756756 idx int
757+ i int
757758 cur uint64
758759 mask uint64
759760 prel int
760761}
761762
762763func newBaseDeltaPostings (bstream []byte , base uint64 , width int , size int ) * baseDeltaPostings {
763- return & baseDeltaPostings {bs : bstream , width : width , base : base , size : size , cur : uint64 (base ), mask : (uint64 (1 ) << (uint64 (width ) << 3 )) - 1 , prel : 8 - width }
764+ return & baseDeltaPostings {bs : bstream , width : width , base : base , size : size , idx : 8 - width , cur : uint64 (base ), mask : (uint64 (1 ) << (uint64 (width ) << 3 )) - 1 , prel : 8 - width }
764765}
765766
766767func (it * baseDeltaPostings ) At () uint64 {
767768 return it .cur
768769}
769770
770771func (it * baseDeltaPostings ) Next () bool {
771- if it .idx >= it .size * it . width {
772+ if it .i >= it .size {
772773 return false
773774 }
774- if it .idx - it .prel >= 0 {
775- it .cur = binary .BigEndian .Uint64 (it .bs [it .idx - it .prel :])& it .mask + it .base
776- } else {
777- it .cur = 0
778- for i := 0 ; i < it .width ; i ++ {
779- it .cur = (it .cur << 8 ) | uint64 (it .bs [it .idx + i ])
780- }
781- it .cur += it .base
782- }
775+ it .cur = binary .BigEndian .Uint64 (it .bs [it .idx - it .prel :])& it .mask + it .base
783776 it .idx += it .width
777+ it .i += 1
784778 return true
785779}
786780
@@ -789,8 +783,7 @@ func (it *baseDeltaPostings) Seek(x uint64) bool {
789783 return true
790784 }
791785
792- num := it .size - it .idx / it .width
793- // Do binary search between current position and end.
786+ num := it .size - it .i
794787 x -= it .base
795788 i := sort .Search (num , func (i int ) bool {
796789 return binary .BigEndian .Uint64 (it .bs [it .idx + i * it .width - it .prel :])& it .mask >= x
@@ -799,18 +792,18 @@ func (it *baseDeltaPostings) Seek(x uint64) bool {
799792 it .idx += i * it .width
800793 it .cur = it .base + (binary .BigEndian .Uint64 (it .bs [it .idx - it .prel :])& it .mask )
801794 it .idx += it .width
795+ it .i += i + 1
802796 return true
803797 }
804- it .idx += i * it .width
805798 return false
806799}
807800
808801func (it * baseDeltaPostings ) Err () error {
809802 return nil
810803}
811804
// Delta block sizing. deltaBlockSize is derived from deltaBlockBits so that
// the two can never drift apart when the block size is tuned.
// NOTE(review): presumably deltaBlockSize is the number of postings per delta
// block and deltaBlockBits its base-2 logarithm — confirm against the encoder.
const (
	deltaBlockBits = 5
	deltaBlockSize = 1 << deltaBlockBits
)
814807
815808// Block format(delta is to the previous value).
816809// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
@@ -1363,15 +1356,15 @@ func writeBitmapPostings(e *encoding.Encbuf, arr []uint32) {
13631356}
13641357
// Lookup values shared by the roaring bitmap postings code; all of them are
// filled in by init.
var (
	// rbpMasks holds the eight single-bit byte masks, most significant bit first.
	rbpMasks []byte
	// rbpValueMask has the low bitmapBits bits set.
	rbpValueMask uint64
	// rbpValueSize is bitmapBits expressed in whole bytes.
	rbpValueSize int
	// rbpBitmapSize is the number of bytes needed for one bit per possible
	// bitmapBits-wide value.
	rbpBitmapSize int
)
13691362
13701363func init () {
13711364 for i := 7 ; i >= 0 ; i -- {
13721365 rbpMasks = append (rbpMasks , byte (1 << uint (i )))
13731366 }
1374- rbpValueMask = uint32 (( 1 << uint (bitmapBits )) - 1 )
1367+ rbpValueMask = ( uint64 ( 1 ) << uint (bitmapBits )) - 1
13751368 rbpBitmapSize = 1 << (bitmapBits - 3 )
13761369 rbpValueSize = bitmapBits >> 3
13771370}
@@ -1392,7 +1385,7 @@ type roaringBitmapPostings struct {
13921385 idx1 int // The offset in the bitmap in current block in bytes.
13931386 idx2 int // The offset in the current byte in the bitmap ([0,8)).
13941387 footerAddr int
1395- key uint32
1388+ key uint64
13961389 numBlock int
13971390 blockIdx int
13981391 blockType byte
@@ -1419,7 +1412,7 @@ func (it *roaringBitmapPostings) Next() bool {
14191412 if it .inside { // Already entered the block.
14201413 if it .blockType == 0 { // Type array.
14211414 if it .idx < it .nextBlock {
1422- it .cur = uint64 ( it .key ) | uint64 (it .bs [it .idx ])
1415+ it .cur = it .key | uint64 (it .bs [it .idx ])
14231416 it .idx += 1
14241417 return true
14251418 }
@@ -1429,7 +1422,7 @@ func (it *roaringBitmapPostings) Next() bool {
14291422 }
14301423 for it .idx1 < rbpBitmapSize {
14311424 if it .bs [it .idx + it .idx1 ]& rbpMasks [it .idx2 ] != 0 {
1432- it .cur = uint64 ( it .key ) | uint64 ((it .idx1 << 3 )+ it .idx2 )
1425+ it .cur = it .key | uint64 ((it .idx1 << 3 )+ it .idx2 )
14331426 it .idx2 += 1
14341427 if it .idx2 == 8 {
14351428 it .idx1 += 1
@@ -1454,7 +1447,7 @@ func (it *roaringBitmapPostings) Next() bool {
14541447 } else { // Not yet entered the block.
14551448 if it .idx < it .footerAddr {
14561449 val , size := binary .Uvarint (it .bs [it .idx :])
1457- it .key = uint32 ( val ) << bitmapBits
1450+ it .key = val << bitmapBits
14581451 it .idx += size
14591452 it .blockType = it .bs [it .idx ]
14601453 it .idx += 1
@@ -1478,7 +1471,7 @@ func (it *roaringBitmapPostings) Next() bool {
14781471}
14791472
14801473func (it * roaringBitmapPostings ) seekInBlock (x uint64 ) bool {
1481- curVal := byte (uint32 ( x ) & rbpValueMask )
1474+ curVal := byte (x & rbpValueMask )
14821475 if it .blockType == 0 {
14831476 // If encoding with array, binary search.
14841477 num := (it .nextBlock - it .idx )
@@ -1492,15 +1485,15 @@ func (it *roaringBitmapPostings) seekInBlock(x uint64) bool {
14921485 return it .Next ()
14931486 }
14941487
1495- it .cur = uint64 ( it .key ) | uint64 (it .bs [it .idx + j ])
1488+ it .cur = it .key | uint64 (it .bs [it .idx + j ])
14961489 it .idx += j + 1
14971490 return true
14981491 } else {
14991492 // If encoding with bitmap, go to the exact location of value of x.
15001493 it .idx1 = int (curVal >> 3 )
15011494 it .idx2 = int (curVal % 8 )
15021495 if it .bs [it .idx + it .idx1 ]& rbpMasks [it .idx2 ] != 0 { // Found x.
1503- it .cur = uint64 ( it .key ) | uint64 (it .idx1 * 8 + it .idx2 )
1496+ it .cur = it .key | uint64 (it .idx1 * 8 + it .idx2 )
15041497 it .idx2 += 1
15051498 if it .idx2 == 8 {
15061499 it .idx1 += 1
@@ -1522,7 +1515,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15221515 if it .cur >= x {
15231516 return true
15241517 }
1525- curKey := uint32 ( x ) >> bitmapBits
1518+ curKey := x >> bitmapBits
15261519 if it .inside && it .key >> bitmapBits == curKey {
15271520 // Fast path.
15281521 return it .seekInBlock (x )
@@ -1533,7 +1526,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15331526 // off := it.readBytes(it.footerAddr+1+(it.blockIdx+i)*it.width)
15341527 off := int (binary .BigEndian .Uint32 (it .bs [it .footerAddr + 1 + (it .blockIdx + i )* it .width - 4 + it .width :]) & it .addrMask )
15351528 k , _ := binary .Uvarint (it .bs [off :])
1536- return uint32 ( k ) >= curKey
1529+ return k >= curKey
15371530 // return binary.BigEndian.Uint32(it.bs[off:]) > curKey
15381531 })
15391532 if i == it .numBlock - it .blockIdx {
@@ -1553,7 +1546,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15531546
15541547 val , size := binary .Uvarint (it .bs [it .idx :])
15551548 // If the key of current block doesn't match, directly go to the next block.
1556- if uint32 ( val ) != curKey {
1549+ if val != curKey {
15571550 if it .blockIdx == it .numBlock - 1 {
15581551 it .idx = it .footerAddr
15591552 return false
@@ -1564,7 +1557,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15641557 it .idx = int (binary .BigEndian .Uint32 (it .bs [it .footerAddr + 1 + it .blockIdx * it .width - 4 + it .width :]) & it .addrMask )
15651558 // it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+it.blockIdx*4:]))
15661559 val , size := binary .Uvarint (it .bs [it .idx :])
1567- it .key = uint32 ( val ) << bitmapBits
1560+ it .key = val << bitmapBits
15681561 it .idx += size
15691562 it .blockType = it .bs [it .idx ]
15701563 it .idx += 1
@@ -1582,7 +1575,7 @@ func (it *roaringBitmapPostings) Seek(x uint64) bool {
15821575 return it .Next ()
15831576 }
15841577 }
1585- it .key = uint32 ( val ) << bitmapBits
1578+ it .key = val << bitmapBits
15861579 it .idx += size
15871580 it .blockType = it .bs [it .idx ]
15881581 it .idx += 1
@@ -1662,10 +1655,10 @@ func (it *roaringBitmapPostings) readBits(offset int) uint64 {
16621655 return u
16631656}
16641657
1665- func writeRoaringBitmapBlock (e * encoding.Encbuf , vals []int , c [] byte , key uint32 , thres int , bitmapSize int , valueSize int ) {
1666- var offset int // The starting offset of the bitmap of each block.
1667- var idx1 int // The offset in the bitmap in current block in bytes.
1668- var idx2 int // The offset in the current byte in the bitmap ([0,8)).
1658+ func writeRoaringBitmapBlock (e * encoding.Encbuf , vals []uint32 , key uint32 , thres int , bitmapSize int , valueSize int ) {
1659+ var offset int // The starting offset of the bitmap of each block.
1660+ var idx1 uint32 // The offset in the bitmap in current block in bytes.
1661+ var idx2 uint32 // The offset in the current byte in the bitmap ([0,8)).
16691662 e .PutUvarint32 (key )
16701663 if len (vals ) > thres {
16711664 e .PutByte (byte (1 ))
@@ -1676,19 +1669,48 @@ func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []int, c []byte, key uint3
16761669 for _ , val := range vals {
16771670 idx1 = val >> 3
16781671 idx2 = val % 8
1679- e .B [offset + idx1 ] |= 1 << uint (7 - idx2 )
1672+ e .B [uint32 ( offset ) + idx1 ] |= 1 << uint (7 - idx2 )
16801673 }
16811674 } else {
1675+ c := make ([]byte , 4 )
16821676 e .PutByte (byte (0 ))
16831677 for _ , val := range vals {
1684- binary .BigEndian .PutUint32 (c [:], uint32 ( val ) )
1678+ binary .BigEndian .PutUint32 (c [:], val )
16851679 for i := 4 - valueSize ; i < 4 ; i ++ {
16861680 e .PutByte (c [i ])
16871681 }
16881682 }
16891683 }
16901684}
16911685
1686+ func writeRoaringBitmapBlock64 (e * encoding.Encbuf , vals []uint64 , key uint64 , thres int , bitmapSize int , valueSize int ) {
1687+ var offset int // The starting offset of the bitmap of each block.
1688+ var idx1 uint64 // The offset in the bitmap in current block in bytes.
1689+ var idx2 uint64 // The offset in the current byte in the bitmap ([0,8)).
1690+ e .PutUvarint64 (key )
1691+ if len (vals ) > thres {
1692+ e .PutByte (byte (1 ))
1693+ offset = len (e .Get ())
1694+ for i := 0 ; i < bitmapSize ; i ++ {
1695+ e .PutByte (byte (0 ))
1696+ }
1697+ for _ , val := range vals {
1698+ idx1 = val >> 3
1699+ idx2 = val % 8
1700+ e .B [uint64 (offset )+ idx1 ] |= 1 << uint (7 - idx2 )
1701+ }
1702+ } else {
1703+ c := make ([]byte , 8 )
1704+ e .PutByte (byte (0 ))
1705+ for _ , val := range vals {
1706+ binary .BigEndian .PutUint64 (c [:], val )
1707+ for i := 8 - valueSize ; i < 8 ; i ++ {
1708+ e .PutByte (c [i ])
1709+ }
1710+ }
1711+ }
1712+ }
1713+
16921714func putBytes (e * encoding.Encbuf , val uint32 , width int ) {
16931715 for i := width - 1 ; i >= 0 ; i -- {
16941716 e .PutByte (byte ((val >> (8 * uint (i )) & 0xff )))
@@ -1705,8 +1727,7 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17051727 var curVal uint32
17061728 var idx int // Index of current element in arr.
17071729 var startingOffs []uint32 // The starting offsets of each block.
1708- var vals []int // The converted values in the current block.
1709- c := make ([]byte , 4 )
1730+ var vals []uint32 // The converted values in the current block.
17101731 startOff := len (e .Get ())
17111732 e .PutBE32 (0 ) // Footer starting offset.
17121733 for idx < len (arr ) {
@@ -1716,22 +1737,22 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17161737 // Move to next block.
17171738 if idx != 0 {
17181739 startingOffs = append (startingOffs , uint32 (len (e .B )))
1719- writeRoaringBitmapBlock (e , vals , c , key , thres , bitmapSize , valueSize )
1740+ writeRoaringBitmapBlock (e , vals , key , thres , bitmapSize , valueSize )
17201741 vals = vals [:0 ]
17211742 }
17221743 key = curKey
17231744 }
1724- vals = append (vals , int ( curVal ) )
1745+ vals = append (vals , curVal )
17251746 idx += 1
17261747 }
17271748 startingOffs = append (startingOffs , uint32 (len (e .B )))
1728- writeRoaringBitmapBlock (e , vals , c , key , thres , bitmapSize , valueSize )
1749+ writeRoaringBitmapBlock (e , vals , key , thres , bitmapSize , valueSize )
17291750
17301751 // Put footer starting offset.
17311752 binary .BigEndian .PutUint32 (e .B [startOff :], uint32 (len (e .B )- 4 - startOff ))
17321753 width := bits .Len32 (startingOffs [len (startingOffs )- 1 ] - 4 - uint32 (startOff ))
17331754 if width == 0 {
1734- // key 0 will result in o width.
1755+ // key 0 will result in 0 width.
17351756 width += 1
17361757 }
17371758 // e.PutBE32(uint32(len(startingOffs))) // Number of blocks.
@@ -1749,3 +1770,48 @@ func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) {
17491770 // e.PutBE32(off - 4 - uint32(startOff))
17501771 // }
17511772}
1773+
1774+ func writeRoaringBitmapPostings64 (e * encoding.Encbuf , arr []uint64 ) {
1775+ key := uint64 (0xffffffffffffffff ) // The initial key should be unique.
1776+ bitmapSize := 1 << (bitmapBits - 3 ) // Bitmap size in bytes.
1777+ valueSize := bitmapBits >> 3 // The size of the element in array in bytes.
1778+ thres := (1 << bitmapBits ) / bitmapBits // Threshold of number of elements in the block for choosing encoding type.
1779+ mask := (uint64 (1 ) << uint (bitmapBits )) - 1 // Mask for the elements in the block.
1780+ var curKey uint64
1781+ var curVal uint64
1782+ var idx int // Index of current element in arr.
1783+ var startingOffs []uint32 // The starting offsets of each block.
1784+ var vals []uint64 // The converted values in the current block.
1785+ startOff := len (e .Get ())
1786+ e .PutBE32 (0 ) // Footer starting offset.
1787+ for idx < len (arr ) {
1788+ curKey = arr [idx ] >> bitmapBits // Key of block.
1789+ curVal = arr [idx ] & mask // Value inside block.
1790+ if curKey != key {
1791+ // Move to next block.
1792+ if idx != 0 {
1793+ startingOffs = append (startingOffs , uint32 (len (e .B )))
1794+ writeRoaringBitmapBlock64 (e , vals , key , thres , bitmapSize , valueSize )
1795+ vals = vals [:0 ]
1796+ }
1797+ key = curKey
1798+ }
1799+ vals = append (vals , curVal )
1800+ idx += 1
1801+ }
1802+ startingOffs = append (startingOffs , uint32 (len (e .B )))
1803+ writeRoaringBitmapBlock64 (e , vals , key , thres , bitmapSize , valueSize )
1804+
1805+ // Put footer starting offset.
1806+ binary .BigEndian .PutUint32 (e .B [startOff :], uint32 (len (e .B )- 4 - startOff ))
1807+ width := bits .Len32 (startingOffs [len (startingOffs )- 1 ] - 4 - uint32 (startOff ))
1808+ if width == 0 {
1809+ // key 0 will result in 0 width.
1810+ width += 1
1811+ }
1812+
1813+ e .PutByte (byte ((width + 7 ) / 8 ))
1814+ for _ , off := range startingOffs {
1815+ putBytes (e , off - 4 - uint32 (startOff ), (width + 7 )/ 8 )
1816+ }
1817+ }
0 commit comments