Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit bad269a

Browse files
committed
postings compression exploration
Signed-off-by: naivewong <[email protected]>
1 parent 6ab4830 commit bad269a

File tree

4 files changed

+647
-7
lines changed

4 files changed

+647
-7
lines changed

encoding/encoding.go

Lines changed: 57 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,11 +29,16 @@ var (
2929

3030
// Encbuf is a helper type to populate a byte slice with various types.
3131
type Encbuf struct {
32-
B []byte
33-
C [binary.MaxVarintLen64]byte
32+
// B is the buffer being built; Get returns it and Reset truncates it.
B []byte
33+
C [binary.MaxVarintLen64]byte
34+
// Count is the number of unused bits left in the last byte of B. The bit
// writers (putBit, putByte) append a fresh byte and set Count to 8 whenever
// they find it at 0.
Count uint8
35+
}
36+
37+
// Reset empties the buffer for reuse: B is truncated to zero length and the
// pending-bit counter Count is cleared.
func (e *Encbuf) Reset() {
38+
e.B = e.B[:0]
39+
e.Count = 0
3440
}
3541

36-
func (e *Encbuf) Reset() { e.B = e.B[:0] }
3742
// Get returns the bytes written so far. The result is B itself, not a copy.
func (e *Encbuf) Get() []byte { return e.B }
3843
// Len returns the number of bytes currently held in B.
func (e *Encbuf) Len() int { return len(e.B) }
3944

@@ -82,6 +87,55 @@ func (e *Encbuf) PutHash(h hash.Hash) {
8287
e.B = h.Sum(e.B)
8388
}
8489

90+
type bit bool
91+
92+
func (e *Encbuf) putBit(bit bit) {
93+
if e.Count == 0 {
94+
e.B = append(e.B, 0)
95+
e.Count = 8
96+
}
97+
98+
i := len(e.B) - 1
99+
100+
if bit {
101+
e.B[i] |= 1 << (e.Count - 1)
102+
}
103+
104+
e.Count--
105+
}
106+
107+
func (e *Encbuf) putByte(byt byte) {
108+
if e.Count == 0 {
109+
e.B = append(e.B, 0)
110+
e.Count = 8
111+
}
112+
113+
i := len(e.B) - 1
114+
115+
// fill up e.B with e.Count bits from byt
116+
e.B[i] |= byt >> (8 - e.Count)
117+
118+
e.B = append(e.B, 0)
119+
i++
120+
e.B[i] = byt << e.Count
121+
}
122+
123+
func (e *Encbuf) PutBits(u uint64, nbits int) {
124+
u <<= (64 - uint(nbits))
125+
for nbits >= 8 {
126+
byt := byte(u >> 56)
127+
e.putByte(byt)
128+
u <<= 8
129+
nbits -= 8
130+
}
131+
132+
for nbits > 0 {
133+
e.putBit((u >> 63) == 1)
134+
u <<= 1
135+
nbits--
136+
}
137+
}
138+
85139
// Decbuf provides safe methods to extract data from a byte slice. It does all
86140
// necessary bounds checking and advancing of the byte slice.
87141
// Several datums can be extracted without checking for errors. However, before using

index/index.go

Lines changed: 33 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ import (
2121
"io"
2222
"io/ioutil"
2323
"math"
24+
"math/bits"
2425
"os"
2526
"path/filepath"
2627
"sort"
@@ -522,9 +523,24 @@ func (w *Writer) WritePostings(name, value string, it Postings) error {
522523
w.buf2.Reset()
523524
w.buf2.PutBE32int(len(refs))
524525

525-
for _, r := range refs {
526-
w.buf2.PutBE32(r)
526+
switch postingsType {
527+
case 1:
528+
for _, r := range refs {
529+
w.buf2.PutBE32(r)
530+
}
531+
case 2:
532+
// The base.
533+
w.buf2.PutUvarint32(refs[0])
534+
// The width.
535+
width := bits.Len32(uint32(refs[len(refs)-1]-refs[0]))
536+
w.buf2.PutByte(byte(width))
537+
for _, r := range refs {
538+
w.buf2.PutBits(uint64(r-refs[0]), width)
539+
}
540+
case 3:
541+
writeDeltaBlockPostings(&w.buf2, refs)
527542
}
543+
528544
w.uint32s = refs
529545

530546
w.buf1.Reset()
@@ -1028,8 +1044,21 @@ type Decoder struct {
10281044
func (dec *Decoder) Postings(b []byte) (int, Postings, error) {
10291045
d := encoding.Decbuf{B: b}
10301046
// n is the leading 32-bit value; WritePostings stores len(refs) there.
n := d.Be32int()
1031-
l := d.Get()
1032-
return n, newBigEndianPostings(l), d.Err()
1047+
// postingsType (declared outside this function) selects how the bytes after
// the count are interpreted.
// NOTE(review): every branch builds its Postings before d.Err() is looked at,
// so callers should check the returned error before using the iterator.
switch postingsType {
1048+
case 1:
1049+
// The remaining bytes are handed to newBigEndianPostings unchanged.
l := d.Get()
1050+
return n, newBigEndianPostings(l), d.Err()
1051+
case 2:
1052+
// A uvarint base and a one-byte bit width precede the payload.
base := uint32(d.Uvarint())
1053+
width := int(d.Byte())
1054+
l := d.Get()
1055+
return n, newBaseDeltaPostings(l, base, width, n), d.Err()
1056+
case 3:
1057+
// The remaining bytes and the count go to newDeltaBlockPostings.
l := d.Get()
1058+
return n, newDeltaBlockPostings(l, n), d.Err()
1059+
default:
1060+
// Any other postingsType yields an empty postings list.
return n, EmptyPostings(), d.Err()
1061+
}
10331062
}
10341063

10351064
// Series decodes a series entry from the given byte slice into lset and chks.

0 commit comments

Comments
 (0)