diff --git a/encoding/encoding.go b/encoding/encoding.go index a732a604..6658c605 100644 --- a/encoding/encoding.go +++ b/encoding/encoding.go @@ -29,11 +29,16 @@ var ( // Encbuf is a helper type to populate a byte slice with various types. type Encbuf struct { - B []byte - C [binary.MaxVarintLen64]byte + B []byte + C [binary.MaxVarintLen64]byte + Count uint8 +} + +func (e *Encbuf) Reset() { + e.B = e.B[:0] + e.Count = 0 } -func (e *Encbuf) Reset() { e.B = e.B[:0] } func (e *Encbuf) Get() []byte { return e.B } func (e *Encbuf) Len() int { return len(e.B) } @@ -82,6 +87,55 @@ func (e *Encbuf) PutHash(h hash.Hash) { e.B = h.Sum(e.B) } +type bit bool + +func (e *Encbuf) putBit(bit bit) { + if e.Count == 0 { + e.B = append(e.B, 0) + e.Count = 8 + } + + i := len(e.B) - 1 + + if bit { + e.B[i] |= 1 << (e.Count - 1) + } + + e.Count-- +} + +func (e *Encbuf) putByte(byt byte) { + if e.Count == 0 { + e.B = append(e.B, 0) + e.Count = 8 + } + + i := len(e.B) - 1 + + // fill up e.B with e.Count bits from byt + e.B[i] |= byt >> (8 - e.Count) + + e.B = append(e.B, 0) + i++ + e.B[i] = byt << e.Count +} + +func (e *Encbuf) PutBits(u uint64, nbits int) { + u <<= (64 - uint(nbits)) + for nbits >= 8 { + byt := byte(u >> 56) + e.putByte(byt) + u <<= 8 + nbits -= 8 + } + + for nbits > 0 { + e.putBit((u >> 63) == 1) + u <<= 1 + nbits-- + } +} + // Decbuf provides safe methods to extract data from a byte slice. It does all // necessary bounds checking and advancing of the byte slice. // Several datums can be extracted without checking for errors. However, before using diff --git a/index/index.go b/index/index.go index 6b333fa5..aab9c37f 100644 --- a/index/index.go +++ b/index/index.go @@ -21,6 +21,7 @@ import ( "io" "io/ioutil" "math" + "math/bits" "os" "path/filepath" "sort" @@ -136,6 +137,10 @@ type Writer struct { Version int } +func (w *Writer) GetP() uint64 { + return w.pos +} + // TOC represents index Table Of Content that states where each section of index starts. type TOC struct { Symbols uint64 @@ -522,9 +527,106 @@ func (w *Writer) WritePostings(name, value string, it Postings) error { w.buf2.Reset() w.buf2.PutBE32int(len(refs)) + switch postingsType { + case 1: + for _, r := range refs { + w.buf2.PutBE32(r) + } + case 2: + // The base. + w.buf2.PutUvarint32(refs[0]) + // The width. 
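+		// Number of bytes needed to hold the largest delta (the last reference
+		// minus the base), rounded up to whole bytes.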
+ width := (bits.Len32(refs[len(refs)-1] - refs[0]) + 7) >> 3 + if width == 0 { + width = 1 + } + w.buf2.PutByte(byte(width)) + for i := 0; i < 8 - width; i++ { + w.buf2.PutByte(0) + } + for i := 0; i < len(refs); i++ { + for j := width - 1; j >= 0; j-- { + w.buf2.B = append(w.buf2.B, byte(((refs[i]-refs[0])>>(8*uint(j))&0xff))) + } + } + case 3: + writeDeltaBlockPostings(&w.buf2, refs) + case 4: + writeBaseDeltaBlockPostings(&w.buf2, refs) + case 5: + writeBitmapPostings(&w.buf2, refs) + case 6: + writeRoaringBitmapPostings(&w.buf2, refs) + // if len(refs) < 32 { + // w.buf2.PutByte(0) + // for _, r := range refs { + // w.buf2.PutBE32(r) + // } + // } else { + // w.buf2.PutByte(1) + // writeRoaringBitmapPostings(&w.buf2, refs) + // } + case 7: + writeBaseDeltaBlock16Postings(&w.buf2, refs) + case 8: + writeBaseDeltaBlock16PostingsV2(&w.buf2, refs) + } + + w.uint32s = refs + + w.buf1.Reset() + w.buf1.PutBE32int(w.buf2.Len()) + + w.buf2.PutHash(w.crc32) + + err := w.write(w.buf1.Get(), w.buf2.Get()) + return errors.Wrap(err, "write postings") +} + +func (w *Writer) WritePostings1(name, value string, it Postings) (uint64, error) { + if err := w.ensureStage(idxStagePostings); err != nil { + return 0, errors.Wrap(err, "ensure stage") + } + + start := w.pos + + // Align beginning to 4 bytes for more efficient postings list scans. + if err := w.addPadding(4); err != nil { + return 0, err + } + + w.postings = append(w.postings, hashEntry{ + keys: []string{name, value}, + offset: w.pos, + }) + + // Order of the references in the postings list does not imply order + // of the series references within the persisted block they are mapped to. + // We have to sort the new references again. + refs := w.uint32s[:0] + + for it.Next() { + offset, ok := w.seriesOffsets[it.At()] + if !ok { + return 0, errors.Errorf("%p series for reference %d not found", w, it.At()) + } + if offset > (1<<32)-1 { + return 0, errors.Errorf("series offset %d exceeds 4 bytes", offset) + } + refs = append(refs, uint32(offset)) + } + if err := it.Err(); err != nil { + return 0, err + } + sort.Sort(uint32slice(refs)) + + w.buf2.Reset() + w.buf2.PutBE32int(len(refs)) + for _, r := range refs { w.buf2.PutBE32(r) } + w.uint32s = refs w.buf1.Reset() @@ -533,7 +635,128 @@ func (w *Writer) WritePostings(name, value string, it Postings) error { w.buf2.PutHash(w.crc32) err := w.write(w.buf1.Get(), w.buf2.Get()) - return errors.Wrap(err, "write postings") + return w.pos - start, errors.Wrap(err, "write postings") +} + +func (w *Writer) WritePostings2(name, value string, it Postings) (uint64, int, error) { + if err := w.ensureStage(idxStagePostings); err != nil { + return 0, 0, errors.Wrap(err, "ensure stage") + } + + // Align beginning to 4 bytes for more efficient postings list scans. + // if err := w.addPadding(4); err != nil { + // return err + // } + + start := w.pos + + w.postings = append(w.postings, hashEntry{ + keys: []string{name, value}, + offset: w.pos, + }) + + // Order of the references in the postings list does not imply order + // of the series references within the persisted block they are mapped to. + // We have to sort the new references again. 
+ refs := w.uint32s[:0] + + for it.Next() { + offset, ok := w.seriesOffsets[it.At()] + if !ok { + return 0, 0, errors.Errorf("%p series for reference %d not found", w, it.At()) + } + if offset > (1<<32)-1 { + return 0, 0, errors.Errorf("series offset %d exceeds 4 bytes", offset) + } + refs = append(refs, uint32(offset)) + } + if err := it.Err(); err != nil { + return 0, 0, err + } + sort.Sort(uint32slice(refs)) + + w.buf2.Reset() + w.buf2.PutBE32int(len(refs)) + + n := writeBaseDeltaBlock16Postings(&w.buf2, refs) + + w.uint32s = refs + + w.buf1.Reset() + w.buf1.PutBE32int(w.buf2.Len()) + + w.buf2.PutHash(w.crc32) + + err := w.write(w.buf1.Get(), w.buf2.Get()) + return w.pos - start, n, errors.Wrap(err, "write postings") +} + +func (w *Writer) WritePostings3(name, value string, it Postings) (uint64, error) { + if err := w.ensureStage(idxStagePostings); err != nil { + return 0, errors.Wrap(err, "ensure stage") + } + + // Align beginning to 4 bytes for more efficient postings list scans. + // if err := w.addPadding(4); err != nil { + // return err + // } + + start := w.pos + + w.postings = append(w.postings, hashEntry{ + keys: []string{name, value}, + offset: w.pos, + }) + + // Order of the references in the postings list does not imply order + // of the series references within the persisted block they are mapped to. + // We have to sort the new references again. + refs := w.uint32s[:0] + + for it.Next() { + offset, ok := w.seriesOffsets[it.At()] + if !ok { + return 0, errors.Errorf("%p series for reference %d not found", w, it.At()) + } + if offset > (1<<32)-1 { + return 0, errors.Errorf("series offset %d exceeds 4 bytes", offset) + } + refs = append(refs, uint32(offset)) + } + if err := it.Err(); err != nil { + return 0, err + } + sort.Sort(uint32slice(refs)) + + w.buf2.Reset() + w.buf2.PutBE32int(len(refs)) + + // The base. + w.buf2.PutUvarint32(refs[0]) + // The width. + width := (bits.Len32(refs[len(refs)-1] - refs[0]) + 7) >> 3 + if width == 0 { + width = 1 + } + w.buf2.PutByte(byte(width)) + for i := 0; i < 8 - width; i++ { + w.buf2.PutByte(0) + } + for i := 0; i < len(refs); i++ { + for j := width - 1; j >= 0; j-- { + w.buf2.B = append(w.buf2.B, byte(((refs[i]-refs[0])>>(8*uint(j))&0xff))) + } + } + + w.uint32s = refs + + w.buf1.Reset() + w.buf1.PutBE32int(w.buf2.Len()) + + w.buf2.PutHash(w.crc32) + + err := w.write(w.buf1.Get(), w.buf2.Get()) + return w.pos - start, errors.Wrap(err, "write postings") } type uint32slice []uint32 @@ -894,6 +1117,26 @@ func (r *Reader) Series(id uint64, lbls *labels.Labels, chks *[]chunks.Meta) err // Postings returns a postings list for the given label pair. 
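+// The postings are decoded according to the package-level postingsType constant.
+// Unlike Postings1, Postings2 and Postings3 below, a missing label name or value
+// is reported as an error instead of an empty postings list.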
func (r *Reader) Postings(name, value string) (Postings, error) { + e, ok := r.postings[name] + if !ok { + return EmptyPostings(), errors.Errorf("cannot find name") + } + off, ok := e[value] + if !ok { + return EmptyPostings(), errors.Errorf("cannot find value") + } + d := encoding.NewDecbufAt(r.b, int(off), castagnoliTable) + if d.Err() != nil { + return nil, errors.Wrap(d.Err(), "get postings entry") + } + _, p, err := r.dec.Postings(d.Get()) + if err != nil { + return nil, errors.Wrap(err, "decode postings") + } + return p, nil +} + +func (r *Reader) Postings1(name, value string) (Postings, error) { e, ok := r.postings[name] if !ok { return EmptyPostings(), nil @@ -906,7 +1149,47 @@ func (r *Reader) Postings(name, value string) (Postings, error) { if d.Err() != nil { return nil, errors.Wrap(d.Err(), "get postings entry") } - _, p, err := r.dec.Postings(d.Get()) + _, p, err := r.dec.Postings1(d.Get()) + if err != nil { + return nil, errors.Wrap(err, "decode postings") + } + return p, nil +} + +func (r *Reader) Postings2(name, value string) (Postings, error) { + e, ok := r.postings[name] + if !ok { + return EmptyPostings(), nil + } + off, ok := e[value] + if !ok { + return EmptyPostings(), nil + } + d := encoding.NewDecbufAt(r.b, int(off), castagnoliTable) + if d.Err() != nil { + return nil, errors.Wrap(d.Err(), "get postings entry") + } + _, p, err := r.dec.Postings2(d.Get()) + if err != nil { + return nil, errors.Wrap(err, "decode postings") + } + return p, nil +} + +func (r *Reader) Postings3(name, value string) (Postings, error) { + e, ok := r.postings[name] + if !ok { + return EmptyPostings(), nil + } + off, ok := e[value] + if !ok { + return EmptyPostings(), nil + } + d := encoding.NewDecbufAt(r.b, int(off), castagnoliTable) + if d.Err() != nil { + return nil, errors.Wrap(d.Err(), "get postings entry") + } + _, p, err := r.dec.Postings3(d.Get()) if err != nil { return nil, errors.Wrap(err, "decode postings") } @@ -1026,12 +1309,70 @@ type Decoder struct { // Postings returns a postings list for b and its number of elements. 
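+// The concrete encoding is selected by the package-level postingsType constant:
+// 1 bigEndian, 2 baseDelta, 3 deltaBlock, 4 baseDeltaBlock, 5 bitmap,
+// 6 roaringBitmap, 7 baseDeltaBlock16, 8 baseDeltaBlock16V2.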
func (dec *Decoder) Postings(b []byte) (int, Postings, error) { + d := encoding.Decbuf{B: b} + n := d.Be32int() + switch postingsType { + case 1: + l := d.Get() + return n, newBigEndianPostings(l), d.Err() + case 2: + base := uint64(d.Uvarint()) + width := int(d.Byte()) + l := d.Get() + return n, newBaseDeltaPostings(l, base, width, n), d.Err() + case 3: + l := d.Get() + return n, newDeltaBlockPostings(l, n), d.Err() + case 4: + l := d.Get() + return n, newBaseDeltaBlockPostings(l), d.Err() + case 5: + l := d.Get() + return n, newBitmapPostings(l), d.Err() + case 6: + l := d.Get() + return n, newRoaringBitmapPostings(l), d.Err() + // typ := d.Byte() + // l := d.Get() + // if typ == 0 { + // return n, newBigEndianPostings(l), d.Err() + // } else { + // return n, newRoaringBitmapPostings(l), d.Err() + // } + case 7: + l := d.Get() + return n, newBaseDeltaBlock16Postings(l), d.Err() + case 8: + l := d.Get() + return n, newBaseDeltaBlock16PostingsV2(l), d.Err() + default: + return n, EmptyPostings(), d.Err() + } +} + +func (dec *Decoder) Postings1(b []byte) (int, Postings, error) { d := encoding.Decbuf{B: b} n := d.Be32int() l := d.Get() return n, newBigEndianPostings(l), d.Err() } +func (dec *Decoder) Postings2(b []byte) (int, Postings, error) { + d := encoding.Decbuf{B: b} + n := d.Be32int() + l := d.Get() + return n, newBaseDeltaBlock16Postings(l), d.Err() +} + +func (dec *Decoder) Postings3(b []byte) (int, Postings, error) { + d := encoding.Decbuf{B: b} + n := d.Be32int() + base := uint64(d.Uvarint()) + width := int(d.Byte()) + l := d.Get() + return n, newBaseDeltaPostings(l, base, width, n), d.Err() +} + // Series decodes a series entry from the given byte slice into lset and chks. func (dec *Decoder) Series(b []byte, lbls *labels.Labels, chks *[]chunks.Meta) error { *lbls = (*lbls)[:0] diff --git a/index/index_test.go b/index/index_test.go index 43b737c7..682ed535 100644 --- a/index/index_test.go +++ b/index/index_test.go @@ -14,6 +14,7 @@ package index import ( + "fmt" "io/ioutil" "math/rand" "os" @@ -25,6 +26,7 @@ import ( "github.com/prometheus/tsdb/chunkenc" "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/encoding" + "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/testutil" ) @@ -238,6 +240,257 @@ func TestIndexRW_Postings(t *testing.T) { testutil.Ok(t, ir.Close()) } +func rewriteIndex(inputFilePath, outputFilePath string) int { + var ( + labelsBuf labels.Labels + chunksBuf []chunks.Meta + err error + apkName, apkValue = AllPostingsKey() + values = map[string]map[string]struct{}{} + lenCount = map[int]int{} + larger1 = map[int]map[int]int{} + // larger2 = map[int]map[int]int{} + // count = uint64(0) + ) + + indexr, err := NewFileReader(inputFilePath) + if err != nil { + fmt.Println("cannot create index reader") + fmt.Fprintln(os.Stderr, err) + return 1 + } + defer indexr.Close() + + // Rename the symbols. + originalSymbols, err := indexr.Symbols() + if err != nil { + fmt.Println("index reader symbols") + fmt.Fprintln(os.Stderr, err) + return 1 + } + + indexw, err := NewWriter(outputFilePath) + if err != nil { + fmt.Println("index writer") + fmt.Fprintln(os.Stderr, err) + return 1 + } + defer indexw.Close() + + // Write symbols. + if err := indexw.AddSymbols(originalSymbols); err != nil { + fmt.Println("index writer symbols") + fmt.Fprintln(os.Stderr, err) + return 1 + } + + // Write Series. 
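+	// Fetch the all-postings list, copy every series to the new index and
+	// record the label values seen so the postings can be rewritten below.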
+ posts, err := indexr.Postings1(apkName, apkValue) + if err != nil { + fmt.Println("index reader postings") + fmt.Fprintln(os.Stderr, err) + return 1 + } + + for posts.Next() { + p := posts.At() + labelsBuf = labelsBuf[:0] + chunksBuf = chunksBuf[:0] + + if err := indexr.Series(p, &labelsBuf, &chunksBuf); err != nil { + fmt.Println("index reader series") + fmt.Fprintln(os.Stderr, err) + return 1 + } + + // Recording the original labels values which is needed + // to fetch and write the postings. + for _, l := range labelsBuf { + valset, ok := values[l.Name] + if !ok { + valset = map[string]struct{}{} + values[l.Name] = valset + } + valset[l.Value] = struct{}{} + } + + if err := indexw.AddSeries(p, labelsBuf, chunksBuf...); err != nil { + fmt.Println("index writer series") + fmt.Fprintln(os.Stderr, err) + return 1 + } + } + + names := []string{} + labelValuesBuf := []string{} + for n, v := range values { + labelValuesBuf = labelValuesBuf[:0] + names = append(names, n) + + for val := range v { + labelValuesBuf = append(labelValuesBuf, val) + } + if err := indexw.WriteLabelIndex([]string{n}, labelValuesBuf); err != nil { + return 1 + } + } + names = append(names, apkName) + values[apkName] = map[string]struct{}{apkValue: struct{}{}} + sort.Strings(names) + + for _, n := range names { + labelValuesBuf = labelValuesBuf[:0] + for v := range values[n] { + labelValuesBuf = append(labelValuesBuf, v) + } + sort.Strings(labelValuesBuf) + + for _, v := range labelValuesBuf { + posts, err := indexr.Postings1(n, v) + if err != nil { + return 1 + } + arr, _ := ExpandPostings(posts) + if _, ok := lenCount[len(arr)]; ok { + lenCount[len(arr)] += 1 + } else { + lenCount[len(arr)] = 1 + } + posts, _ = indexr.Postings1(n, v) + + // if len(arr) < 512 { + l, err := indexw.WritePostings3(n, v, posts) + if err != nil { + return 1 + } + if l > uint64(len(arr) * 4 + 12) { + if _, ok := larger1[len(arr)]; !ok { + larger1[len(arr)] = map[int]int{} + } + if _, ok := larger1[len(arr)][int(l) - (len(arr) * 4 + 12)]; ok { + larger1[len(arr)][int(l) - (len(arr) * 4 + 12)] += 1 + } else { + larger1[len(arr)][int(l) - (len(arr) * 4 + 12)] = 1 + } + } + // } else { + // if _, _, err := indexw.WritePostings2(n, v, posts); err != nil { + // return 1 + // } + // } + // l, n, _ := indexw.WritePostings2(n, v, posts) + // if len(arr) > 11 { + // if l > uint64(len(arr) * 4 + 12) { + // if _, ok := larger1[len(arr)]; !ok { + // larger1[len(arr)] = map[int]int{} + // } + // if _, ok := larger1[len(arr)][int(l) - (len(arr) * 4 + 12)]; ok { + // larger1[len(arr)][int(l) - (len(arr) * 4 + 12)] += 1 + // } else { + // larger1[len(arr)][int(l) - (len(arr) * 4 + 12)] = 1 + // } + // if _, ok := larger2[len(arr)]; !ok { + // larger2[len(arr)] = map[int]int{} + // } + // if _, ok := larger2[len(arr)][n]; ok { + // larger2[len(arr)][n] += 1 + // } else { + // larger2[len(arr)][n] = 1 + // } + // } + // } + // if len(arr) == 300 { + // fmt.Println(n) + // for _, i := range arr { + // fmt.Printf("%d,", i) + // } + // fmt.Println() + // } + } + } + // fmt.Println(lenCount) + // fmt.Println(count) + fmt.Println(larger1) + // fmt.Println() + // fmt.Println(larger2) + + return 0 +} + +func TestIndexSizeComparison(t *testing.T) { + f, err := fileutil.OpenMmapFile("../../remappedindex_corrected") + testutil.Ok(t, err) + toc, err := NewTOCFromByteSlice(realByteSlice(f.Bytes())) + testutil.Ok(t, err) + t.Log("size of postings =", toc.LabelIndicesTable-toc.Postings) + t.Log(toc) + f.Close() + + + // ir, err := NewFileReader("../../remappedindex") + // 
testutil.Ok(t, err) + // labelNames, _ := ir.LabelNames() + // // labelValues := make(map[string][]string) + // // for _, name := range labelNames { + // // vals, _ := ir.LabelValues(name) + // // arr := make([]string, vals.Len()) + // // for i := 0; i < vals.Len(); i++ { + // // arr[i], _ = vals.At(i) + // // } + // // labelValues[name] = arr + // // } + // // iw, err := NewWriter("../../remappedindex_r16") + // // testutil.Ok(t, err) + // all := []uint64{} + // lenCount := map[int]int{} + // t.Log("labelNames size =", len(labelNames)) + // for _, name := range labelNames { + // t.Log(name) + // vals, _ := ir.LabelValues(name) + // for i := 0; i < vals.Len(); i++ { + // v, _ := vals.At(i) + // p, err := ir.Postings(name, v[0]) + // testutil.Ok(t, err) + // count := 0 + // for p.Next() { + // all = append(all, p.At()) + // count += 1 + // } + // if _, ok := lenCount[count]; ok { + // lenCount[count] += 1 + // } else { + // lenCount[count] = 1 + // } + // p, err = ir.Postings(name, v[0]) + // testutil.Ok(t, err) + // // err = iw.WritePostings2(name, v[0], p) + // // testutil.Ok(t, err) + // } + // } + // sort.Slice(all, func(i, j int) bool { return all[i] < all[j] }) + // t.Log(lenCount) + // t.Log("AllPostings len =", len(all)) + // // err = iw.WritePostings2("", "", newListPostings(all...)) + // // testutil.Ok(t, err) + // ir.Close() + // // iw.Close() + + // f, err = fileutil.OpenMmapFile("../../remappedindex_r16") + // testutil.Ok(t, err) + // toc, err = NewTOCFromByteSlice(realByteSlice(f.Bytes())) + // testutil.Ok(t, err) + // t.Log("size of postings (r16) =", toc.LabelIndicesTable-toc.Postings) + // f.Close() + rewriteIndex("../../remappedindex_corrected", "../../remappedindex_corrected_1") + f, err = fileutil.OpenMmapFile("../../remappedindex_corrected_1") + testutil.Ok(t, err) + toc, err = NewTOCFromByteSlice(realByteSlice(f.Bytes())) + testutil.Ok(t, err) + t.Log("size of postings =", toc.LabelIndicesTable-toc.Postings) + t.Log(toc) + f.Close() +} + func TestPersistence_index_e2e(t *testing.T) { dir, err := ioutil.TempDir("", "test_persistence_e2e") testutil.Ok(t, err) @@ -338,6 +591,12 @@ func TestPersistence_index_e2e(t *testing.T) { err = iw.Close() testutil.Ok(t, err) + f, err := fileutil.OpenMmapFile(filepath.Join(dir, indexFilename)) + testutil.Ok(t, err) + toc, err := NewTOCFromByteSlice(realByteSlice(f.Bytes())) + testutil.Ok(t, err) + t.Log("size of postings =", toc.LabelIndicesTable-toc.Postings) + ir, err := NewFileReader(filepath.Join(dir, indexFilename)) testutil.Ok(t, err) diff --git a/index/postings.go b/index/postings.go index cef2d886..88a0e050 100644 --- a/index/postings.go +++ b/index/postings.go @@ -14,13 +14,17 @@ package index import ( + // "time" + // "fmt" "container/heap" "encoding/binary" + "math/bits" "runtime" "sort" "strings" "sync" + "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/labels" ) @@ -689,3 +693,1706 @@ func (it *bigEndianPostings) Seek(x uint64) bool { func (it *bigEndianPostings) Err() error { return nil } + +// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock, 5 is bitmapPostings, 6 is roaringBitmapPostings. +const postingsType = 7 + +type bitSlice struct { + bstream []byte + width int +} + +func (bs *bitSlice) readByte(idx int, count uint8) byte { + if count == 0 { + return bs.bstream[idx] + } + + byt := bs.bstream[idx] << count + byt |= bs.bstream[idx+1] >> (8 - count) + + return byt +} + +// This is to read the delta bitpack given an offset. +// Check whether out-of-bounds before using. 
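+// readBits returns bs.width bits starting at the given bit offset,
+// right-aligned in the returned uint64.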
+func (bs *bitSlice) readBits(offset int) uint64 { + idx := offset / 8 + count := uint8(offset % 8) + nbits := bs.width + var u uint64 + + for nbits >= 8 { + byt := bs.readByte(idx, count) + + u = (u << 8) | uint64(byt) + nbits -= 8 + idx += 1 + } + + if nbits == 0 { + return u + } + + if nbits > int(8-count) { + u = (u << uint(8-count)) | uint64((bs.bstream[idx]<>count) + nbits -= int(8 - count) + idx += 1 + + count = 0 + } + + u = (u << uint(nbits)) | uint64((bs.bstream[idx]<>(8-uint(nbits))) + return u +} + +// ┌──────────┬────────────────┬────────────┬────────────────┬─────┬────────────────┐ +// │ num <4b> │ base │ width <1b> │ delta 1 │ ... │ delta n │ +// └──────────┴────────────────┴────────────┴────────────────┴─────┴────────────────┘ +type baseDeltaPostings struct { + bs []byte + width int + base uint64 + size int + idx int + i int + cur uint64 + mask uint64 + prel int +} + +func newBaseDeltaPostings(bstream []byte, base uint64, width int, size int) *baseDeltaPostings { + return &baseDeltaPostings{bs: bstream, width: width, base: base, size: size, idx: 8 - width, cur: uint64(base), mask: (uint64(1) << (uint64(width) << 3)) - 1, prel: 8 - width} +} + +func (it *baseDeltaPostings) At() uint64 { + return it.cur +} + +func (it *baseDeltaPostings) Next() bool { + if it.i >= it.size { + return false + } + it.cur = binary.BigEndian.Uint64(it.bs[it.idx-it.prel:])&it.mask + it.base + it.idx += it.width + it.i += 1 + return true +} + +func (it *baseDeltaPostings) Seek(x uint64) bool { + if it.cur >= x { + return true + } + + num := it.size - it.i + x -= it.base + i := sort.Search(num, func(i int) bool { + return binary.BigEndian.Uint64(it.bs[it.idx+i*it.width-it.prel:])&it.mask >= x + }) + if i < num { + it.idx += i * it.width + it.cur = it.base + (binary.BigEndian.Uint64(it.bs[it.idx-it.prel:])&it.mask) + it.idx += it.width + it.i += i + 1 + return true + } + return false +} + +func (it *baseDeltaPostings) Err() error { + return nil +} + +const deltaBlockSize = 32 +const deltaBlockBits = 5 + +// Block format(delta is to the previous value). +// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐ +// │ base │ idx │ count │ width <1b> │ delta 1 │ ... │ delta n │ +// └────────────────┴───────────────┴─────────────────┴────────────┴────────────────┴─────┴────────────────┘ +type deltaBlockPostings struct { + bs bitSlice + size int + count int // count in current block. + idxBlock int + idx int + offset int // offset in bit. 
+ cur uint64 +} + +func newDeltaBlockPostings(bstream []byte, size int) *deltaBlockPostings { + return &deltaBlockPostings{bs: bitSlice{bstream: bstream}, size: size} +} + +func (it *deltaBlockPostings) GetOff() int { + return it.offset +} +func (it *deltaBlockPostings) GetWidth() int { + return it.bs.width +} + +func (it *deltaBlockPostings) At() uint64 { + return it.cur +} + +func (it *deltaBlockPostings) Next() bool { + if it.offset >= len(it.bs.bstream)<<3 || it.idx >= it.size { + return false + } + if it.offset%(deltaBlockSize<<3) == 0 { + val, n := binary.Uvarint(it.bs.bstream[it.offset>>3:]) + if n < 1 { + return false + } + it.cur = val + it.offset += n << 3 + val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:]) + if n < 1 { + return false + } + it.idx = int(val) + 1 + it.offset += n << 3 + val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:]) + if n < 1 { + return false + } + it.count = int(val) + it.offset += n << 3 + it.bs.width = int(it.bs.bstream[it.offset>>3]) + it.offset += 8 + it.idxBlock = 1 + return true + } + + it.cur = it.bs.readBits(it.offset) + it.cur + it.offset += it.bs.width + it.idx += 1 + it.idxBlock += 1 + if it.idxBlock == it.count { + it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3 + } + return true +} + +func (it *deltaBlockPostings) Seek(x uint64) bool { + if it.cur >= x { + return true + } + + startOff := (it.offset - 1) / (deltaBlockSize << 3) * deltaBlockSize + num := (len(it.bs.bstream)-1)/deltaBlockSize - (it.offset-1)/(deltaBlockSize<<3) + 1 + // Do binary search between current position and end. + i := sort.Search(num, func(i int) bool { + val, _ := binary.Uvarint(it.bs.bstream[startOff+i*deltaBlockSize:]) + return val > x + }) + if i > 0 { + // Go to the previous block because the previous block + // may contain the first value >= x. + i -= 1 + } + it.offset = (startOff + i*deltaBlockSize) << 3 + for it.Next() { + if it.At() >= x { + return true + } + } + return false +} + +func (it *deltaBlockPostings) Err() error { + return nil +} + +func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) { + i := 0 + startLen := len(e.B) + deltas := []uint32{} + var remaining int + var preVal uint32 + var max int + for i < len(arr) { + e.PutUvarint32(arr[i]) // Put base. + e.PutUvarint64(uint64(i)) // Put idx. + remaining = (deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1) << 3 + deltas = deltas[:0] + preVal = arr[i] + max = -1 + i += 1 + for i < len(arr) { + delta := arr[i] - preVal + cur := bits.Len32(delta) + if cur <= max { + cur = max + } + if remaining-cur*(len(deltas)+1)-(((bits.Len(uint(len(deltas)))>>3)+1)<<3) >= 0 { + deltas = append(deltas, delta) + max = cur + preVal = arr[i] + } else { + break + } + i += 1 + } + e.PutUvarint64(uint64(len(deltas) + 1)) + e.PutByte(byte(max)) + remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1) << 3 + for _, delta := range deltas { + e.PutBits(uint64(delta), max) + remaining -= max + } + + if i == len(arr) { + break + } + + for remaining >= 64 { + e.PutBits(uint64(0), 64) + remaining -= 64 + } + + if remaining > 0 { + e.PutBits(uint64(0), remaining) + } + e.Count = 0 + + // There can be one more extra 0. + e.B = e.B[:len(e.B)-(len(e.B)-startLen)%deltaBlockSize] + } +} + +// Block format(delta is to the base). +// ┌────────────────┬─────────────────┬────────────┬─────────────────┬─────┬─────────────────┐ +// │ base │ count │ width <1b> │ delta 1 │ ... 
│ delta n │ +// └────────────────┴─────────────────┴────────────┴─────────────────┴─────┴─────────────────┘ +type baseDeltaBlockPostings struct { + bs bitSlice + count int // count in current block. + idxBlock int + idx int + offset int // offset in bit. + cur uint64 + base uint64 + mask uint64 + prel int +} + +func newBaseDeltaBlockPostings(bstream []byte) *baseDeltaBlockPostings { + return &baseDeltaBlockPostings{bs: bitSlice{bstream: bstream}} +} + +func (it *baseDeltaBlockPostings) At() uint64 { + return it.cur +} + +func (it *baseDeltaBlockPostings) Next() bool { + if it.offset >= len(it.bs.bstream) { + return false + } + if it.offset%deltaBlockSize == 0 { + val, n := binary.Uvarint(it.bs.bstream[it.offset:]) + it.cur = val + it.base = val + it.offset += n + + val, n = binary.Uvarint(it.bs.bstream[it.offset:]) + it.count = int(val) + it.offset += n + it.bs.width = int(it.bs.bstream[it.offset]) + it.mask = (uint64(1) << uint(8 * it.bs.width)) - 1 + it.prel = 8 - it.bs.width + it.offset += 1 + it.idxBlock = 1 + return true + } + + if it.offset-it.prel >= 0 { + it.cur = binary.BigEndian.Uint64(it.bs.bstream[it.offset-it.prel:])&it.mask + it.base + } else { + it.cur = 0 + for i := 0; i < it.bs.width; i++ { + it.cur = (it.cur << 8) | uint64(it.bs.bstream[it.offset+i]) + } + it.cur += it.base + } + // it.cur = (binary.BigEndian.Uint64(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base + it.offset += it.bs.width + it.idxBlock += 1 + if it.idxBlock == it.count { + it.offset = (((it.offset - 1) >> deltaBlockBits) + 1) << deltaBlockBits + } + return true +} + +func (it *baseDeltaBlockPostings) Seek(x uint64) bool { + if it.cur >= x { + return true + } + if it.offset >= len(it.bs.bstream) { + return false + } + startOff := (((it.offset) >> deltaBlockBits) + 1) << deltaBlockBits + num := (len(it.bs.bstream) >> deltaBlockBits) - (startOff >> deltaBlockBits) + 1 + if num > 0 { + // Fast path to check if the binary search among blocks is needed. + val, _ := binary.Uvarint(it.bs.bstream[startOff:]) + if val <= x { + // Do binary search between current position and end. + i := sort.Search(num, func(i int) bool { + val, _ := binary.Uvarint(it.bs.bstream[startOff+(i< x + }) + if i > 0 { + // Go to the previous block because the previous block + // may contain the first value >= x. + i -= 1 + } + it.offset = startOff + (i << deltaBlockBits) + + // Read base, and width. + val, n := binary.Uvarint(it.bs.bstream[it.offset:]) + it.cur = val + it.base = val + it.offset += n + val, n = binary.Uvarint(it.bs.bstream[it.offset:]) + it.count = int(val) + it.offset += n + it.bs.width = int(it.bs.bstream[it.offset]) + it.mask = (uint64(1) << uint(8 * it.bs.width)) - 1 + it.prel = 8 - it.bs.width + it.offset += 1 + it.idxBlock = 1 + if x <= it.base { + return true + } else { + temp := x - it.base + j := sort.Search(it.count-it.idxBlock, func(i int) bool { + return (binary.BigEndian.Uint64(it.bs.bstream[it.offset+i*it.bs.width-it.prel:])&it.mask) >= temp + }) + if j < it.count-it.idxBlock { + it.offset += j * it.bs.width + it.cur = (binary.BigEndian.Uint64(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base + it.idxBlock += j + 1 + if it.idxBlock == it.count { + // it.offset = startOff + ((i+1)<> deltaBlockBits) + i + 1) << deltaBlockBits + } else { + it.offset += it.bs.width + } + } else { + // it.offset = startOff + ((i+1)<> deltaBlockBits) + i + 1) << deltaBlockBits + return it.Next() + } + return true + } + } + } + + // Search in current block. 
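+	// startOff currently points at the next block boundary; step back to the
+	// start of the block the iterator is positioned in.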
+ startOff -= deltaBlockSize + if it.offset == startOff { + // Read base, and width. + val, n := binary.Uvarint(it.bs.bstream[it.offset:]) + it.cur = val + it.base = val + it.offset += n + val, n = binary.Uvarint(it.bs.bstream[it.offset:]) + it.count = int(val) + it.offset += n + it.bs.width = int(it.bs.bstream[it.offset]) + it.mask = (uint64(1) << uint(8*it.bs.width)) - 1 + it.prel = 8 - it.bs.width + it.offset += 1 + it.idxBlock = 1 + } + if x <= it.base { + return true + } else { + temp := x - it.base + j := sort.Search(it.count-it.idxBlock, func(i int) bool { + return (binary.BigEndian.Uint64(it.bs.bstream[it.offset+i*it.bs.width-it.prel:])&it.mask) >= temp + }) + if j < it.count-it.idxBlock { + it.offset += j * it.bs.width + it.cur = (binary.BigEndian.Uint64(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base + it.idxBlock += j + 1 + if it.idxBlock == it.count { + // it.offset = startOff + deltaBlockSize + it.offset = ((startOff >> deltaBlockBits) + 1) << deltaBlockBits + } else { + it.offset += it.bs.width + } + } else { + // it.offset = startOff + deltaBlockSize + it.offset = ((startOff >> deltaBlockBits) + 1) << deltaBlockBits + return it.Next() + } + return true + } + +} + +func (it *baseDeltaBlockPostings) Err() error { + return nil +} + +func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) { + i := 0 + startLen := len(e.B) + deltas := []uint32{} + var remaining int + var base uint32 + var max int + for i < len(arr) { + e.PutUvarint32(arr[i]) // Put base. + remaining = deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1 + deltas = deltas[:0] + base = arr[i] + max = -1 + i += 1 + for i < len(arr) { + delta := arr[i] - base + cur := (bits.Len32(delta) + 7) >> 3 + if cur == 0 { + cur = 1 + } + if remaining-cur*(len(deltas)+1)-((bits.Len(uint(len(deltas)))>>3)+1) >= 0 { + deltas = append(deltas, delta) + max = cur + } else { + break + } + i += 1 + } + e.PutUvarint64(uint64(len(deltas) + 1)) + e.PutByte(byte(max)) + remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1) + for _, delta := range deltas { + for j := max - 1; j >= 0; j-- { + e.B = append(e.B, byte((delta >> (uint(j) << 3) & 0xff))) + } + remaining -= max + } + + if i == len(arr) { + break + } + + for remaining > 0 { + e.PutByte(0) + remaining -= 1 + } + } +} + +func writeBaseDeltaBlockPostings64(e *encoding.Encbuf, arr []uint64) { + i := 0 + startLen := len(e.B) + deltas := []uint64{} + var remaining int + var base uint64 + var max int + for i < len(arr) { + e.PutUvarint64(arr[i]) // Put base. + remaining = deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1 + deltas = deltas[:0] + base = arr[i] + max = -1 + i += 1 + for i < len(arr) { + delta := arr[i] - base + cur := (bits.Len64(delta) + 7) >> 3 + if cur == 0 { + cur = 1 + } + if remaining-cur*(len(deltas)+1)-((bits.Len(uint(len(deltas)))>>3)+1) >= 0 { + deltas = append(deltas, delta) + max = cur + } else { + break + } + i += 1 + } + e.PutUvarint64(uint64(len(deltas) + 1)) + e.PutByte(byte(max)) + remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1) + for _, delta := range deltas { + for j := max - 1; j >= 0; j-- { + e.B = append(e.B, byte((delta >> (uint(j) << 3) & 0xff))) + } + remaining -= max + } + + if i == len(arr) { + break + } + + for remaining > 0 { + e.PutByte(0) + remaining -= 1 + } + } +} + +// 8bits -> 256/8=32bytes, 12bits -> 4096/8=512bytes, 16bits -> 65536/8=8192bytes. +const bitmapBits = 8 + +// Bitmap block format. 
+// ┌──────────┬────────┐ +// │ key <4b> │ bitmap │ +// └──────────┴────────┘ +type bitmapPostings struct { + bs []byte + cur uint64 + inside bool + idx1 int + idx2 int + bitmapSize int + key uint32 +} + +func newBitmapPostings(bstream []byte) *bitmapPostings { + return &bitmapPostings{bs: bstream, bitmapSize: 1 << (bitmapBits - 3)} +} + +func (it *bitmapPostings) At() uint64 { + return it.cur +} + +func (it *bitmapPostings) Next() bool { + if it.inside { + for it.idx1 < it.bitmapSize { + if it.bs[it.idx1+4] == byte(0) { + it.idx1 += 1 + continue + } + for it.idx1 < it.bitmapSize { + if it.bs[it.idx1+4]&(1<= it.bitmapSize { + it.key = binary.BigEndian.Uint32(it.bs) + it.inside = true + return it.Next() + } else { + return false + } + } +} + +func (it *bitmapPostings) Seek(x uint64) bool { + if it.cur >= x { + return true + } + curKey := uint32(x) >> bitmapBits + // curVal := uint32(x) & uint32((1 << uint(bitmapBits)) - 1) + i := sort.Search(len(it.bs)/(it.bitmapSize+4), func(i int) bool { + return binary.BigEndian.Uint32(it.bs[i*(it.bitmapSize+4):]) > curKey + }) + if i > 0 { + i -= 1 + if i > 0 { + it.idx1 = 0 + it.idx2 = 0 + it.bs = it.bs[i*(it.bitmapSize+4):] + it.inside = false + } + } + for it.Next() { + if it.At() >= x { + return true + } + } + return false +} + +func (it *bitmapPostings) Err() error { + return nil +} + +func writeBitmapPostings(e *encoding.Encbuf, arr []uint32) { + key := uint32(0xffffffff) + bitmapSize := 1 << (bitmapBits - 3) + mask := uint32((1 << uint(bitmapBits)) - 1) + var curKey uint32 + var curVal uint32 + var offset int // The starting offset of the bitmap of each block. + var idx1 int + var idx2 int + for _, val := range arr { + curKey = val >> bitmapBits + curVal = val & mask + idx1 = int(curVal) >> 3 + idx2 = int(curVal) % 8 + if curKey != key { + key = curKey + e.PutBE32(uint32(key)) + offset = len(e.Get()) + for i := 0; i < bitmapSize; i++ { + e.PutByte(byte(0)) + } + } + e.B[offset+idx1] |= 1 << uint(7-idx2) + } +} + +var rbpMasks []byte +var rbpValueMask uint64 +var rbpValueSize int +var rbpBitmapSize int + +func init() { + for i := 7; i >= 0; i-- { + rbpMasks = append(rbpMasks, byte(1<> 3 +} + +// roaringBitmap block format, type 0 = array, type 1 = bitmap. +// ┌───────────────┬──────────┬──────────────┐ +// │ key │ type<1b> │ bitmap/array │ +// └───────────────┴──────────┴──────────────┘ +// footer format. +// ┌────────────┬─────────────────────┬─────┬─────────────────────┐ +// │ width <1b> │ block 1 addr │ ... │ block n addr │ +// └────────────┴─────────────────────┴─────┴─────────────────────┘ +type roaringBitmapPostings struct { + bs []byte + cur uint64 + inside bool + idx int // The current offset inside the bs. + idx1 int // The offset in the bitmap in current block in bytes. + idx2 int // The offset in the current byte in the bitmap ([0,8)). 
+ footerAddr int + key uint64 + numBlock int + blockIdx int + blockType byte + nextBlock int + width int + addrMask uint32 +} + +func newRoaringBitmapPostings(bstream []byte) *roaringBitmapPostings { + if len(bstream) <= 4 { + return nil + } + x := binary.BigEndian.Uint32(bstream) + // return &roaringBitmapPostings{bs: bstream[4:], numBlock: int(binary.BigEndian.Uint32(bstream[4+int(x):])), footerAddr: int(x), width: int(bstream[8+int(x)])} + // return &roaringBitmapPostings{bs: bstream[4:], numBlock: (len(bstream)-int(x))/4 - 1, footerAddr: int(x)} + // return &roaringBitmapPostings{bs: bstream[4:], numBlock: (len(bstream) - int(x) - 5) / int(bstream[4+int(x)]), footerAddr: int(x), width: int(bstream[4+int(x)]), addrMask: uint32((1 << (8 * uint(bstream[4+int(x)]))) - 1)} + return &roaringBitmapPostings{bs: bstream[8:], numBlock: int(binary.BigEndian.Uint32(bstream[4:])), footerAddr: int(x), width: int(bstream[8+int(x)]), addrMask: uint32((1 << (8 * uint(bstream[8+int(x)]))) - 1)} +} + +func (it *roaringBitmapPostings) At() uint64 { + return it.cur +} + +func (it *roaringBitmapPostings) Next() bool { + if it.inside { // Already entered the block. + if it.blockType == 0 { // Type array. + if it.idx < it.nextBlock { + it.cur = it.key | uint64(it.bs[it.idx]) + it.idx += 1 + return true + } + } else { // Type bitmap. + for it.idx1 < rbpBitmapSize && it.bs[it.idx+it.idx1] == 0 { + it.idx1 += 1 + } + for it.idx1 < rbpBitmapSize { + if it.bs[it.idx+it.idx1]&rbpMasks[it.idx2] != 0 { + it.cur = it.key | uint64((it.idx1<<3)+it.idx2) + it.idx2 += 1 + if it.idx2 == 8 { + it.idx1 += 1 + it.idx2 = 0 + } + return true + } else { + it.idx2 += 1 + if it.idx2 == 8 { + it.idx1 += 1 + it.idx2 = 0 + } + } + } + it.idx += rbpBitmapSize + it.idx1 = 0 + it.idx2 = 0 + } + it.blockIdx += 1 + it.inside = false + return it.Next() + } else { // Not yet entered the block. + if it.idx < it.footerAddr { + val, size := binary.Uvarint(it.bs[it.idx:]) + it.key = val << bitmapBits + it.idx += size + it.blockType = it.bs[it.idx] + it.idx += 1 + it.inside = true + + if it.blockType == 0 { + if it.blockIdx != it.numBlock-1 { + // it.nextBlock = int(it.readBits((it.footerAddr+5)*8+(it.blockIdx+1)*it.width)) + // it.nextBlock = it.readBytes(it.footerAddr+1+(it.blockIdx+1)*it.width) + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+1)*it.width-4+it.width:]) & it.addrMask) + // it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+(it.blockIdx+1)*4:])) + } else { + it.nextBlock = it.footerAddr + } + } + return it.Next() + } else { + return false + } + } +} + +func (it *roaringBitmapPostings) seekInBlock(x uint64) bool { + curVal := byte(x & rbpValueMask) + if it.blockType == 0 { + // If encoding with array, binary search. + num := (it.nextBlock - it.idx) + j := sort.Search(num, func(i int) bool { + return it.bs[it.idx+i] >= curVal + }) + if j == num { + // The first element in next block should be >= x. + it.idx = it.nextBlock + it.inside = false + return it.Next() + } + + it.cur = it.key | uint64(it.bs[it.idx+j]) + it.idx += j + 1 + return true + } else { + // If encoding with bitmap, go to the exact location of value of x. + it.idx1 = int(curVal >> 3) + it.idx2 = int(curVal % 8) + if it.bs[it.idx+it.idx1]&rbpMasks[it.idx2] != 0 { // Found x. 
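+			// Record the match and advance the bit cursor so the next call
+			// continues after it.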
+ it.cur = it.key | uint64(it.idx1*8+it.idx2) + it.idx2 += 1 + if it.idx2 == 8 { + it.idx1 += 1 + it.idx2 = 0 + } + return true + } else { + it.idx2 += 1 + if it.idx2 == 8 { + it.idx1 += 1 + it.idx2 = 0 + } + return it.Next() + } + } +} + +func (it *roaringBitmapPostings) Seek(x uint64) bool { + if it.cur >= x { + return true + } + curKey := x >> bitmapBits + if it.inside && it.key>>bitmapBits == curKey { + // Fast path. + return it.seekInBlock(x) + } else { + i := sort.Search(it.numBlock-it.blockIdx, func(i int) bool { + // off := int(it.readBits(((it.footerAddr+5)<<3)+(it.blockIdx+i)*it.width)) + // off := int(binary.BigEndian.Uint32(it.bs[it.footerAddr+4*(it.blockIdx+i):])) + // off := it.readBytes(it.footerAddr+1+(it.blockIdx+i)*it.width) + off := int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+i)*it.width-4+it.width:]) & it.addrMask) + k, _ := binary.Uvarint(it.bs[off:]) + return k >= curKey + // return binary.BigEndian.Uint32(it.bs[off:]) > curKey + }) + if i == it.numBlock-it.blockIdx { + return false + } + if i != 0 { // i > 0. + it.idx1 = 0 + it.idx2 = 0 + it.inside = false + // it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+4*(it.blockIdx+i):])) + // it.idx = int(it.readBits(((it.footerAddr+5)<<3)+(it.blockIdx+i)*it.width)) + // it.idx = it.readBytes(it.footerAddr+1+(it.blockIdx+i)*it.width) + it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+i)*it.width-4+it.width:]) & it.addrMask) + } + it.blockIdx += i + } + + val, size := binary.Uvarint(it.bs[it.idx:]) + it.key = val << bitmapBits + it.idx += size + it.blockType = it.bs[it.idx] + it.idx += 1 + it.inside = true + + if it.blockType == 0 { + if it.blockIdx != it.numBlock-1 { + // it.nextBlock = int(it.readBits((it.footerAddr+5)*8+(it.blockIdx+1)*it.width)) + // it.nextBlock = it.readBytes(it.footerAddr+1+(it.blockIdx+1)*it.width) + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+1)*it.width-4+it.width:]) & it.addrMask) + // it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+(it.blockIdx+1)*4:])) + } else { + it.nextBlock = it.footerAddr + } + } + return it.seekInBlock(x) +} + +func (it *roaringBitmapPostings) Err() error { + return nil +} + +// Read key of the block starting from off. +// func (it *roaringBitmapPostings) readKey(off int) uint32 { +// key := uint32(0) +// for i := 0; i < 4 - it.valueSize; i ++ { +// key = (key << 8) + uint32(it.bs[off+i]) +// } +// return key +// } + +func (it *roaringBitmapPostings) readBytes(off int) int { + val := 0 + for i := 0; i < it.width; i++ { + val = (val << 8) | int(it.bs[off+i]) + } + return val +} + +func (it *roaringBitmapPostings) readByte(idx int, count uint8) byte { + if count == 0 { + return it.bs[idx] + } + byt := it.bs[idx] << count + byt |= it.bs[idx+1] >> (8 - count) + + return byt +} + +func (it *roaringBitmapPostings) readBits(offset int) uint64 { + idx := offset >> 3 + count := uint8(offset % 8) + nbits := it.width + var u uint64 + + for nbits >= 8 { + byt := it.readByte(idx, count) + + u = (u << 8) | uint64(byt) + nbits -= 8 + idx += 1 + } + + if nbits == 0 { + return u + } + + if nbits > int(8-count) { + u = (u << uint(8-count)) | uint64((it.bs[idx]<>count) + nbits -= int(8 - count) + idx += 1 + + count = 0 + } + + u = (u << uint(nbits)) | uint64((it.bs[idx]<>(8-uint(nbits))) + return u +} + +func writeRoaringBitmapBlock(e *encoding.Encbuf, vals []uint32, key uint32, thres int, bitmapSize int, valueSize int) { + var offset int // The starting offset of the bitmap of each block. 
+ var idx1 uint32 // The offset in the bitmap in current block in bytes. + var idx2 uint32 // The offset in the current byte in the bitmap ([0,8)). + e.PutUvarint32(key) + if len(vals) > thres { + e.PutByte(byte(1)) + offset = len(e.Get()) + for i := 0; i < bitmapSize; i++ { + e.PutByte(byte(0)) + } + for _, val := range vals { + idx1 = val >> 3 + idx2 = val % 8 + e.B[uint32(offset)+idx1] |= 1 << uint(7-idx2) + } + } else { + c := make([]byte, 4) + e.PutByte(byte(0)) + for _, val := range vals { + binary.BigEndian.PutUint32(c[:], val) + for i := 4 - valueSize; i < 4; i++ { + e.PutByte(c[i]) + } + } + } +} + +func writeRoaringBitmapBlock64(e *encoding.Encbuf, vals []uint64, key uint64, thres int, bitmapSize int, valueSize int) { + var offset int // The starting offset of the bitmap of each block. + var idx1 uint64 // The offset in the bitmap in current block in bytes. + var idx2 uint64 // The offset in the current byte in the bitmap ([0,8)). + e.PutUvarint64(key) + if len(vals) > thres { + e.PutByte(byte(1)) + offset = len(e.Get()) + for i := 0; i < bitmapSize; i++ { + e.PutByte(byte(0)) + } + for _, val := range vals { + idx1 = val >> 3 + idx2 = val % 8 + e.B[uint64(offset)+idx1] |= 1 << uint(7-idx2) + } + } else { + c := make([]byte, 8) + e.PutByte(byte(0)) + for _, val := range vals { + binary.BigEndian.PutUint64(c[:], val) + for i := 8 - valueSize; i < 8; i++ { + e.PutByte(c[i]) + } + } + } +} + +func putBytes(e *encoding.Encbuf, val uint32, width int) { + for i := width - 1; i >= 0; i-- { + e.PutByte(byte((val >> (8 * uint(i)) & 0xff))) + } +} + +func writeRoaringBitmapPostings(e *encoding.Encbuf, arr []uint32) { + key := uint32(0xffffffff) // The initial key should be unique. + bitmapSize := 1 << (bitmapBits - 3) // Bitmap size in bytes. + valueSize := bitmapBits >> 3 // The size of the element in array in bytes. + thres := (1 << bitmapBits) / bitmapBits // Threshold of number of elements in the block for choosing encoding type. + mask := uint32((1 << uint(bitmapBits)) - 1) // Mask for the elements in the block. + var curKey uint32 + var curVal uint32 + var idx int // Index of current element in arr. + var startingOffs []uint32 // The starting offsets of each block. + var vals []uint32 // The converted values in the current block. + startOff := len(e.Get()) + e.PutBE32(0) // Footer starting offset. + e.PutBE32(0) // Number of blocks. + for idx < len(arr) { + curKey = arr[idx] >> bitmapBits // Key of block. + curVal = arr[idx] & mask // Value inside block. + if curKey != key { + // Move to next block. + if idx != 0 { + startingOffs = append(startingOffs, uint32(len(e.B))) + writeRoaringBitmapBlock(e, vals, key, thres, bitmapSize, valueSize) + vals = vals[:0] + } + key = curKey + } + vals = append(vals, curVal) + idx += 1 + } + startingOffs = append(startingOffs, uint32(len(e.B))) + writeRoaringBitmapBlock(e, vals, key, thres, bitmapSize, valueSize) + + // Put footer starting offset. + binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-8-startOff)) + binary.BigEndian.PutUint32(e.B[startOff+4:], uint32(len(startingOffs))) + width := bits.Len32(startingOffs[len(startingOffs)-1] - 8 - uint32(startOff)) + if width == 0 { + // key 0 will result in 0 width. + width += 1 + } + // e.PutBE32(uint32(len(startingOffs))) // Number of blocks. 
+ // e.PutByte(byte(width)) + // for _, off := range startingOffs { + // e.PutBits(uint64(off - 4 - uint32(startOff)), width) + // } + + e.PutByte(byte((width + 7) / 8)) + for _, off := range startingOffs { + putBytes(e, off-8-uint32(startOff), (width+7)/8) + } + + // for _, off := range startingOffs { + // e.PutBE32(off - 4 - uint32(startOff)) + // } +} + +func writeRoaringBitmapPostings64(e *encoding.Encbuf, arr []uint64) { + key := uint64(0xffffffffffffffff) // The initial key should be unique. + bitmapSize := 1 << (bitmapBits - 3) // Bitmap size in bytes. + valueSize := bitmapBits >> 3 // The size of the element in array in bytes. + thres := (1 << bitmapBits) / bitmapBits // Threshold of number of elements in the block for choosing encoding type. + mask := (uint64(1) << uint(bitmapBits)) - 1 // Mask for the elements in the block. + var curKey uint64 + var curVal uint64 + var idx int // Index of current element in arr. + var startingOffs []uint32 // The starting offsets of each block. + var vals []uint64 // The converted values in the current block. + startOff := len(e.Get()) + e.PutBE32(0) // Footer starting offset. + e.PutBE32(0) // Number of blocks. + for idx < len(arr) { + curKey = arr[idx] >> bitmapBits // Key of block. + curVal = arr[idx] & mask // Value inside block. + if curKey != key { + // Move to next block. + if idx != 0 { + startingOffs = append(startingOffs, uint32(len(e.B))) + writeRoaringBitmapBlock64(e, vals, key, thres, bitmapSize, valueSize) + vals = vals[:0] + } + key = curKey + } + vals = append(vals, curVal) + idx += 1 + } + startingOffs = append(startingOffs, uint32(len(e.B))) + writeRoaringBitmapBlock64(e, vals, key, thres, bitmapSize, valueSize) + + // Put footer starting offset. + binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-8-startOff)) + binary.BigEndian.PutUint32(e.B[startOff+4:], uint32(len(startingOffs))) + width := bits.Len32(startingOffs[len(startingOffs)-1] - 8 - uint32(startOff)) + if width == 0 { + // key 0 will result in 0 width. + width += 1 + } + + e.PutByte(byte((width + 7) / 8)) + for _, off := range startingOffs { + putBytes(e, off-8-uint32(startOff), (width+7)/8) + } +} + +type baseDeltaBlock8Postings struct { + bs []byte + cur uint64 + inside bool + idx int // The current offset inside the bs. + footerAddr int + key uint64 + numBlock int + blockIdx int + nextBlock int + width int + prel int + addrMask uint32 +} + +func newBaseDeltaBlock8Postings(bstream []byte) *baseDeltaBlock8Postings { + if len(bstream) <= 4 { + return nil + } + x := binary.BigEndian.Uint32(bstream) + width := int(bstream[8+int(x)]) + return &baseDeltaBlock8Postings{bs: bstream[8:], numBlock: int(binary.BigEndian.Uint32(bstream[4:])), footerAddr: int(x), width: width, prel: 4 - width, addrMask: uint32((1 << (8 * uint(width))) - 1)} +} + +func (it *baseDeltaBlock8Postings) At() uint64 { + return it.cur +} + +func (it *baseDeltaBlock8Postings) Next() bool { + if it.inside { // Already entered the block. + if it.idx < it.nextBlock { + it.cur = it.key | uint64(it.bs[it.idx]) + it.idx += 1 + return true + } + it.blockIdx += 1 + it.inside = false + return it.Next() + } else { // Not yet entered the block. 
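+		// If more blocks remain before the footer, read the uvarint key (the
+		// values' shared high bits) and look up where the following block
+		// starts in the footer.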
+ if it.idx < it.footerAddr { + val, size := binary.Uvarint(it.bs[it.idx:]) + it.key = val << bitmapBits + it.idx += size + it.inside = true + if it.blockIdx != it.numBlock-1 { + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+1)*it.width-it.prel:]) & it.addrMask) + } else { + it.nextBlock = it.footerAddr + } + it.cur = it.key | uint64(it.bs[it.idx]) + it.idx += 1 + return true + } else { + return false + } + } +} + +func (it *baseDeltaBlock8Postings) seekInBlock(x uint64) bool { + curVal := byte(x & rbpValueMask) + num := it.nextBlock - it.idx + j := sort.Search(num, func(i int) bool { + return it.bs[it.idx+i] >= curVal + }) + if j == num { + // Fast-path to the next block. + // The first element in next block should be >= x. + it.idx = it.nextBlock + it.blockIdx += 1 + if it.idx < it.footerAddr { + val, size := binary.Uvarint(it.bs[it.idx:]) + it.key = val << bitmapBits + it.idx += size + it.inside = true + if it.blockIdx != it.numBlock-1 { + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+1)*it.width-it.prel:]) & it.addrMask) + } else { + it.nextBlock = it.footerAddr + } + it.cur = it.key | uint64(it.bs[it.idx]) + it.idx += 1 + return true + } else { + return false + } + } + it.cur = it.key | uint64(it.bs[it.idx+j]) + it.idx += j + 1 + return true +} + +func (it *baseDeltaBlock8Postings) Seek(x uint64) bool { + if it.cur >= x { + return true + } + curKey := x >> bitmapBits + if it.inside && it.key>>bitmapBits == curKey { + // Fast path. + return it.seekInBlock(x) + } else { + i := sort.Search(it.numBlock-it.blockIdx, func(i int) bool { + off := int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+i)*it.width-it.prel:]) & it.addrMask) + k, _ := binary.Uvarint(it.bs[off:]) + return k >= curKey + }) + if i == it.numBlock-it.blockIdx { + return false + } + it.blockIdx += i + if i != 0 { // i > 0. + it.inside = false + it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+it.blockIdx*it.width-it.prel:]) & it.addrMask) + } + } + val, size := binary.Uvarint(it.bs[it.idx:]) + it.key = val << bitmapBits + it.idx += size + it.inside = true + + if it.blockIdx != it.numBlock-1 { + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+1)*it.width-it.prel:]) & it.addrMask) + } else { + it.nextBlock = it.footerAddr + } + return it.seekInBlock(x) +} + +func (it *baseDeltaBlock8Postings) Err() error { + return nil +} + +func writeBaseDelta8Block(e *encoding.Encbuf, vals []uint32, key uint32, valueSize int) { + e.PutUvarint32(key) + c := make([]byte, 4) + for _, val := range vals { + binary.BigEndian.PutUint32(c[:], val) + for i := 4 - valueSize; i < 4; i++ { + e.PutByte(c[i]) + } + } +} + +func writeBaseDeltaBlock8Postings(e *encoding.Encbuf, arr []uint32) { + key := uint32(0xffffffff) // The initial key should be unique. + valueSize := bitmapBits >> 3 // The size of the element in array in bytes. + mask := uint32((1 << uint(bitmapBits)) - 1) // Mask for the elements in the block. + var curKey uint32 + var curVal uint32 + var idx int // Index of current element in arr. + var startingOffs []uint32 // The starting offsets of each block. + var vals []uint32 // The converted values in the current block. + startOff := len(e.Get()) + e.PutBE32(0) // Footer starting offset. + e.PutBE32(0) // Number of blocks. + for idx < len(arr) { + curKey = arr[idx] >> bitmapBits // Key of block. + curVal = arr[idx] & mask // Value inside block. + if curKey != key { + // Move to next block. 
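+			// Flush the values collected for the previous key before starting a
+			// new block; nothing to flush for the very first value.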
+ if idx != 0 { + startingOffs = append(startingOffs, uint32(len(e.B))) + writeBaseDelta8Block(e, vals, key, valueSize) + vals = vals[:0] + } + key = curKey + } + vals = append(vals, curVal) + idx += 1 + } + startingOffs = append(startingOffs, uint32(len(e.B))) + writeBaseDelta8Block(e, vals, key, valueSize) + + // Put footer starting offset. + binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-8-startOff)) + binary.BigEndian.PutUint32(e.B[startOff+4:], uint32(len(startingOffs))) + width := bits.Len32(startingOffs[len(startingOffs)-1] - 8 - uint32(startOff)) + if width == 0 { + // key 0 will result in 0 width. + width += 1 + } + + e.PutByte(byte((width + 7) / 8)) + for _, off := range startingOffs { + putBytes(e, off-8-uint32(startOff), (width+7)/8) + } +} + +type baseDeltaBlock16Postings struct { + bs []byte + cur uint64 + inside bool + idx int // The current offset inside the bs. + footerAddr int + key uint64 + numBlock int + blockIdx int // The current block idx. + nextBlock int +} + +func newBaseDeltaBlock16Postings(bstream []byte) *baseDeltaBlock16Postings { + x := binary.BigEndian.Uint32(bstream) // Read the footer address. + return &baseDeltaBlock16Postings{bs: bstream[8:], numBlock: int(binary.BigEndian.Uint32(bstream[4:])), footerAddr: int(x)} +} + +func (it *baseDeltaBlock16Postings) At() uint64 { + return it.cur +} + +func (it *baseDeltaBlock16Postings) Next() bool { + if it.inside { // Already entered the block. + if it.idx < it.nextBlock { + it.cur = it.key | uint64(binary.BigEndian.Uint16(it.bs[it.idx:])) + it.idx += 2 + return true + } + it.blockIdx += 1 // Go to the next block. + } + // Currently not entered any block. + if it.idx < it.footerAddr { + it.key = binary.BigEndian.Uint64(it.bs[it.idx:]) + it.idx += 8 + it.inside = true + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+((it.blockIdx+1)<<2):])) + it.cur = it.key | uint64(binary.BigEndian.Uint16(it.bs[it.idx:])) + it.idx += 2 + return true + } else { + return false + } +} + +func (it *baseDeltaBlock16Postings) seekInBlock(x uint64) bool { + curVal := x & 0xffff + num := (it.nextBlock - it.idx) >> 1 + j := sort.Search(num, func(i int) bool { + return uint64(binary.BigEndian.Uint16(it.bs[it.idx+(i<<1):])) >= curVal + }) + if j == num { + // Fast-path to the next block. + // The first element in next block should be >= x. + it.idx = it.nextBlock + it.blockIdx += 1 + if it.idx < it.footerAddr { + it.key = binary.BigEndian.Uint64(it.bs[it.idx:]) + it.idx += 8 + it.inside = true + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+((it.blockIdx+1)<<2):])) + it.cur = it.key | uint64(binary.BigEndian.Uint16(it.bs[it.idx:])) + it.idx += 2 + return true + } else { + return false + } + } + it.cur = it.key | uint64(binary.BigEndian.Uint16(it.bs[it.idx+(j<<1):])) + it.idx += (j + 1) << 1 + return true +} + +func (it *baseDeltaBlock16Postings) Seek(x uint64) bool { + if it.cur >= x { + return true + } + curKey := (x >> 16) << 16 + if it.inside && it.key == curKey { + // Fast path for x in current block. + return it.seekInBlock(x) + } else { + i := sort.Search(it.numBlock-it.blockIdx, func(i int) bool { + off := int(binary.BigEndian.Uint32(it.bs[it.footerAddr+((it.blockIdx+i)<<2):])) + // k, _ := binary.Uvarint(it.bs[off:]) + k := binary.BigEndian.Uint64(it.bs[off:]) + return k >= curKey + }) + if i == it.numBlock-it.blockIdx { + return false + } + it.blockIdx += i + if i != 0 { // i > 0. 
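+			// Jump to the start offset of the target block (taken from the
+			// footer); its header is re-read below.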
+ it.inside = false + it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+((it.blockIdx)<<2):])) + } + } + it.key = binary.BigEndian.Uint64(it.bs[it.idx:]) + it.idx += 8 + + it.inside = true + + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+((it.blockIdx+1)<<2):])) + return it.seekInBlock(x) +} + +func (it *baseDeltaBlock16Postings) Err() error { + return nil +} + +func writeBaseDelta16Block(e *encoding.Encbuf, vals []uint32, key uint32, valueSize int) { + e.PutBE64(uint64(key)) + c := make([]byte, 4) + for _, val := range vals { + binary.BigEndian.PutUint32(c[:], val) + for i := 4 - valueSize; i < 4; i++ { + e.PutByte(c[i]) + } + } +} + +func writeBaseDelta16Block64(e *encoding.Encbuf, vals []uint64, key uint64, valueSize int) { + e.PutBE64(key) + c := make([]byte, 8) + for _, val := range vals { + binary.BigEndian.PutUint64(c[:], val) + for i := 8 - valueSize; i < 8; i++ { + e.PutByte(c[i]) + } + } +} + +func writeBaseDeltaBlock16Postings(e *encoding.Encbuf, arr []uint32) int { + key := uint32(0xffffffff) // The initial key should be unique. + valueSize := 16 >> 3 // The size of the element in array in bytes. + mask := uint32((1 << uint(16)) - 1) // Mask for the elements in the block. + invertedMask := ^mask + var curKey uint32 + var curVal uint32 + var idx int // Index of current element in arr. + var startingOffs []uint32 // The starting offsets of each block. + var vals []uint32 // The converted values in the current block. + startOff := len(e.Get()) + e.PutBE32(0) // Footer starting offset. + e.PutBE32(0) // Number of blocks. + for idx < len(arr) { + curKey = arr[idx] & invertedMask // Key of block. + curVal = arr[idx] & mask // Value inside block. + if curKey != key { + // Move to next block. + if idx != 0 { + startingOffs = append(startingOffs, uint32(len(e.B))) + writeBaseDelta16Block(e, vals, key, valueSize) + vals = vals[:0] + } + key = curKey + } + vals = append(vals, curVal) + idx += 1 + } + startingOffs = append(startingOffs, uint32(len(e.B))) + writeBaseDelta16Block(e, vals, key, valueSize) + startingOffs = append(startingOffs, uint32(len(e.B))) + + binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-8-startOff)) // Put footer starting offset. + binary.BigEndian.PutUint32(e.B[startOff+4:], uint32(len(startingOffs)-1)) // Put number of blocks. + for _, off := range startingOffs { + e.PutBE32(off-8-uint32(startOff)) + } + // e.PutUvarint32(startingOffs[0]-8-uint32(startOff)) + // width := bits.Len32(startingOffs[len(startingOffs)-1] - 4 - uint32(startOff)) + // if width == 0 { + // // key 0 will result in 0 width. + // width += 1 + // } + // e.PutByte(byte((width + 7) / 8)) + // for _, off := range startingOffs { + // putBytes(e, off - (startingOffs[len(startingOffs)-1] - 4 - uint32(startOff)), (width + 7) / 8) + // } + return len(startingOffs) - 1 +} + +func writeBaseDeltaBlock16Postings64(e *encoding.Encbuf, arr []uint64) { + key := uint64(0xffffffff) // The initial key should be unique. + valueSize := 16 >> 3 // The size of the element in array in bytes. + mask := uint64((1 << uint(16)) - 1) // Mask for the elements in the block. + invertedMask := ^mask + var curKey uint64 + var curVal uint64 + var idx int // Index of current element in arr. + var startingOffs []uint32 // The starting offsets of each block. + var vals []uint64 // The converted values in the current block. + startOff := len(e.Get()) + e.PutBE32(0) // Footer starting offset. + e.PutBE32(0) // Number of blocks. 
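+	// Both placeholders are patched in place once all blocks have been written.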
+ for idx < len(arr) { + curKey = arr[idx] & invertedMask // Key of block. + curVal = arr[idx] & mask // Value inside block. + if curKey != key { + // Move to next block. + if idx != 0 { + startingOffs = append(startingOffs, uint32(len(e.B))) + writeBaseDelta16Block64(e, vals, key, valueSize) + vals = vals[:0] + } + key = curKey + } + vals = append(vals, curVal) + idx += 1 + } + startingOffs = append(startingOffs, uint32(len(e.B))) + writeBaseDelta16Block64(e, vals, key, valueSize) + startingOffs = append(startingOffs, uint32(len(e.B))) + + binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-8-startOff)) // Put footer starting offset. + binary.BigEndian.PutUint32(e.B[startOff+4:], uint32(len(startingOffs)-1)) // Put number of blocks. + for _, off := range startingOffs { + e.PutBE32(off-8-uint32(startOff)) + } +} + +type baseDeltaBlock16PostingsV2 struct { + bs []byte + cur uint64 + inside bool + idx int // The current offset inside the bs. + footerAddr int + base uint64 + numBlock int + blockIdx int // The current block idx. + nextBlock int + width int + prel int + addrMask uint32 +} + +func newBaseDeltaBlock16PostingsV2(bstream []byte) *baseDeltaBlock16PostingsV2 { + x := binary.BigEndian.Uint32(bstream) // Read the footer address. + width := int(bstream[8+int(x)]) + return &baseDeltaBlock16PostingsV2{bs: bstream[8:], numBlock: int(binary.BigEndian.Uint32(bstream[4:])), footerAddr: int(x), width: width, prel: 4 - width, addrMask: uint32((1 << (8 * uint(width))) - 1)} +} + +func (it *baseDeltaBlock16PostingsV2) At() uint64 { + return it.cur +} + +func (it *baseDeltaBlock16PostingsV2) Next() bool { + if it.inside { // Already entered the block. + if it.idx < it.nextBlock { + it.cur = it.base + uint64(binary.BigEndian.Uint16(it.bs[it.idx:])) + it.idx += 2 + return true + } + it.blockIdx += 1 // Go to the next block. + } + // Currently not entered any block. + if it.idx < it.footerAddr { + val, size := binary.Uvarint(it.bs[it.idx:]) // Read the base. + it.base = val + it.idx += size + it.inside = true + if it.blockIdx != it.numBlock-1 { + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+1)*it.width-it.prel:]) & it.addrMask) + } else { + it.nextBlock = it.footerAddr + } + it.cur = it.base + return true + } else { + return false + } +} + +func (it *baseDeltaBlock16PostingsV2) seekInBlock(x uint64) bool { + temp := x - it.base + num := (it.nextBlock - it.idx) >> 1 + j := sort.Search(num, func(i int) bool { + return uint64(binary.BigEndian.Uint16(it.bs[it.idx+(i<<1):])) >= temp + }) + if j == num { + // Fast-path to the next block. + // The first element in next block should be >= x. + it.idx = it.nextBlock + it.blockIdx += 1 + if it.idx < it.footerAddr { + val, size := binary.Uvarint(it.bs[it.idx:]) + it.base = val + it.idx += size + it.inside = true + if it.blockIdx != it.numBlock-1 { + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+1)*it.width-it.prel:]) & it.addrMask) + } else { + it.nextBlock = it.footerAddr + } + it.cur = it.base + return true + } else { + return false + } + } + it.cur = it.base + uint64(binary.BigEndian.Uint16(it.bs[it.idx+(j<<1):])) + it.idx += (j + 1) << 1 + return true +} + +func (it *baseDeltaBlock16PostingsV2) Seek(x uint64) bool { + if it.cur >= x { + return true + } + if it.inside && bits.Len64(x - it.base) <= 16 { + // Fast path for x in current block. 
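+ // The delta from the current base still fits in 16 bits, so x may sit in this
+ // block; seekInBlock falls through to the next block when it does not.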
+ return it.seekInBlock(x) + } else { + i := sort.Search(it.numBlock-it.blockIdx, func(i int) bool { + off := int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+i)*it.width-it.prel:]) & it.addrMask) + k, _ := binary.Uvarint(it.bs[off:]) + return k > x + }) + if i > 0 { + i -= 1 + } + it.blockIdx += i + if i != 0 { // i > 0. + it.inside = false + it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+it.blockIdx*it.width-it.prel:]) & it.addrMask) + } + } + val, size := binary.Uvarint(it.bs[it.idx:]) + it.base = val + it.idx += size + it.inside = true + if it.blockIdx != it.numBlock-1 { + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+1)*it.width-it.prel:]) & it.addrMask) + } else { + it.nextBlock = it.footerAddr + } + if it.base >= x { + it.cur = it.base + return true + } + + // If the length of the diff larger than 16, directly go to the next block + // because the first value of the next block should be >= x. + if bits.Len64(x - val) > 16 { + if it.blockIdx == it.numBlock-1 { + return false + } else { + it.blockIdx += 1 + it.idx = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+it.blockIdx*it.width-it.prel:]) & it.addrMask) + val, size := binary.Uvarint(it.bs[it.idx:]) + it.base = val + it.idx += size + it.inside = true + if it.blockIdx != it.numBlock-1 { + it.nextBlock = int(binary.BigEndian.Uint32(it.bs[it.footerAddr+1+(it.blockIdx+1)*it.width-it.prel:]) & it.addrMask) + } else { + it.nextBlock = it.footerAddr + } + it.cur = it.base + uint64(binary.BigEndian.Uint16(it.bs[it.idx:])) + it.idx += 2 + return true + } + } + return it.seekInBlock(x) +} + +func (it *baseDeltaBlock16PostingsV2) Err() error { + return nil +} + +func writeBaseDelta16BlockV2(e *encoding.Encbuf, vals []uint32, base uint32) { + e.PutUvarint32(base) + c := make([]byte, 2) + for _, val := range vals { + binary.BigEndian.PutUint16(c[:], uint16(val)) + e.PutByte(c[0]) + e.PutByte(c[1]) + } +} + +func writeBaseDeltaBlock16PostingsV2(e *encoding.Encbuf, arr []uint32) { + var base uint32 + var idx int // Index of current element in arr. + var startingOffs []uint32 // The starting offsets of each block. + var vals []uint32 // The converted values in the current block. + startOff := len(e.Get()) + e.PutBE32(0) // Footer starting offset. + e.PutBE32(0) // Number of blocks. + base = arr[idx] + idx += 1 + for idx < len(arr) { + delta := arr[idx] - base + if bits.Len32(delta) > 16 { + startingOffs = append(startingOffs, uint32(len(e.B))) + writeBaseDelta16BlockV2(e, vals, base) + base = arr[idx] + idx += 1 + vals = vals[:0] + continue + } + vals = append(vals, delta) + idx += 1 + } + startingOffs = append(startingOffs, uint32(len(e.B))) + writeBaseDelta16BlockV2(e, vals, base) + + binary.BigEndian.PutUint32(e.B[startOff:], uint32(len(e.B)-8-startOff)) // Put footer starting offset. + binary.BigEndian.PutUint32(e.B[startOff+4:], uint32(len(startingOffs))) // Put number of blocks. + width := bits.Len32(startingOffs[len(startingOffs)-1] - 8 - uint32(startOff)) + if width == 0 { + // key 0 will result in 0 width. 
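+ // bits.Len32 of a zero relative offset is 0, but each footer entry still
+ // needs at least one byte, so force a minimum width of one byte.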
+ width += 1 + } + + e.PutByte(byte((width + 7) / 8)) + for _, off := range startingOffs { + putBytes(e, off-8-uint32(startOff), (width+7)/8) + } +} diff --git a/index/postings_test.go b/index/postings_test.go index 1eed1dbf..4990bc8d 100644 --- a/index/postings_test.go +++ b/index/postings_test.go @@ -14,12 +14,17 @@ package index import ( + "bufio" "encoding/binary" "fmt" + "math/bits" "math/rand" + "os" "sort" + "strconv" "testing" + "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/testutil" ) @@ -718,6 +723,1763 @@ func TestBigEndian(t *testing.T) { }) } +func TestBaseDeltaPostings(t *testing.T) { + num := 1000 + // mock a list as postings + ls := make([]uint32, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint32(rand.Int31n(25)) + 2 + } + + width := (bits.Len32(ls[len(ls)-1] - ls[0]) + 7) >> 3 + buf := encoding.Encbuf{} + for i := 0; i < 8 - width; i ++ { + buf.B = append(buf.B, 0) + } + for i := 0; i < num; i++ { + for j := width - 1; j >= 0; j-- { + buf.B = append(buf.B, byte(((ls[i]-ls[0])>>(8*uint(j))&0xff))) + } + } + // t.Log("(baseDeltaPostings) len of 1000 number = ", len(buf.Get())) + + t.Run("Iteration", func(t *testing.T) { + bdp := newBaseDeltaPostings(buf.Get(), uint64(ls[0]), width, len(ls)) + for i := 0; i < num; i++ { + testutil.Assert(t, bdp.Next() == true, "") + testutil.Equals(t, uint64(ls[i]), bdp.At()) + } + + testutil.Assert(t, bdp.Next() == false, "") + testutil.Assert(t, bdp.Err() == nil, "") + }) + + t.Run("Seek", func(t *testing.T) { + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[4], ls[4], true, + }, + { + ls[500] - 1, ls[500], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[0], ls[601], true, + }, + { + ls[600], ls[601], true, + }, + { + ls[999], ls[999], true, + }, + { + ls[999] + 10, ls[999], false, + }, + } + + bdp := newBaseDeltaPostings(buf.Get(), uint64(ls[0]), width, len(ls)) + + for _, v := range table { + testutil.Equals(t, v.found, bdp.Seek(uint64(v.seek))) + testutil.Equals(t, uint64(v.val), bdp.At()) + testutil.Assert(t, bdp.Err() == nil, "") + } + }) +} + +func TestDeltaBlockPostings(t *testing.T) { + num := 1000 + // mock a list as postings + ls := make([]uint32, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint32(rand.Int31n(25)) + 2 + } + + buf := encoding.Encbuf{} + writeDeltaBlockPostings(&buf, ls) + // t.Log("(deltaBlockPostings) len of 1000 number = ", len(buf.Get())) + + t.Run("Iteration", func(t *testing.T) { + dbp := newDeltaBlockPostings(buf.Get(), len(ls)) + for i := 0; i < num; i++ { + testutil.Assert(t, dbp.Next() == true, "") + if uint64(ls[i]) != dbp.At() { + t.Log(i, dbp.GetOff(), "width=", dbp.GetWidth()) + } + testutil.Equals(t, uint64(ls[i]), dbp.At()) + } + + testutil.Assert(t, dbp.Next() == false, "") + testutil.Assert(t, dbp.Err() == nil, "") + }) + + t.Run("Seek", func(t *testing.T) { + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[4], ls[4], true, + }, + { + ls[500] - 1, ls[500], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[0], ls[601], true, + }, + { + ls[600], ls[601], true, + }, + { + ls[999], ls[999], true, + }, + { + ls[999] + 10, ls[999], false, + }, + } + + dbp := newDeltaBlockPostings(buf.Get(), len(ls)) + + for _, v := range table { + 
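// Postings.Seek only moves forward: once the iterator has passed a value,
+ // seeking an earlier target (e.g. ls[0] after reaching ls[601]) must return
+ // true and leave the iterator in place, which is why those rows expect ls[601].
+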
testutil.Equals(t, v.found, dbp.Seek(uint64(v.seek))) + testutil.Equals(t, uint64(v.val), dbp.At()) + testutil.Assert(t, dbp.Err() == nil, "") + } + }) +} + +func TestBaseDeltaBlockPostings(t *testing.T) { + num := 1000 + // mock a list as postings + ls := make([]uint32, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint32(rand.Int31n(25)) + 2 + } + + buf := encoding.Encbuf{} + writeBaseDeltaBlockPostings(&buf, ls) + // t.Log("(deltaBlockPostings) len of 1000 number = ", len(buf.Get())) + + t.Run("Iteration", func(t *testing.T) { + dbp := newBaseDeltaBlockPostings(buf.Get()) + for i := 0; i < num; i++ { + testutil.Assert(t, dbp.Next() == true, "") + testutil.Equals(t, uint64(ls[i]), dbp.At()) + } + + testutil.Assert(t, dbp.Next() == false, "") + testutil.Assert(t, dbp.Err() == nil, "") + }) + + t.Run("Seek", func(t *testing.T) { + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[4], ls[4], true, + }, + { + ls[500] - 1, ls[500], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[0], ls[601], true, + }, + { + ls[600], ls[601], true, + }, + { + ls[999], ls[999], true, + }, + { + ls[999] + 10, ls[999], false, + }, + } + + dbp := newBaseDeltaBlockPostings(buf.Get()) + + for _, v := range table { + // fmt.Println(i) + testutil.Equals(t, v.found, dbp.Seek(uint64(v.seek))) + testutil.Equals(t, uint64(v.val), dbp.At()) + testutil.Assert(t, dbp.Err() == nil, "") + } + }) +} + +func TestBitmapPostings(t *testing.T) { + num := 1000 + // mock a list as postings + ls := make([]uint32, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint32(rand.Int31n(25)) + 2 + // ls[i] = ls[i-1] + 2 + } + + buf := encoding.Encbuf{} + writeBitmapPostings(&buf, ls) + // t.Log("len", len(buf.Get())) + + t.Run("Iteration", func(t *testing.T) { + bp := newBitmapPostings(buf.Get()) + for i := 0; i < num; i++ { + testutil.Assert(t, bp.Next() == true, "") + // t.Log("ls[i] =", ls[i], "bp.At() =", bp.At()) + testutil.Equals(t, uint64(ls[i]), bp.At()) + } + + testutil.Assert(t, bp.Next() == false, "") + testutil.Assert(t, bp.Err() == nil, "") + }) + + t.Run("Seek", func(t *testing.T) { + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[4], ls[4], true, + }, + { + ls[500] - 1, ls[500], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[0], ls[601], true, + }, + { + ls[600], ls[601], true, + }, + { + ls[999], ls[999], true, + }, + { + ls[999] + 10, ls[999], false, + }, + } + + bp := newBitmapPostings(buf.Get()) + + for _, v := range table { + testutil.Equals(t, v.found, bp.Seek(uint64(v.seek))) + testutil.Equals(t, uint64(v.val), bp.At()) + testutil.Assert(t, bp.Err() == nil, "") + } + }) +} + +func TestRoaringBitmapPostings(t *testing.T) { + num := 1000 + // mock a list as postings + ls := make([]uint32, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint32(rand.Int31n(15)) + 2 + // ls[i] = ls[i-1] + 10 + } + + buf := encoding.Encbuf{} + writeRoaringBitmapPostings(&buf, ls) + // t.Log("len", len(buf.Get())) + + t.Run("Iteration", func(t *testing.T) { + rbp := newRoaringBitmapPostings(buf.Get()) + for i := 0; i < num; i++ { + testutil.Assert(t, rbp.Next() == true, "") + if uint64(ls[i]) != rbp.At() { + t.Log("ls[i] =", ls[i], "rbp.At() =", rbp.At(), " i =", i) + } + testutil.Equals(t, 
uint64(ls[i]), rbp.At()) + } + + testutil.Assert(t, rbp.Next() == false, "") + testutil.Assert(t, rbp.Err() == nil, "") + }) + + t.Run("Seek", func(t *testing.T) { + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[4], ls[4], true, + }, + { + ls[500] - 1, ls[500], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[0], ls[601], true, + }, + { + ls[600], ls[601], true, + }, + { + ls[999], ls[999], true, + }, + { + ls[999] + 10, ls[999], false, + }, + } + + rbp := newRoaringBitmapPostings(buf.Get()) + + for _, v := range table { + // t.Log("i", i) + testutil.Equals(t, v.found, rbp.Seek(uint64(v.seek))) + testutil.Equals(t, uint64(v.val), rbp.At()) + testutil.Assert(t, rbp.Err() == nil, "") + } + }) +} + +func TestBaseDeltaBlock8Postings(t *testing.T) { + num := 1000 + // mock a list as postings + ls := make([]uint32, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint32(rand.Int31n(15)) + 2 + // ls[i] = ls[i-1] + 10 + } + + buf := encoding.Encbuf{} + writeBaseDeltaBlock8Postings(&buf, ls) + // t.Log("len", len(buf.Get())) + + t.Run("Iteration", func(t *testing.T) { + rbp := newBaseDeltaBlock8Postings(buf.Get()) + for i := 0; i < num; i++ { + testutil.Assert(t, rbp.Next() == true, "") + if uint64(ls[i]) != rbp.At() { + t.Log("ls[i] =", ls[i], "rbp.At() =", rbp.At(), " i =", i) + } + testutil.Equals(t, uint64(ls[i]), rbp.At()) + } + + testutil.Assert(t, rbp.Next() == false, "") + testutil.Assert(t, rbp.Err() == nil, "") + }) + + t.Run("Seek", func(t *testing.T) { + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[4], ls[4], true, + }, + { + ls[500] - 1, ls[500], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[0], ls[601], true, + }, + { + ls[600], ls[601], true, + }, + { + ls[999], ls[999], true, + }, + { + ls[999] + 10, ls[999], false, + }, + } + + rbp := newBaseDeltaBlock8Postings(buf.Get()) + + for _, v := range table { + // t.Log("i", i) + testutil.Equals(t, v.found, rbp.Seek(uint64(v.seek))) + testutil.Equals(t, uint64(v.val), rbp.At()) + testutil.Assert(t, rbp.Err() == nil, "") + } + }) +} + +func TestBaseDeltaBlock16Postings(t *testing.T) { + num := 1000 + // mock a list as postings + ls := make([]uint32, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint32(rand.Int31n(15)) + 2 + // ls[i] = ls[i-1] + 10 + } + + buf := encoding.Encbuf{} + writeBaseDeltaBlock16Postings(&buf, ls) + // t.Log("len", len(buf.Get())) + + t.Run("Iteration", func(t *testing.T) { + rbp := newBaseDeltaBlock16Postings(buf.Get()) + for i := 0; i < num; i++ { + testutil.Assert(t, rbp.Next() == true, "") + if uint64(ls[i]) != rbp.At() { + t.Log("ls[i] =", ls[i], "rbp.At() =", rbp.At(), " i =", i) + } + testutil.Equals(t, uint64(ls[i]), rbp.At()) + } + + testutil.Assert(t, rbp.Next() == false, "") + testutil.Assert(t, rbp.Err() == nil, "") + }) + + t.Run("Seek", func(t *testing.T) { + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[4], ls[4], true, + }, + { + ls[500] - 1, ls[500], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[0], ls[601], true, + }, + { + ls[600], ls[601], true, + }, + { + ls[999], ls[999], true, + }, + { + ls[999] + 10, 
ls[999], false, + }, + } + + rbp := newBaseDeltaBlock16Postings(buf.Get()) + + for _, v := range table { + // t.Log("i", i) + testutil.Equals(t, v.found, rbp.Seek(uint64(v.seek))) + testutil.Equals(t, uint64(v.val), rbp.At()) + testutil.Assert(t, rbp.Err() == nil, "") + } + }) +} + +func TestBaseDeltaBlock16PostingsV2(t *testing.T) { + num := 1000 + // mock a list as postings + ls := make([]uint32, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint32(rand.Int31n(15)) + 2 + // ls[i] = ls[i-1] + 10 + } + + buf := encoding.Encbuf{} + writeBaseDeltaBlock16PostingsV2(&buf, ls) + // t.Log("len", len(buf.Get())) + + t.Run("Iteration", func(t *testing.T) { + rbp := newBaseDeltaBlock16PostingsV2(buf.Get()) + for i := 0; i < num; i++ { + testutil.Assert(t, rbp.Next() == true, "") + if uint64(ls[i]) != rbp.At() { + t.Log("ls[i] =", ls[i], "rbp.At() =", rbp.At(), " i =", i) + } + testutil.Equals(t, uint64(ls[i]), rbp.At()) + } + + testutil.Assert(t, rbp.Next() == false, "") + testutil.Assert(t, rbp.Err() == nil, "") + }) + + t.Run("Seek", func(t *testing.T) { + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[4], ls[4], true, + }, + { + ls[500] - 1, ls[500], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[0], ls[601], true, + }, + { + ls[600], ls[601], true, + }, + { + ls[999], ls[999], true, + }, + { + ls[999] + 10, ls[999], false, + }, + } + + rbp := newBaseDeltaBlock16PostingsV2(buf.Get()) + + for _, v := range table { + // t.Log("i", i) + testutil.Equals(t, v.found, rbp.Seek(uint64(v.seek))) + testutil.Equals(t, uint64(v.val), rbp.At()) + testutil.Assert(t, rbp.Err() == nil, "") + } + }) +} + +func TestRoaringBitmapPostings64(t *testing.T) { + num := 1000 + // mock a list as postings + ls := make([]uint64, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint64(rand.Int63n(15)) + 2 + // ls[i] = ls[i-1] + 10 + } + + buf := encoding.Encbuf{} + writeRoaringBitmapPostings64(&buf, ls) + // t.Log("len", len(buf.Get())) + + t.Run("Iteration", func(t *testing.T) { + rbp := newRoaringBitmapPostings(buf.Get()) + for i := 0; i < num; i++ { + testutil.Assert(t, rbp.Next() == true, "") + // t.Log("ls[i] =", ls[i], "rbp.At() =", rbp.At()) + testutil.Equals(t, ls[i], rbp.At()) + } + + testutil.Assert(t, rbp.Next() == false, "") + testutil.Assert(t, rbp.Err() == nil, "") + }) + + t.Run("Seek", func(t *testing.T) { + table := []struct { + seek uint64 + val uint64 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[4], ls[4], true, + }, + { + ls[500] - 1, ls[500], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[600] + 1, ls[601], true, + }, + { + ls[0], ls[601], true, + }, + { + ls[600], ls[601], true, + }, + { + ls[999], ls[999], true, + }, + { + ls[999] + 10, ls[999], false, + }, + } + + rbp := newRoaringBitmapPostings(buf.Get()) + + for _, v := range table { + // t.Log("i", i) + testutil.Equals(t, v.found, rbp.Seek(v.seek)) + testutil.Equals(t, v.val, rbp.At()) + testutil.Assert(t, rbp.Err() == nil, "") + } + }) +} + +func BenchmarkRandomPostings(b *testing.B) { + num := 100000 + ls := make([]uint32, num) + existedNum := make(map[uint32]struct{}) + for i := 0; i < num; i++ { + for { + x := uint32(rand.Int31n(1000000)) + if _, ok := existedNum[x]; !ok { + ls[i] = x + existedNum[x] = struct{}{} + break + } + } + } + sort.Sort(uint32slice(ls)) + + // bigEndianPostings. 
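+ // Baseline encoding: every posting as a raw big-endian uint32, so the buffer
+ // is exactly 4*num bytes; the b.Log calls report each encoding's size for comparison.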
+ bufBE := make([]byte, num*4) + for i := 0; i < num; i++ { + b := bufBE[i*4 : i*4+4] + binary.BigEndian.PutUint32(b, ls[i]) + } + b.Log("bigEndianPostings size =", len(bufBE)) + + bufBDB16 := encoding.Encbuf{} + temp := make([]uint64, 0, len(ls)) + for _, x := range ls { + temp = append(temp, uint64(x)) + } + writeBaseDeltaBlock16Postings64(&bufBDB16, temp) + b.Log("baseDeltaBlock16Postings (64bit)", len(bufBDB16.Get())) + + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] + 1, ls[1], true, + }, + { + ls[1000], ls[1000], true, + }, + { + ls[1001], ls[1001], true, + }, + { + ls[2000]+1, ls[2001], true, + }, + { + ls[3000], ls[3000], true, + }, + { + ls[3001], ls[3001], true, + }, + { + ls[4000]+1, ls[4001], true, + }, + { + ls[5000], ls[5000], true, + }, + { + ls[5001], ls[5001], true, + }, + { + ls[6000]+1, ls[6001], true, + }, + { + ls[10000], ls[10000], true, + }, + { + ls[10001], ls[10001], true, + }, + { + ls[20000]+1, ls[20001], true, + }, + { + ls[30000], ls[30000], true, + }, + { + ls[30001], ls[30001], true, + }, + { + ls[40000]+1, ls[40001], true, + }, + { + ls[50000], ls[50000], true, + }, + { + ls[50001], ls[50001], true, + }, + { + ls[60000]+1, ls[60001], true, + }, + { + ls[70000], ls[70000], true, + }, + { + ls[70001], ls[70001], true, + }, + { + ls[80000]+1, ls[80001], true, + }, + { + ls[99999], ls[99999], true, + }, + { + ls[99999] + 10, ls[99999], false, + }, + } + + b.Run("bigEndianIteration", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bep := newBigEndianPostings(bufBE) + // bench.StartTimer() + + for i := 0; i < num; i++ { + testutil.Assert(bench, bep.Next() == true, "") + testutil.Equals(bench, uint64(ls[i]), bep.At()) + } + testutil.Assert(bench, bep.Next() == false, "") + testutil.Assert(bench, bep.Err() == nil, "") + } + }) + b.Run("baseDeltaBlock16PostingsIteration (64bit)", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + rbm := newBaseDeltaBlock16Postings(bufBDB16.Get()) + // bench.StartTimer() + + for i := 0; i < num; i++ { + testutil.Assert(bench, rbm.Next() == true, "") + testutil.Equals(bench, uint64(ls[i]), rbm.At()) + } + testutil.Assert(bench, rbm.Next() == false, "") + testutil.Assert(bench, rbm.Err() == nil, "") + } + }) + + b.Run("bigEndianSeek", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bep := newBigEndianPostings(bufBE) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, bep.Seek(uint64(v.seek))) + testutil.Equals(bench, uint64(v.val), bep.At()) + testutil.Assert(bench, bep.Err() == nil, "") + } + } + }) + b.Run("baseDeltaBlock16PostingsSeek (64bit)", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + rbm := newBaseDeltaBlock16Postings(bufBDB16.Get()) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, rbm.Seek(uint64(v.seek))) + testutil.Equals(bench, uint64(v.val), rbm.At()) + testutil.Assert(bench, rbm.Err() == nil, "") + } + } + }) +} + +func BenchmarkRealPostings(b *testing.B) { + file, err := os.Open("../../realWorldPostings.txt") + if err != nil { + panic(err) + } + defer file.Close() + + var ls []uint32 + scanner := bufio.NewScanner(file) + for scanner.Scan() { + x, err := strconv.Atoi(scanner.Text()) + if err != nil { + 
panic(err) + } + ls = append(ls, uint32(x)) + } + if err := scanner.Err(); err != nil { + panic(err) + } + + // bigEndianPostings. + bufBE := make([]byte, len(ls)*4) + for i := 0; i < len(ls); i++ { + b := bufBE[i*4 : i*4+4] + binary.BigEndian.PutUint32(b, ls[i]) + } + b.Log("bigEndianPostings size =", len(bufBE)) + + width := (bits.Len32(ls[len(ls)-1] - ls[0]) + 7) >> 3 + bufBD := encoding.Encbuf{} + for i := 0; i < 8 - width; i ++ { + bufBD.B = append(bufBD.B, 0) + } + for i := 0; i < len(ls); i++ { + for j := width - 1; j >= 0; j-- { + bufBD.B = append(bufBD.B, byte(((ls[i]-ls[0])>>(8*uint(j))&0xff))) + } + // bufBD.PutBits(uint64(ls[i]-ls[0]), width) + } + b.Log("baseDeltaPostings size =", len(bufBD.Get())) + + bufBDB16 := encoding.Encbuf{} + temp := make([]uint64, 0, len(ls)) + for _, x := range ls { + temp = append(temp, uint64(x)) + } + writeBaseDeltaBlock16Postings64(&bufBDB16, temp) + b.Log("baseDeltaBlock16Postings (64bit)", len(bufBDB16.Get())) + + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] + 1, ls[1], true, + }, + { + ls[1000], ls[1000], true, + }, + { + ls[1001], ls[1001], true, + }, + { + ls[2000]+1, ls[2001], true, + }, + { + ls[3000], ls[3000], true, + }, + { + ls[3001], ls[3001], true, + }, + { + ls[4000]+1, ls[4001], true, + }, + { + ls[5000], ls[5000], true, + }, + { + ls[5001], ls[5001], true, + }, + { + ls[6000]+1, ls[6001], true, + }, + { + ls[10000], ls[10000], true, + }, + { + ls[10001], ls[10001], true, + }, + { + ls[20000]+1, ls[20001], true, + }, + { + ls[30000], ls[30000], true, + }, + { + ls[30001], ls[30001], true, + }, + { + ls[40000]+1, ls[40001], true, + }, + { + ls[50000], ls[50000], true, + }, + { + ls[50001], ls[50001], true, + }, + { + ls[60000]+1, ls[60001], true, + }, + { + ls[70000], ls[70000], true, + }, + { + ls[70001], ls[70001], true, + }, + { + ls[80000]+1, ls[80001], true, + }, + { + ls[100000], ls[100000], true, + }, + { + ls[150000]+1, ls[150001], true, + }, + { + ls[200000], ls[200000], true, + }, + { + ls[250000]+1, ls[250001], true, + }, + { + ls[300000], ls[300000], true, + }, + } + b.Run("bigEndianIteration", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bep := newBigEndianPostings(bufBE) + // bench.StartTimer() + + for i := 0; i < len(ls); i++ { + testutil.Assert(bench, bep.Next() == true, "") + testutil.Equals(bench, uint64(ls[i]), bep.At()) + } + testutil.Assert(bench, bep.Next() == false, "") + testutil.Assert(bench, bep.Err() == nil, "") + } + }) + b.Run("baseDeltaIteration", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bdp := newBaseDeltaPostings(bufBD.Get(), uint64(ls[0]), width, len(ls)) + // bench.StartTimer() + + for i := 0; i < len(ls); i++ { + testutil.Assert(bench, bdp.Next() == true, "") + testutil.Equals(bench, uint64(ls[i]), bdp.At()) + } + testutil.Assert(bench, bdp.Next() == false, "") + testutil.Assert(bench, bdp.Err() == nil, "") + } + }) + b.Run("baseDeltaBlock16PostingsIteration (64bit)", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + rbm := newBaseDeltaBlock16Postings(bufBDB16.Get()) + // bench.StartTimer() + + for i := 0; i < len(ls); i++ { + testutil.Assert(bench, rbm.Next() == true, "") + testutil.Equals(bench, uint64(ls[i]), rbm.At()) + } + testutil.Assert(bench, rbm.Next() == false, "") + testutil.Assert(bench, rbm.Err() == nil, "") + } + 
}) + + b.Run("bigEndianSeek", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bep := newBigEndianPostings(bufBE) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, bep.Seek(uint64(v.seek))) + testutil.Equals(bench, uint64(v.val), bep.At()) + testutil.Assert(bench, bep.Err() == nil, "") + } + } + }) + b.Run("baseDeltaSeek", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bdp := newBaseDeltaPostings(bufBD.Get(), uint64(ls[0]), width, len(ls)) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, bdp.Seek(uint64(v.seek))) + testutil.Equals(bench, uint64(v.val), bdp.At()) + testutil.Assert(bench, bdp.Err() == nil, "") + } + } + }) + b.Run("baseDeltaBlock16PostingsSeek (64bit)", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + rbm := newBaseDeltaBlock16Postings(bufBDB16.Get()) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, rbm.Seek(uint64(v.seek))) + testutil.Equals(bench, uint64(v.val), rbm.At()) + testutil.Assert(bench, rbm.Err() == nil, "") + } + } + }) +} + +func BenchmarkRealShortPostings(b *testing.B) { + ls := []uint64{12825376,12825699,12826041,12826364,12826706,12826880,12827211,12827553,12827885,12828225,12828529,12828852,12829194,12829555,12829878,12830239,12830581,12830904,12831265,12831569,12831892,12832234,12832557,12832937,12833351,12833672,12834014,12834299,12834641,12834983,12835306,12835648,12835971,12836313,12836655,12837006,12837346,12837650,12838011,12838334,12838695,12839009,12839330,12839653,12839995,12840308,12840629,12840971,12841313,12841655,12845998,12846017,12846036,12846967,12846986,12847005,12847993,12848012,12848031,12848962,12848981,12849000,12849988,12850007,12850026,12850510,12850519,12850528,12851503,12851522,12851541,12852529,12852548,12852567,12853555,12853574,12853593,12854581,12854600,12854619,12855493,12855512,12855531,12856462,12856481,12856500,12857488,12857507,12857526,12858571,12858590,12858609,12859540,12859559,12859578,12860623,12860642,12860661,12861649,12861668,12861687,12862618,12862637,12862656,12863701,12863720,12863739,12864613,12864632,12864651,12865582,12865601,12865620,12866608,12866627,12866646,12867577,12867596,12867615,12868717,12868736,12868755,12869980,12869999,12870018,12870949,12870968,12870987,12871975,12871994,12872013,12872830,12872849,12872868,12873856,12873875,12873894,12874882,12874901,12874920,12875851,12875870,12875889,12876877,12876896,12876915,12877846,12877865,12877884,12878872,12878891,12878910,12879898,12879917,12879936,12880981,12881000,12881019,12882007,12882026,12882045,12882919,12882938,12882957,12884002,12884021,12884040,12884971,12884990,12885009,12886054,12886073,12886092,12887023,12887042,12887061,12887992,12888011,12888030,12888961,12888980,12888999,12889987,12890006,12890025,12890929,12890947,12890965,12891892,12891911,12891930,12892918,12892937,12892956,12893944,12893963,12893982,12894970,12894989,12895008,12895445,12895768,12896110,12896433,12896775,12896949,12897280,12897622,12897954,12898294,12898598,12898921,12899263,12899624,12899947,12900308,12900650,12900973,12901334,12901638,12901961,12902303,12902626,12903006,12903420,12903741,12904083,12904368,12904710,12905052,12905375,12905717,12906040,12906382,12906724,12907075,12907415,12907719,12908080,12908403,12908764,1
2909078,12909399,12909722,12910064,12910377,12910698,12911040,12911382,12911724,12912085,12912408,12912750,12913073,12913415,12913589,12913920,12914262,12914594,12914934,12915238,12915561,12915903,12916264,12916587,12916948,12917290,12917613,12917974,12918278,12918601,12918943,12919266,12919646,12920060,12920381,12920723,12921008,12921350,12921692,12922015,12922357,12922680,12923022,12923364,12923715,12924055,12924359,12924720,12925043,12925404,12925718,12926039,12926362,12926704,12927018,12927339,12927681,12928023,12928365} + + bufBE := make([]byte, len(ls)*4) + for i := 0; i < len(ls); i++ { + b := bufBE[i*4 : i*4+4] + binary.BigEndian.PutUint32(b, uint32(ls[i])) + } + b.Log("bigEndianPostings size =", len(bufBE)) + + width := (bits.Len64(ls[len(ls)-1] - ls[0]) + 7) >> 3 + bufBD := encoding.Encbuf{} + for i := 0; i < 8 - width; i ++ { + bufBD.B = append(bufBD.B, 0) + } + for i := 0; i < len(ls); i++ { + for j := width - 1; j >= 0; j-- { + bufBD.B = append(bufBD.B, byte(((ls[i]-ls[0])>>(8*uint(j))&0xff))) + } + // bufBD.PutBits(uint64(ls[i]-ls[0]), width) + } + b.Log("baseDeltaPostings size =", len(bufBD.Get())) + + bufBDB16 := encoding.Encbuf{} + temp := make([]uint64, 0, len(ls)) + for _, x := range ls { + temp = append(temp, uint64(x)) + } + writeBaseDeltaBlock16Postings64(&bufBDB16, temp) + b.Log("baseDeltaBlock16Postings (64bit)", len(bufBDB16.Get())) + + table := []struct { + seek uint64 + val uint64 + found bool + }{ + { + ls[0], ls[0], true, + }, + { + ls[5], ls[5], true, + }, + { + ls[10], ls[10], true, + }, + { + ls[15], ls[15], true, + }, + { + ls[20], ls[20], true, + }, + { + ls[25], ls[25], true, + }, + { + ls[30], ls[30], true, + }, + { + ls[35], ls[35], true, + }, + { + ls[40], ls[40], true, + }, + { + ls[45], ls[45], true, + }, + { + ls[50], ls[50], true, + }, + { + ls[55], ls[55], true, + }, + { + ls[60], ls[60], true, + }, + { + ls[65], ls[65], true, + }, + { + ls[70], ls[70], true, + }, + { + ls[75], ls[75], true, + }, + { + ls[80], ls[80], true, + }, + { + ls[85], ls[85], true, + }, + { + ls[90], ls[90], true, + }, + { + ls[95], ls[95], true, + }, + { + ls[100], ls[100], true, + }, + { + ls[105], ls[105], true, + }, + { + ls[110], ls[110], true, + }, + { + ls[115], ls[115], true, + }, + { + ls[120], ls[120], true, + }, + { + ls[125], ls[125], true, + }, + { + ls[130], ls[130], true, + }, + { + ls[135], ls[135], true, + }, + { + ls[140], ls[140], true, + }, + { + ls[145], ls[145], true, + }, + { + ls[150], ls[150], true, + }, + { + ls[155], ls[155], true, + }, + { + ls[160], ls[160], true, + }, + { + ls[165], ls[165], true, + }, + { + ls[170], ls[170], true, + }, + { + ls[175], ls[175], true, + }, + { + ls[180], ls[180], true, + }, + { + ls[185], ls[185], true, + }, + { + ls[190], ls[190], true, + }, + { + ls[195], ls[195], true, + }, + { + ls[200], ls[200], true, + }, + { + ls[205], ls[205], true, + }, + { + ls[210], ls[210], true, + }, + } + b.Run("bigEndianIteration", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bep := newBigEndianPostings(bufBE) + // bench.StartTimer() + + for i := 0; i < len(ls); i++ { + testutil.Assert(bench, bep.Next() == true, "") + testutil.Equals(bench, ls[i], bep.At()) + } + testutil.Assert(bench, bep.Next() == false, "") + testutil.Assert(bench, bep.Err() == nil, "") + } + }) + b.Run("baseDeltaIteration", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bdp := 
newBaseDeltaPostings(bufBD.Get(), uint64(ls[0]), width, len(ls)) + // bench.StartTimer() + + for i := 0; i < len(ls); i++ { + testutil.Assert(bench, bdp.Next() == true, "") + testutil.Equals(bench, uint64(ls[i]), bdp.At()) + } + testutil.Assert(bench, bdp.Next() == false, "") + testutil.Assert(bench, bdp.Err() == nil, "") + } + }) + b.Run("baseDeltaBlock16PostingsIteration (64bit)", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + rbm := newBaseDeltaBlock16Postings(bufBDB16.Get()) + // bench.StartTimer() + + for i := 0; i < len(ls); i++ { + testutil.Assert(bench, rbm.Next() == true, "") + testutil.Equals(bench, ls[i], rbm.At()) + } + testutil.Assert(bench, rbm.Next() == false, "") + testutil.Assert(bench, rbm.Err() == nil, "") + } + }) + + b.Run("bigEndianSeek", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bep := newBigEndianPostings(bufBE) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, bep.Seek(v.seek)) + testutil.Equals(bench, v.val, bep.At()) + testutil.Assert(bench, bep.Err() == nil, "") + } + } + }) + b.Run("baseDeltaSeek", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bdp := newBaseDeltaPostings(bufBD.Get(), uint64(ls[0]), width, len(ls)) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, bdp.Seek(uint64(v.seek))) + testutil.Equals(bench, uint64(v.val), bdp.At()) + testutil.Assert(bench, bdp.Err() == nil, "") + } + } + }) + b.Run("baseDeltaBlock16PostingsSeek (64bit)", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + rbm := newBaseDeltaBlock16Postings(bufBDB16.Get()) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, rbm.Seek(v.seek)) + testutil.Equals(bench, v.val, rbm.At()) + testutil.Assert(bench, rbm.Err() == nil, "") + } + } + }) +} + +func BenchmarkPostings(b *testing.B) { + num := 100000 + // mock a list as postings + ls := make([]uint32, num) + ls[0] = 2 + for i := 1; i < num; i++ { + ls[i] = ls[i-1] + uint32(rand.Int31n(25)) + 2 + // ls[i] = ls[i-1] + 2 + } + + // bigEndianPostings. + bufBE := make([]byte, num*4) + for i := 0; i < num; i++ { + b := bufBE[i*4 : i*4+4] + binary.BigEndian.PutUint32(b, ls[i]) + } + b.Log("bigEndianPostings size =", len(bufBE)) + + // baseDeltaPostings. + width := (bits.Len32(ls[len(ls)-1] - ls[0]) + 7) >> 3 + bufBD := encoding.Encbuf{} + for i := 0; i < 8 - width; i ++ { + bufBD.B = append(bufBD.B, 0) + } + for i := 0; i < num; i++ { + for j := width - 1; j >= 0; j-- { + bufBD.B = append(bufBD.B, byte(((ls[i]-ls[0])>>(8*uint(j))&0xff))) + } + // bufBD.PutBits(uint64(ls[i]-ls[0]), width) + } + b.Log("baseDeltaPostings size =", len(bufBD.Get())) + + // deltaBlockPostings. + bufDB := encoding.Encbuf{} + writeDeltaBlockPostings(&bufDB, ls) + b.Log("deltaBlockPostings size =", len(bufDB.Get())) + + // baseDeltaBlockPostings. + bufBDB := encoding.Encbuf{} + writeBaseDeltaBlockPostings(&bufBDB, ls) + b.Log("baseDeltaBlockPostings size =", len(bufBDB.Get())) + + // bitmapPostings. + bufBM := encoding.Encbuf{} + writeBitmapPostings(&bufBM, ls) + b.Log("bitmapPostings bits", bitmapBits, "size =", len(bufBM.Get())) + + // roaringBitmapPostings. 
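+ // The remaining candidate encodings are built from the same list so their
+ // encoded sizes can be compared via b.Log before the iteration and seek runs.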
+ bufRBM := encoding.Encbuf{} + writeRoaringBitmapPostings(&bufRBM, ls) + b.Log("roaringBitmapPostings bits", bitmapBits, "size =", len(bufRBM.Get())) + + bufRBM2 := encoding.Encbuf{} + writeBaseDeltaBlock8Postings(&bufRBM2, ls) + b.Log("baseDeltaBlock8Postings", len(bufRBM2.Get())) + + bufRBM3 := encoding.Encbuf{} + writeBaseDeltaBlock16Postings(&bufRBM3, ls) + b.Log("baseDeltaBlock16Postings", len(bufRBM3.Get())) + + bufBDB16 := encoding.Encbuf{} + temp := make([]uint64, 0, len(ls)) + for _, x := range ls { + temp = append(temp, uint64(x)) + } + writeBaseDeltaBlock16Postings64(&bufBDB16, temp) + b.Log("baseDeltaBlock16Postings (64bit)", len(bufBDB16.Get())) + + bufRBM4 := encoding.Encbuf{} + writeBaseDeltaBlock16PostingsV2(&bufRBM4, ls) + b.Log("baseDeltaBlock16PostingsV2", len(bufRBM4.Get())) + + table := []struct { + seek uint32 + val uint32 + found bool + }{ + { + ls[0] - 1, ls[0], true, + }, + { + ls[1000], ls[1000], true, + }, + { + ls[1001], ls[1001], true, + }, + { + ls[2000]+1, ls[2001], true, + }, + { + ls[3000], ls[3000], true, + }, + { + ls[3001], ls[3001], true, + }, + { + ls[4000]+1, ls[4001], true, + }, + { + ls[5000], ls[5000], true, + }, + { + ls[5001], ls[5001], true, + }, + { + ls[6000]+1, ls[6001], true, + }, + { + ls[10000], ls[10000], true, + }, + { + ls[10001], ls[10001], true, + }, + { + ls[20000]+1, ls[20001], true, + }, + { + ls[30000], ls[30000], true, + }, + { + ls[30001], ls[30001], true, + }, + { + ls[40000]+1, ls[40001], true, + }, + { + ls[50000], ls[50000], true, + }, + { + ls[50001], ls[50001], true, + }, + { + ls[60000]+1, ls[60001], true, + }, + { + ls[70000], ls[70000], true, + }, + { + ls[70001], ls[70001], true, + }, + { + ls[80000]+1, ls[80001], true, + }, + { + ls[99999], ls[99999], true, + }, + { + ls[99999] + 10, ls[99999], false, + }, + } + + b.Run("bigEndianIteration", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bep := newBigEndianPostings(bufBE) + // bench.StartTimer() + + for i := 0; i < num; i++ { + testutil.Assert(bench, bep.Next() == true, "") + testutil.Equals(bench, uint64(ls[i]), bep.At()) + } + testutil.Assert(bench, bep.Next() == false, "") + testutil.Assert(bench, bep.Err() == nil, "") + } + }) + // b.Run("baseDeltaIteration", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // bdp := newBaseDeltaPostings(bufBD.Get(), uint64(ls[0]), width, len(ls)) + // // bench.StartTimer() + + // for i := 0; i < num; i++ { + // testutil.Assert(bench, bdp.Next() == true, "") + // testutil.Equals(bench, uint64(ls[i]), bdp.At()) + // } + // testutil.Assert(bench, bdp.Next() == false, "") + // testutil.Assert(bench, bdp.Err() == nil, "") + // } + // }) + // b.Run("baseDeltaBlockIteration", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // bdbp := newBaseDeltaBlockPostings(bufBDB.Get()) + // // bench.StartTimer() + + // for i := 0; i < num; i++ { + // testutil.Assert(bench, bdbp.Next() == true, "") + // testutil.Equals(bench, uint64(ls[i]), bdbp.At()) + // } + // testutil.Assert(bench, bdbp.Next() == false, "") + // testutil.Assert(bench, bdbp.Err() == nil, "") + // } + // }) + // b.Run("roaringBitmapPostingsIteration", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // rbm := 
newRoaringBitmapPostings(bufRBM.Get()) + // // bench.StartTimer() + + // for i := 0; i < num; i++ { + // testutil.Assert(bench, rbm.Next() == true, "") + // testutil.Equals(bench, uint64(ls[i]), rbm.At()) + // } + // testutil.Assert(bench, rbm.Next() == false, "") + // testutil.Assert(bench, rbm.Err() == nil, "") + // } + // }) + // b.Run("baseDeltaBlock8PostingsIteration", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // rbm := newBaseDeltaBlock8Postings(bufRBM2.Get()) + // // bench.StartTimer() + + // for i := 0; i < num; i++ { + // testutil.Assert(bench, rbm.Next() == true, "") + // testutil.Equals(bench, uint64(ls[i]), rbm.At()) + // } + // testutil.Assert(bench, rbm.Next() == false, "") + // testutil.Assert(bench, rbm.Err() == nil, "") + // } + // }) + // b.Run("baseDeltaBlock16PostingsIteration", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // rbm := newBaseDeltaBlock16Postings(bufRBM3.Get()) + // // bench.StartTimer() + + // for i := 0; i < num; i++ { + // testutil.Assert(bench, rbm.Next() == true, "") + // testutil.Equals(bench, uint64(ls[i]), rbm.At()) + // } + // testutil.Assert(bench, rbm.Next() == false, "") + // testutil.Assert(bench, rbm.Err() == nil, "") + // } + // }) + b.Run("baseDeltaBlock16PostingsIteration (64bit)", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + rbm := newBaseDeltaBlock16Postings(bufBDB16.Get()) + // bench.StartTimer() + + for i := 0; i < num; i++ { + testutil.Assert(bench, rbm.Next() == true, "") + testutil.Equals(bench, uint64(ls[i]), rbm.At()) + } + testutil.Assert(bench, rbm.Next() == false, "") + testutil.Assert(bench, rbm.Err() == nil, "") + } + }) + // b.Run("baseDeltaBlock16PostingsV2Iteration", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // rbm := newBaseDeltaBlock16PostingsV2(bufRBM4.Get()) + // // bench.StartTimer() + + // for i := 0; i < num; i++ { + // testutil.Assert(bench, rbm.Next() == true, "") + // testutil.Equals(bench, uint64(ls[i]), rbm.At()) + // } + // testutil.Assert(bench, rbm.Next() == false, "") + // testutil.Assert(bench, rbm.Err() == nil, "") + // } + // }) + + b.Run("bigEndianSeek", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + bep := newBigEndianPostings(bufBE) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, bep.Seek(uint64(v.seek))) + testutil.Equals(bench, uint64(v.val), bep.At()) + testutil.Assert(bench, bep.Err() == nil, "") + } + } + }) + // b.Run("baseDeltaSeek", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // bdp := newBaseDeltaPostings(bufBD.Get(), uint64(ls[0]), width, len(ls)) + // // bench.StartTimer() + + // for _, v := range table { + // testutil.Equals(bench, v.found, bdp.Seek(uint64(v.seek))) + // testutil.Equals(bench, uint64(v.val), bdp.At()) + // testutil.Assert(bench, bdp.Err() == nil, "") + // } + // } + // }) + // b.Run("baseDeltaBlockSeek", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // bdbp := newBaseDeltaBlockPostings(bufBDB.Get()) + // // 
bench.StartTimer() + + // for _, v := range table { + // testutil.Equals(bench, v.found, bdbp.Seek(uint64(v.seek))) + // testutil.Equals(bench, uint64(v.val), bdbp.At()) + // testutil.Assert(bench, bdbp.Err() == nil, "") + // } + // } + // }) + // b.Run("roaringBitmapPostingsSeek", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // rbm := newRoaringBitmapPostings(bufRBM.Get()) + // // bench.StartTimer() + + // for _, v := range table { + // testutil.Equals(bench, v.found, rbm.Seek(uint64(v.seek))) + // testutil.Equals(bench, uint64(v.val), rbm.At()) + // testutil.Assert(bench, rbm.Err() == nil, "") + // } + // } + // }) + // b.Run("baseDeltaBlock8PostingsSeek", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // rbm := newBaseDeltaBlock8Postings(bufRBM2.Get()) + // // bench.StartTimer() + + // for _, v := range table { + // testutil.Equals(bench, v.found, rbm.Seek(uint64(v.seek))) + // testutil.Equals(bench, uint64(v.val), rbm.At()) + // testutil.Assert(bench, rbm.Err() == nil, "") + // } + // } + // }) + // b.Run("baseDeltaBlock16PostingsSeek", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // rbm := newBaseDeltaBlock16Postings(bufRBM3.Get()) + // // bench.StartTimer() + + // for _, v := range table { + // testutil.Equals(bench, v.found, rbm.Seek(uint64(v.seek))) + // testutil.Equals(bench, uint64(v.val), rbm.At()) + // testutil.Assert(bench, rbm.Err() == nil, "") + // } + // } + // }) + b.Run("baseDeltaBlock16PostingsSeek (64bit)", func(bench *testing.B) { + bench.ResetTimer() + bench.ReportAllocs() + for j := 0; j < bench.N; j++ { + // bench.StopTimer() + rbm := newBaseDeltaBlock16Postings(bufBDB16.Get()) + // bench.StartTimer() + + for _, v := range table { + testutil.Equals(bench, v.found, rbm.Seek(uint64(v.seek))) + testutil.Equals(bench, uint64(v.val), rbm.At()) + testutil.Assert(bench, rbm.Err() == nil, "") + } + } + }) + // b.Run("baseDeltaBlock16PostingsV2Seek", func(bench *testing.B) { + // bench.ResetTimer() + // bench.ReportAllocs() + // for j := 0; j < bench.N; j++ { + // // bench.StopTimer() + // rbm := newBaseDeltaBlock16PostingsV2(bufRBM4.Get()) + // // bench.StartTimer() + + // for _, v := range table { + // testutil.Equals(bench, v.found, rbm.Seek(uint64(v.seek))) + // testutil.Equals(bench, uint64(v.val), rbm.At()) + // testutil.Assert(bench, rbm.Err() == nil, "") + // } + // } + // }) +} + func TestIntersectWithMerge(t *testing.T) { // One of the reproducible cases for: // https://github.com/prometheus/prometheus/issues/2616