Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 598c024

Browse files
committed
sort symbols in order of frequency rather than lexicographically
1 parent ffe73cd commit 598c024

File tree

7 files changed

+60
-44
lines changed

7 files changed

+60
-44
lines changed

block.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import (
3434
type IndexWriter interface {
3535
// AddSymbols registers all string symbols that are encountered in series
3636
// and other indices.
37-
AddSymbols(sym map[string]struct{}) error
37+
AddSymbols(sym map[string]int) error
3838

3939
// AddSeries populates the index writer with a series and its offsets
4040
// of chunks that the index can reference.
@@ -61,7 +61,7 @@ type IndexWriter interface {
6161
type IndexReader interface {
6262
// Symbols returns a set of string symbols that may occur in series' labels
6363
// and indices.
64-
Symbols() (map[string]struct{}, error)
64+
Symbols() (map[string]int, error)
6565

6666
// LabelValues returns the possible label values.
6767
LabelValues(names ...string) (index.StringTuples, error)
@@ -348,7 +348,7 @@ type blockIndexReader struct {
348348
b *Block
349349
}
350350

351-
func (r blockIndexReader) Symbols() (map[string]struct{}, error) {
351+
func (r blockIndexReader) Symbols() (map[string]int, error) {
352352
s, err := r.ir.Symbols()
353353
return s, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
354354
}

compact.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe
485485
func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter) error {
486486
var (
487487
set ChunkSeriesSet
488-
allSymbols = make(map[string]struct{}, 1<<16)
488+
allSymbols = make(map[string]int, 1<<16)
489489
closers = []io.Closer{}
490490
)
491491
defer func() { closeAll(closers...) }()
@@ -514,7 +514,7 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta,
514514
return errors.Wrap(err, "read symbols")
515515
}
516516
for s := range symbols {
517-
allSymbols[s] = struct{}{}
517+
allSymbols[s] = symbols[s]
518518
}
519519

520520
all, err := indexr.Postings(index.AllPostingsKey())

head.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ type Head struct {
6464
series *stripeSeries
6565

6666
symMtx sync.RWMutex
67-
symbols map[string]struct{}
67+
symbols map[string]int
6868
values map[string]stringset // label names to possible values
6969

7070
postings *index.MemPostings // postings lists for terms
@@ -187,7 +187,7 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal WAL, chunkRange int64) (
187187
maxTime: math.MinInt64,
188188
series: newStripeSeries(),
189189
values: map[string]stringset{},
190-
symbols: map[string]struct{}{},
190+
symbols: make(map[string]int),
191191
postings: index.NewUnorderedMemPostings(),
192192
tombstones: memTombstones{},
193193
}
@@ -623,12 +623,12 @@ func (h *Head) gc() {
623623
h.postings.Delete(deleted)
624624

625625
// Rebuild symbols and label value indices from what is left in the postings terms.
626-
symbols := make(map[string]struct{})
626+
symbols := make(map[string]int)
627627
values := make(map[string]stringset, len(h.values))
628628

629629
h.postings.Iter(func(t labels.Label, _ index.Postings) error {
630-
symbols[t.Name] = struct{}{}
631-
symbols[t.Value] = struct{}{}
630+
symbols[t.Name]++
631+
symbols[t.Value]++
632632

633633
ss, ok := values[t.Name]
634634
if !ok {
@@ -775,14 +775,14 @@ func (h *headIndexReader) Close() error {
775775
return nil
776776
}
777777

778-
func (h *headIndexReader) Symbols() (map[string]struct{}, error) {
778+
func (h *headIndexReader) Symbols() (map[string]int, error) {
779779
h.head.symMtx.RLock()
780780
defer h.head.symMtx.RUnlock()
781781

782-
res := make(map[string]struct{}, len(h.head.symbols))
782+
res := make(map[string]int, len(h.head.symbols))
783783

784784
for s := range h.head.symbols {
785-
res[s] = struct{}{}
785+
res[s] = 0
786786
}
787787
return res, nil
788788
}
@@ -914,8 +914,8 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie
914914
}
915915
valset.set(l.Value)
916916

917-
h.symbols[l.Name] = struct{}{}
918-
h.symbols[l.Value] = struct{}{}
917+
h.symbols[l.Name]++
918+
h.symbols[l.Value]++
919919
}
920920

921921
return s, true

head_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -178,12 +178,12 @@ func TestHead_Truncate(t *testing.T) {
178178
testutil.Assert(t, postingsB2 == nil, "")
179179
testutil.Assert(t, postingsC1 == nil, "")
180180

181-
testutil.Equals(t, map[string]struct{}{
182-
"": struct{}{}, // from 'all' postings list
183-
"a": struct{}{},
184-
"b": struct{}{},
185-
"1": struct{}{},
186-
"2": struct{}{},
181+
testutil.Equals(t, map[string]int{
182+
"": 2, // from 'all' postings list
183+
"a": 2,
184+
"b": 1,
185+
"1": 2,
186+
"2": 1,
187187
}, h.symbols)
188188

189189
testutil.Equals(t, map[string]stringset{

index/index.go

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,17 @@ func (s indexWriterSeriesSlice) Less(i, j int) bool {
5454
return labels.Compare(s[i].labels, s[j].labels) < 0
5555
}
5656

57+
// symbolFrequencyPair couples a symbol with the number of times it occurs.
type symbolFrequencyPair struct {
	symbol    string
	frequency int
}

// symbolFrequencylist implements sort.Interface over symbol/frequency pairs.
//
// Ascending order is by increasing frequency. Pairs with the same frequency
// are ordered by decreasing symbol, so that sorting through sort.Reverse
// produces most-frequent-first with ties in ascending lexicographic order.
//
// The tie-break matters: the list is filled by ranging over a map, whose
// iteration order is unspecified, and sort.Sort is not stable. Comparing on
// frequency alone would therefore lay out equal-frequency symbols differently
// from one run to the next.
type symbolFrequencylist []symbolFrequencyPair

// Len returns the number of pairs in the list.
func (s symbolFrequencylist) Len() int { return len(s) }

// Swap exchanges the pairs at positions i and j.
func (s symbolFrequencylist) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Less reports whether pair i sorts before pair j: lower frequency first,
// and for equal frequencies the lexicographically greater symbol first.
func (s symbolFrequencylist) Less(i, j int) bool {
	if s[i].frequency != s[j].frequency {
		return s[i].frequency < s[j].frequency
	}
	return s[i].symbol > s[j].symbol
}
67+
5768
type indexWriterStage uint8
5869

5970
const (
@@ -321,17 +332,17 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
321332
return nil
322333
}
323334

324-
func (w *Writer) AddSymbols(sym map[string]struct{}) error {
335+
func (w *Writer) AddSymbols(sym map[string]int) error {
325336
if err := w.ensureStage(idxStageSymbols); err != nil {
326337
return err
327338
}
328339
// Generate the list of symbols, ordered from most to least frequent, that we will store as the reference table.
329-
symbols := make([]string, 0, len(sym))
340+
symbols := make(symbolFrequencylist, 0, len(sym))
330341

331-
for s := range sym {
332-
symbols = append(symbols, s)
342+
for k, v := range sym {
343+
symbols = append(symbols, symbolFrequencyPair{k, v})
333344
}
334-
sort.Strings(symbols)
345+
sort.Sort(sort.Reverse(symbols))
335346

336347
const headerSize = 4
337348

@@ -343,8 +354,13 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {
343354
w.symbols = make(map[string]uint32, len(symbols))
344355

345356
for index, s := range symbols {
361+
w.symbols[s.symbol] = uint32(index)
362+
w.buf2.putUvarintStr(s.symbol)
348364
}
349365

350366
w.buf1.putBE32int(w.buf2.len())
@@ -817,11 +833,11 @@ func (r *Reader) lookupSymbol(o uint32) (string, error) {
817833
}
818834

819835
// Symbols returns a set of symbols that exist within the index.
820-
func (r *Reader) Symbols() (map[string]struct{}, error) {
821-
res := make(map[string]struct{}, len(r.symbols))
836+
func (r *Reader) Symbols() (map[string]int, error) {
837+
res := make(map[string]int, len(r.symbols))
822838

823839
for _, s := range r.symbols {
824-
res[s] = struct{}{}
840+
res[s] = 0
825841
}
826842
return res, nil
827843
}

index/index_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -191,13 +191,13 @@ func TestIndexRW_Postings(t *testing.T) {
191191
labels.FromStrings("a", "1", "b", "4"),
192192
}
193193

194-
err = iw.AddSymbols(map[string]struct{}{
195-
"a": struct{}{},
196-
"b": struct{}{},
197-
"1": struct{}{},
198-
"2": struct{}{},
199-
"3": struct{}{},
200-
"4": struct{}{},
194+
err = iw.AddSymbols(map[string]int{
195+
"a": 1,
196+
"b": 2,
197+
"1": 1,
198+
"2": 4,
199+
"3": 5,
200+
"4": 3,
201201
})
202202
testutil.Ok(t, err)
203203

@@ -245,11 +245,11 @@ func TestPersistence_index_e2e(t *testing.T) {
245245
// Sort labels as the index writer expects series in sorted order.
246246
sort.Sort(labels.Slice(lbls))
247247

248-
symbols := map[string]struct{}{}
248+
symbols := make(map[string]int)
249249
for _, lset := range lbls {
250250
for _, l := range lset {
251-
symbols[l.Name] = struct{}{}
252-
symbols[l.Value] = struct{}{}
251+
symbols[l.Name] = 0
252+
symbols[l.Value] = 0
253253
}
254254
}
255255

querier_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,20 +1367,20 @@ type mockIndex struct {
13671367
series map[uint64]series
13681368
labelIndex map[string][]string
13691369
postings map[labels.Label][]uint64
1370-
symbols map[string]struct{}
1370+
symbols map[string]int
13711371
}
13721372

13731373
func newMockIndex() mockIndex {
13741374
ix := mockIndex{
13751375
series: make(map[uint64]series),
13761376
labelIndex: make(map[string][]string),
13771377
postings: make(map[labels.Label][]uint64),
1378-
symbols: make(map[string]struct{}),
1378+
symbols: make(map[string]int),
13791379
}
13801380
return ix
13811381
}
13821382

1383-
func (m mockIndex) Symbols() (map[string]struct{}, error) {
1383+
func (m mockIndex) Symbols() (map[string]int, error) {
13841384
return m.symbols, nil
13851385
}
13861386

@@ -1389,8 +1389,8 @@ func (m mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta)
13891389
return errors.Errorf("series with reference %d already added", ref)
13901390
}
13911391
for _, lbl := range l {
1392-
m.symbols[lbl.Name] = struct{}{}
1393-
m.symbols[lbl.Value] = struct{}{}
1392+
m.symbols[lbl.Name] = 0
1393+
m.symbols[lbl.Value] = 0
13941394
}
13951395

13961396
s := series{l: l}

0 commit comments

Comments
 (0)