Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 95666e0

Browse files
committed
sort symbols in order of frequency rather than lexicographically
1 parent 195bc0d commit 95666e0

File tree

7 files changed

+58
-47
lines changed

7 files changed

+58
-47
lines changed

block.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import (
3434
type IndexWriter interface {
3535
// AddSymbols registers all string symbols that are encountered in series
3636
// and other indices.
37-
AddSymbols(sym map[string]struct{}) error
37+
AddSymbols(sym map[string]int) error
3838

3939
// AddSeries populates the index writer with a series and its offsets
4040
// of chunks that the index can reference.
@@ -61,7 +61,7 @@ type IndexWriter interface {
6161
type IndexReader interface {
6262
// Symbols returns a set of string symbols that may occur in series' labels
6363
// and indices.
64-
Symbols() (map[string]struct{}, error)
64+
Symbols() (map[string]int, error)
6565

6666
// LabelValues returns the possible label values.
6767
LabelValues(names ...string) (index.StringTuples, error)
@@ -350,7 +350,7 @@ type blockIndexReader struct {
350350
b *Block
351351
}
352352

353-
func (r blockIndexReader) Symbols() (map[string]struct{}, error) {
353+
func (r blockIndexReader) Symbols() (map[string]int, error) {
354354
s, err := r.ir.Symbols()
355355
return s, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
356356
}

compact.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe
509509
func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter) error {
510510
var (
511511
set ChunkSeriesSet
512-
allSymbols = make(map[string]struct{}, 1<<16)
512+
allSymbols = make(map[string]int, 1<<16)
513513
closers = []io.Closer{}
514514
)
515515
defer func() { closeAll(closers...) }()
@@ -538,7 +538,7 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta,
538538
return errors.Wrap(err, "read symbols")
539539
}
540540
for s := range symbols {
541-
allSymbols[s] = struct{}{}
541+
allSymbols[s] = symbols[s]
542542
}
543543

544544
all, err := indexr.Postings(index.AllPostingsKey())

head.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ type Head struct {
6464
series *stripeSeries
6565

6666
symMtx sync.RWMutex
67-
symbols map[string]struct{}
67+
symbols map[string]int
6868
values map[string]stringset // label names to possible values
6969

7070
postings *index.MemPostings // postings lists for terms
@@ -187,7 +187,7 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal WAL, chunkRange int64) (
187187
maxTime: math.MinInt64,
188188
series: newStripeSeries(),
189189
values: map[string]stringset{},
190-
symbols: map[string]struct{}{},
190+
symbols: make(map[string]int),
191191
postings: index.NewUnorderedMemPostings(),
192192
tombstones: memTombstones{},
193193
}
@@ -623,12 +623,12 @@ func (h *Head) gc() {
623623
h.postings.Delete(deleted)
624624

625625
// Rebuild symbols and label value indices from what is left in the postings terms.
626-
symbols := make(map[string]struct{})
626+
symbols := make(map[string]int)
627627
values := make(map[string]stringset, len(h.values))
628628

629629
h.postings.Iter(func(t labels.Label, _ index.Postings) error {
630-
symbols[t.Name] = struct{}{}
631-
symbols[t.Value] = struct{}{}
630+
symbols[t.Name]++
631+
symbols[t.Value]++
632632

633633
ss, ok := values[t.Name]
634634
if !ok {
@@ -771,14 +771,14 @@ func (h *headIndexReader) Close() error {
771771
return nil
772772
}
773773

774-
func (h *headIndexReader) Symbols() (map[string]struct{}, error) {
774+
func (h *headIndexReader) Symbols() (map[string]int, error) {
775775
h.head.symMtx.RLock()
776776
defer h.head.symMtx.RUnlock()
777777

778-
res := make(map[string]struct{}, len(h.head.symbols))
778+
res := make(map[string]int, len(h.head.symbols))
779779

780-
for s := range h.head.symbols {
781-
res[s] = struct{}{}
780+
for s, num := range h.head.symbols {
781+
res[s] = num
782782
}
783783
return res, nil
784784
}
@@ -910,8 +910,8 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie
910910
}
911911
valset.set(l.Value)
912912

913-
h.symbols[l.Name] = struct{}{}
914-
h.symbols[l.Value] = struct{}{}
913+
h.symbols[l.Name]++
914+
h.symbols[l.Value]++
915915
}
916916

917917
return s, true

head_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -178,12 +178,12 @@ func TestHead_Truncate(t *testing.T) {
178178
testutil.Assert(t, postingsB2 == nil, "")
179179
testutil.Assert(t, postingsC1 == nil, "")
180180

181-
testutil.Equals(t, map[string]struct{}{
182-
"": struct{}{}, // from 'all' postings list
183-
"a": struct{}{},
184-
"b": struct{}{},
185-
"1": struct{}{},
186-
"2": struct{}{},
181+
testutil.Equals(t, map[string]int{
182+
"": 2, // from 'all' postings list
183+
"a": 2,
184+
"b": 1,
185+
"1": 2,
186+
"2": 1,
187187
}, h.symbols)
188188

189189
testutil.Equals(t, map[string]stringset{

index/index.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,17 @@ func (s indexWriterSeriesSlice) Less(i, j int) bool {
5555
return labels.Compare(s[i].labels, s[j].labels) < 0
5656
}
5757

58+
type symbolFrequencyPair struct {
59+
symbol string
60+
frequency int
61+
}
62+
63+
type symbolFrequencylist []symbolFrequencyPair
64+
65+
// Len returns the number of symbol/frequency pairs held in the list.
func (s symbolFrequencylist) Len() int {
	count := len(s)
	return count
}
66+
// Swap exchanges the pairs stored at positions i and j.
func (s symbolFrequencylist) Swap(i, j int) {
	held := s[i]
	s[i] = s[j]
	s[j] = held
}
67+
// Less reports whether the pair at index i sorts before the pair at index j.
// Pairs are ordered by ascending frequency; pairs with equal frequency are
// ordered by symbol. The tie-break makes the sorted order fully determined
// by the contents of the list rather than by the order in which the pairs
// were appended.
func (s symbolFrequencylist) Less(i, j int) bool {
	if s[i].frequency != s[j].frequency {
		return s[i].frequency < s[j].frequency
	}
	return s[i].symbol < s[j].symbol
}
68+
5869
type indexWriterStage uint8
5970

6071
const (
@@ -330,17 +341,17 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
330341
return nil
331342
}
332343

333-
func (w *Writer) AddSymbols(sym map[string]struct{}) error {
344+
func (w *Writer) AddSymbols(sym map[string]int) error {
334345
if err := w.ensureStage(idxStageSymbols); err != nil {
335346
return err
336347
}
337348
// Generate the list of symbols, ordered by descending frequency, that we will store as the reference table.
338-
symbols := make([]string, 0, len(sym))
349+
symbols := make(symbolFrequencylist, 0, len(sym))
339350

340-
for s := range sym {
341-
symbols = append(symbols, s)
351+
for k, v := range sym {
352+
symbols = append(symbols, symbolFrequencyPair{k, v})
342353
}
343-
sort.Strings(symbols)
354+
sort.Sort(sort.Reverse(symbols))
344355

345356
const headerSize = 4
346357

@@ -352,8 +363,8 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {
352363
w.symbols = make(map[string]uint32, len(symbols))
353364

354365
for index, s := range symbols {
355-
w.symbols[s] = uint32(index)
356-
w.buf2.putUvarintStr(s)
366+
w.symbols[s.symbol] = uint32(index)
367+
w.buf2.putUvarintStr(s.symbol)
357368
}
358369

359370
w.buf1.putBE32int(w.buf2.len())
@@ -832,11 +843,11 @@ func (r *Reader) lookupSymbol(o uint32) (string, error) {
832843
}
833844

834845
// Symbols returns the symbols that exist within the index. Every symbol maps to 0: this reader does not report frequencies.
835-
func (r *Reader) Symbols() (map[string]struct{}, error) {
836-
res := make(map[string]struct{}, len(r.symbols))
846+
func (r *Reader) Symbols() (map[string]int, error) {
847+
res := make(map[string]int, len(r.symbols))
837848

838849
for _, s := range r.symbols {
839-
res[s] = struct{}{}
850+
res[s] = 0
840851
}
841852
return res, nil
842853
}

index/index_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -191,13 +191,13 @@ func TestIndexRW_Postings(t *testing.T) {
191191
labels.FromStrings("a", "1", "b", "4"),
192192
}
193193

194-
err = iw.AddSymbols(map[string]struct{}{
195-
"a": struct{}{},
196-
"b": struct{}{},
197-
"1": struct{}{},
198-
"2": struct{}{},
199-
"3": struct{}{},
200-
"4": struct{}{},
194+
err = iw.AddSymbols(map[string]int{
195+
"a": 1,
196+
"b": 2,
197+
"1": 1,
198+
"2": 4,
199+
"3": 5,
200+
"4": 3,
201201
})
202202
testutil.Ok(t, err)
203203

@@ -245,11 +245,11 @@ func TestPersistence_index_e2e(t *testing.T) {
245245
// Sort labels as the index writer expects series in sorted order.
246246
sort.Sort(labels.Slice(lbls))
247247

248-
symbols := map[string]struct{}{}
248+
symbols := make(map[string]int)
249249
for _, lset := range lbls {
250250
for _, l := range lset {
251-
symbols[l.Name] = struct{}{}
252-
symbols[l.Value] = struct{}{}
251+
symbols[l.Name] = 0
252+
symbols[l.Value] = 0
253253
}
254254
}
255255

querier_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,20 +1367,20 @@ type mockIndex struct {
13671367
series map[uint64]series
13681368
labelIndex map[string][]string
13691369
postings map[labels.Label][]uint64
1370-
symbols map[string]struct{}
1370+
symbols map[string]int
13711371
}
13721372

13731373
func newMockIndex() mockIndex {
13741374
ix := mockIndex{
13751375
series: make(map[uint64]series),
13761376
labelIndex: make(map[string][]string),
13771377
postings: make(map[labels.Label][]uint64),
1378-
symbols: make(map[string]struct{}),
1378+
symbols: make(map[string]int),
13791379
}
13801380
return ix
13811381
}
13821382

1383-
func (m mockIndex) Symbols() (map[string]struct{}, error) {
1383+
func (m mockIndex) Symbols() (map[string]int, error) {
13841384
return m.symbols, nil
13851385
}
13861386

@@ -1389,8 +1389,8 @@ func (m mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta)
13891389
return errors.Errorf("series with reference %d already added", ref)
13901390
}
13911391
for _, lbl := range l {
1392-
m.symbols[lbl.Name] = struct{}{}
1393-
m.symbols[lbl.Value] = struct{}{}
1392+
m.symbols[lbl.Name] = 0
1393+
m.symbols[lbl.Value] = 0
13941394
}
13951395

13961396
s := series{l: l}

0 commit comments

Comments
 (0)