Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 09b5f47

Browse files
committed
sort symbols in order of frequency rather than lexicographically
Signed-off-by: Callum Styan <[email protected]>
1 parent 296f943 commit 09b5f47

File tree

7 files changed

+58
-47
lines changed

7 files changed

+58
-47
lines changed

block.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import (
3434
type IndexWriter interface {
3535
// AddSymbols registers all string symbols that are encountered in series
3636
// and other indices.
37-
AddSymbols(sym map[string]struct{}) error
37+
AddSymbols(sym map[string]int) error
3838

3939
// AddSeries populates the index writer with a series and its offsets
4040
// of chunks that the index can reference.
@@ -61,7 +61,7 @@ type IndexWriter interface {
6161
type IndexReader interface {
6262
// Symbols returns a set of string symbols that may occur in series' labels
6363
// and indices.
64-
Symbols() (map[string]struct{}, error)
64+
Symbols() (map[string]int, error)
6565

6666
// LabelValues returns the possible label values.
6767
LabelValues(names ...string) (index.StringTuples, error)
@@ -368,7 +368,7 @@ type blockIndexReader struct {
368368
b *Block
369369
}
370370

371-
func (r blockIndexReader) Symbols() (map[string]struct{}, error) {
371+
func (r blockIndexReader) Symbols() (map[string]int, error) {
372372
s, err := r.ir.Symbols()
373373
return s, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
374374
}

compact.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta,
548548

549549
var (
550550
set ChunkSeriesSet
551-
allSymbols = make(map[string]struct{}, 1<<16)
551+
allSymbols = make(map[string]int, 1<<16)
552552
closers = []io.Closer{}
553553
)
554554
defer func() { closeAll(closers...) }()
@@ -577,7 +577,7 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta,
577577
return errors.Wrap(err, "read symbols")
578578
}
579579
for s := range symbols {
580-
allSymbols[s] = struct{}{}
580+
allSymbols[s] = symbols[s]
581581
}
582582

583583
all, err := indexr.Postings(index.AllPostingsKey())

head.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ type Head struct {
6767
series *stripeSeries
6868

6969
symMtx sync.RWMutex
70-
symbols map[string]struct{}
70+
symbols map[string]int
7171
values map[string]stringset // label names to possible values
7272

7373
postings *index.MemPostings // postings lists for terms
@@ -229,7 +229,7 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int
229229
maxTime: math.MinInt64,
230230
series: newStripeSeries(),
231231
values: map[string]stringset{},
232-
symbols: map[string]struct{}{},
232+
symbols: make(map[string]int),
233233
postings: index.NewUnorderedMemPostings(),
234234
tombstones: newMemTombstones(),
235235
}
@@ -897,12 +897,12 @@ func (h *Head) gc() {
897897
h.postings.Delete(deleted)
898898

899899
// Rebuild symbols and label value indices from what is left in the postings terms.
900-
symbols := make(map[string]struct{})
900+
symbols := make(map[string]int)
901901
values := make(map[string]stringset, len(h.values))
902902

903903
if err := h.postings.Iter(func(t labels.Label, _ index.Postings) error {
904-
symbols[t.Name] = struct{}{}
905-
symbols[t.Value] = struct{}{}
904+
symbols[t.Name]++
905+
symbols[t.Value]++
906906

907907
ss, ok := values[t.Name]
908908
if !ok {
@@ -1046,14 +1046,14 @@ func (h *headIndexReader) Close() error {
10461046
return nil
10471047
}
10481048

1049-
func (h *headIndexReader) Symbols() (map[string]struct{}, error) {
1049+
func (h *headIndexReader) Symbols() (map[string]int, error) {
10501050
h.head.symMtx.RLock()
10511051
defer h.head.symMtx.RUnlock()
10521052

1053-
res := make(map[string]struct{}, len(h.head.symbols))
1053+
res := make(map[string]int, len(h.head.symbols))
10541054

1055-
for s := range h.head.symbols {
1056-
res[s] = struct{}{}
1055+
for s, num := range h.head.symbols {
1056+
res[s] = num
10571057
}
10581058
return res, nil
10591059
}
@@ -1202,8 +1202,8 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie
12021202
}
12031203
valset.set(l.Value)
12041204

1205-
h.symbols[l.Name] = struct{}{}
1206-
h.symbols[l.Value] = struct{}{}
1205+
h.symbols[l.Name]++
1206+
h.symbols[l.Value]++
12071207
}
12081208

12091209
return s, true

head_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,12 +211,12 @@ func TestHead_Truncate(t *testing.T) {
211211
testutil.Assert(t, postingsB2 == nil, "")
212212
testutil.Assert(t, postingsC1 == nil, "")
213213

214-
testutil.Equals(t, map[string]struct{}{
215-
"": {}, // from 'all' postings list
216-
"a": {},
217-
"b": {},
218-
"1": {},
219-
"2": {},
214+
testutil.Equals(t, map[string]int{
215+
"": 2, // from 'all' postings list
216+
"a": 2,
217+
"b": 1,
218+
"1": 2,
219+
"2": 1,
220220
}, h.symbols)
221221

222222
testutil.Equals(t, map[string]stringset{

index/index.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,17 @@ func (s indexWriterSeriesSlice) Less(i, j int) bool {
5757
return labels.Compare(s[i].labels, s[j].labels) < 0
5858
}
5959

60+
// symbolFrequencyPair couples a symbol string with the count that was
// supplied for it in the map handed to AddSymbols.
type symbolFrequencyPair struct {
	symbol    string
	frequency int
}

// symbolFrequencylist implements sort.Interface over symbol/frequency pairs.
//
// The natural order is ascending frequency. Pairs with the same frequency
// are ordered by descending symbol so that wrapping the list in sort.Reverse
// yields descending frequency with ties in ascending lexicographic order.
type symbolFrequencylist []symbolFrequencyPair

// Len returns the number of pairs in the list.
func (s symbolFrequencylist) Len() int { return len(s) }

// Swap exchanges the pairs at positions i and j.
func (s symbolFrequencylist) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Less reports whether the pair at i must sort before the pair at j.
//
// Comparing on frequency alone leaves equal-frequency symbols in whatever
// order they arrived in; the list is built by ranging over a map and sorted
// with the non-stable sort.Sort, so that order would change from run to run.
// The symbol tie-break makes the ordering total and therefore deterministic.
func (s symbolFrequencylist) Less(i, j int) bool {
	if s[i].frequency != s[j].frequency {
		return s[i].frequency < s[j].frequency
	}
	return s[i].symbol > s[j].symbol
}
70+
6071
type indexWriterStage uint8
6172

6273
const (
@@ -334,17 +345,17 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
334345
return nil
335346
}
336347

337-
func (w *Writer) AddSymbols(sym map[string]struct{}) error {
348+
func (w *Writer) AddSymbols(sym map[string]int) error {
338349
if err := w.ensureStage(idxStageSymbols); err != nil {
339350
return err
340351
}
341352
// Generate the list of symbols, sorted by descending frequency, that we will store as the reference table.
342-
symbols := make([]string, 0, len(sym))
353+
symbols := make(symbolFrequencylist, 0, len(sym))
343354

344-
for s := range sym {
345-
symbols = append(symbols, s)
355+
for k, v := range sym {
356+
symbols = append(symbols, symbolFrequencyPair{k, v})
346357
}
347-
sort.Strings(symbols)
358+
sort.Sort(sort.Reverse(symbols))
348359

349360
const headerSize = 4
350361

@@ -356,8 +367,8 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {
356367
w.symbols = make(map[string]uint32, len(symbols))
357368

358369
for index, s := range symbols {
359-
w.symbols[s] = uint32(index)
360-
w.buf2.putUvarintStr(s)
370+
w.symbols[s.symbol] = uint32(index)
371+
w.buf2.putUvarintStr(s.symbol)
361372
}
362373

363374
w.buf1.putBE32int(w.buf2.len())
@@ -856,11 +867,11 @@ func (r *Reader) lookupSymbol(o uint32) (string, error) {
856867
}
857868

858869
// Symbols returns a set of symbols that exist within the index.
859-
func (r *Reader) Symbols() (map[string]struct{}, error) {
860-
res := make(map[string]struct{}, len(r.symbols))
870+
func (r *Reader) Symbols() (map[string]int, error) {
871+
res := make(map[string]int, len(r.symbols))
861872

862873
for _, s := range r.symbols {
863-
res[s] = struct{}{}
874+
res[s] = 0
864875
}
865876
for _, s := range r.symbolSlice {
866877
res[s] = struct{}{}

index/index_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -189,13 +189,13 @@ func TestIndexRW_Postings(t *testing.T) {
189189
labels.FromStrings("a", "1", "b", "4"),
190190
}
191191

192-
err = iw.AddSymbols(map[string]struct{}{
193-
"a": {},
194-
"b": {},
195-
"1": {},
196-
"2": {},
197-
"3": {},
198-
"4": {},
192+
err = iw.AddSymbols(map[string]int{
193+
"a": 1,
194+
"b": 2,
195+
"1": 1,
196+
"2": 4,
197+
"3": 5,
198+
"4": 3,
199199
})
200200
testutil.Ok(t, err)
201201

@@ -243,11 +243,11 @@ func TestPersistence_index_e2e(t *testing.T) {
243243
// Sort labels as the index writer expects series in sorted order.
244244
sort.Sort(labels.Slice(lbls))
245245

246-
symbols := map[string]struct{}{}
246+
symbols := make(map[string]int)
247247
for _, lset := range lbls {
248248
for _, l := range lset {
249-
symbols[l.Name] = struct{}{}
250-
symbols[l.Value] = struct{}{}
249+
symbols[l.Name] = 0
250+
symbols[l.Value] = 0
251251
}
252252
}
253253

querier_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,20 +1345,20 @@ type mockIndex struct {
13451345
series map[uint64]series
13461346
labelIndex map[string][]string
13471347
postings map[labels.Label][]uint64
1348-
symbols map[string]struct{}
1348+
symbols map[string]int
13491349
}
13501350

13511351
func newMockIndex() mockIndex {
13521352
ix := mockIndex{
13531353
series: make(map[uint64]series),
13541354
labelIndex: make(map[string][]string),
13551355
postings: make(map[labels.Label][]uint64),
1356-
symbols: make(map[string]struct{}),
1356+
symbols: make(map[string]int),
13571357
}
13581358
return ix
13591359
}
13601360

1361-
func (m mockIndex) Symbols() (map[string]struct{}, error) {
1361+
func (m mockIndex) Symbols() (map[string]int, error) {
13621362
return m.symbols, nil
13631363
}
13641364

@@ -1367,8 +1367,8 @@ func (m mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta)
13671367
return errors.Errorf("series with reference %d already added", ref)
13681368
}
13691369
for _, lbl := range l {
1370-
m.symbols[lbl.Name] = struct{}{}
1371-
m.symbols[lbl.Value] = struct{}{}
1370+
m.symbols[lbl.Name] = 0
1371+
m.symbols[lbl.Value] = 0
13721372
}
13731373

13741374
s := series{l: l}

0 commit comments

Comments
 (0)