plan,execution: make coalesce a logical node

MichaHoffmann · MichaHoffmann · commit c0f3313e8c64 · 2023-12-28T20:37:24.000+01:00
Signed-off-by: Michael Hoffmann <mhoffm@posteo.de>
diff --git a/execution/execution.go b/execution/execution.go
@@ -17,7 +17,6 @@
 package execution
 
 import (
-	"runtime"
 	"sort"
 
 	"github.com/efficientgo/core/errors"
@@ -72,6 +71,8 @@ func newOperator(expr parser.Expr, storage *engstore.SelectorPool, opts *query.O
 		return newUnaryExpression(e, storage, opts, hints)
 	case *parser.StepInvariantExpr:
 		return newStepInvariantExpression(e, storage, opts, hints)
+	case logicalplan.Coalesce:
+		return newCoalesce(e, storage, opts, hints)
 	case logicalplan.Deduplicate:
 		return newDeduplication(e, storage, opts, hints)
 	case logicalplan.RemoteExecution:
@@ -94,21 +95,10 @@ func newVectorSelector(e *logicalplan.VectorSelector, storage *engstore.Selector
 	batchsize := e.BatchSize
 	selector := storage.GetFilteredSelector(start, end, opts.Step.Milliseconds(), e.LabelMatchers, e.Filters, hints)
 
-	numShards := runtime.GOMAXPROCS(0) / 2
-	if numShards < 1 {
-		numShards = 1
-	}
-
-	operators := make([]model.VectorOperator, 0, numShards)
-	for i := 0; i < numShards; i++ {
-		operator := exchange.NewConcurrent(
-			scan.NewVectorSelector(
-				model.NewVectorPool(opts.StepsBatch), selector, opts, offset, hints, batchsize, i, numShards),
-			2)
-		operators = append(operators, operator)
-	}
+	shard := e.Shard
+	numShards := e.NumShards
 
-	return exchange.NewCoalesce(model.NewVectorPool(opts.StepsBatch), opts, batchsize*int64(numShards), operators...), nil
+	return scan.NewVectorSelector(model.NewVectorPool(opts.StepsBatch), selector, opts, offset, hints, batchsize, shard, numShards), nil
 }
 
 func newCall(e *parser.Call, storage *engstore.SelectorPool, opts *query.Options, hints storage.SelectHints) (model.VectorOperator, error) {
@@ -231,10 +221,6 @@ func newRangeVectorFunction(e *parser.Call, t *logicalplan.MatrixSelector, stora
 	hints.Range = milliSecondRange
 	filter := storage.GetFilteredSelector(start, end, opts.Step.Milliseconds(), vs.LabelMatchers, filters, hints)
 
-	numShards := runtime.GOMAXPROCS(0) / 2
-	if numShards < 1 {
-		numShards = 1
-	}
 	var arg float64
 	if e.Func.Name == "quantile_over_time" {
 		constVal, err := unwrapConstVal(e.Args[0])
@@ -243,28 +229,21 @@ func newRangeVectorFunction(e *parser.Call, t *logicalplan.MatrixSelector, stora
 		}
 		arg = constVal
 	}
-
-	operators := make([]model.VectorOperator, 0, numShards)
-	for i := 0; i < numShards; i++ {
-		operator, err := scan.NewMatrixSelector(
-			model.NewVectorPool(opts.StepsBatch),
-			filter,
-			e.Func.Name,
-			arg,
-			opts,
-			t.Range,
-			vs.Offset,
-			batchSize,
-			i,
-			numShards,
-		)
-		if err != nil {
-			return nil, err
-		}
-		operators = append(operators, exchange.NewConcurrent(operator, 2))
-	}
-
-	return exchange.NewCoalesce(model.NewVectorPool(opts.StepsBatch), opts, batchSize*int64(numShards), operators...), nil
+	shard := t.Shard
+	numShards := t.NumShards
+
+	return scan.NewMatrixSelector(
+		model.NewVectorPool(opts.StepsBatch),
+		filter,
+		e.Func.Name,
+		arg,
+		opts,
+		t.Range,
+		vs.Offset,
+		batchSize,
+		shard,
+		numShards,
+	)
 }
 
 func newSubqueryFunction(e *parser.Call, t *parser.SubqueryExpr, storage *engstore.SelectorPool, opts *query.Options, hints storage.SelectHints) (model.VectorOperator, error) {
@@ -407,6 +386,18 @@ func newStepInvariantExpression(e *parser.StepInvariantExpr, storage *engstore.S
 	return step_invariant.NewStepInvariantOperator(model.NewVectorPoolWithSize(opts.StepsBatch, 1), next, e.Expr, opts)
 }
 
+func newCoalesce(e logicalplan.Coalesce, storage *engstore.SelectorPool, opts *query.Options, hints storage.SelectHints) (model.VectorOperator, error) {
+	inputs := make([]model.VectorOperator, 0, len(e.Exprs)) // one physical operator per coalesced expression
+	for _, sub := range e.Exprs {
+		op, err := newOperator(sub, storage, opts, hints)
+		if err != nil {
+			return nil, err
+		}
+		inputs = append(inputs, exchange.NewConcurrent(op, 2)) // each input is wrapped by exchange.NewConcurrent
+	}
+	return exchange.NewCoalesce(model.NewVectorPool(opts.StepsBatch), opts, 0, inputs...), nil
+}
+
 func newDeduplication(e logicalplan.Deduplicate, storage *engstore.SelectorPool, opts *query.Options, hints storage.SelectHints) (model.VectorOperator, error) {
 	// The Deduplicate operator will deduplicate samples using a last-sample-wins strategy.
 	// Sorting engines by MaxT ensures that samples produced due to
@@ -424,6 +415,7 @@ func newDeduplication(e logicalplan.Deduplicate, storage *engstore.SelectorPool,
 		}
 		operators[i] = operator
 	}
+	// We don't need to use a logical coalesce here since it was already pushed back above remote evaluation.
 	coalesce := exchange.NewCoalesce(model.NewVectorPool(opts.StepsBatch), opts, 0, operators...)
 	dedup := exchange.NewDedupOperator(model.NewVectorPool(opts.StepsBatch), coalesce)
 	return exchange.NewConcurrent(dedup, 2), nil
diff --git a/logicalplan/coalesce.go b/logicalplan/coalesce.go
@@ -0,0 +1,127 @@
+// Copyright (c) The Thanos Community Authors.
+// Licensed under the Apache License 2.0.
+
+package logicalplan
+
+import (
+	"github.com/prometheus/prometheus/promql/parser"
+	"github.com/prometheus/prometheus/promql/parser/posrange"
+	"github.com/prometheus/prometheus/util/annotations"
+
+	"github.com/thanos-io/promql-engine/query"
+)
+
+// Coalesce is a logical plan node that wraps several expressions under a
+// single node. CoalesceOptimizer fills it with one sharded copy of a
+// selector (or of a call over a matrix selector) per shard.
+type Coalesce struct {
+	// Exprs is assumed to hold at least one expression; the accessors below read Exprs[0].
+	Exprs []parser.Expr
+}
+
+// String renders the node as its first expression.
+func (c Coalesce) String() string {
+	return c.Exprs[0].String()
+}
+
+// Pretty ignores the indentation level and returns the same text as String.
+func (c Coalesce) Pretty(level int) string { return c.String() }
+
+// PositionRange reports the position range of the first expression.
+func (c Coalesce) PositionRange() posrange.PositionRange { return c.Exprs[0].PositionRange() }
+
+// Type reports the value type of the first expression.
+func (c Coalesce) Type() parser.ValueType { return c.Exprs[0].Type() }
+
+// PromQLExpr has an empty body; it lets Coalesce be used as a parser.Expr.
+func (c Coalesce) PromQLExpr() {}
+
+// CoalesceOptimizer replaces selectors with Coalesce nodes that hold one
+// sharded copy of the selector per shard.
+type CoalesceOptimizer struct{}
+
+// Optimize walks expr with TraverseBottomUp and rewrites it in place:
+//   - a VectorSelector becomes a Coalesce of opts.NumShards() sharded copies,
+//     unless it sits under a MatrixSelector, a StepInvariantExpr, or an
+//     absent/timestamp call;
+//   - a Call with a MatrixSelector argument becomes a Coalesce of
+//     opts.NumShards() copies of the call, each with its own sharded selector.
+//
+// It returns the rewritten expression and never produces annotations.
+func (c CoalesceOptimizer) Optimize(expr parser.Expr, opts *query.Options) (parser.Expr, annotations.Annotations) {
+	numShards := opts.NumShards()
+
+	TraverseBottomUp(nil, &expr, func(parent, e *parser.Expr) bool {
+		switch t := (*e).(type) {
+		case *VectorSelector:
+			if parent != nil {
+				// Matrix selectors are coalesced in the *parser.Call branch. The
+				// plan holds them as pointers, so assert on *MatrixSelector.
+				if _, ok := (*parent).(*MatrixSelector); ok {
+					return false
+				}
+				// timestamp/absent is a weird function and those workarounds are for it
+				if _, ok := (*parent).(*parser.StepInvariantExpr); ok {
+					return false
+				}
+				if call, ok := (*parent).(*parser.Call); ok {
+					if call.Func.Name == "absent" || call.Func.Name == "timestamp" {
+						return true
+					}
+				}
+			}
+			exprs := make([]parser.Expr, numShards)
+			for i := range exprs {
+				exprs[i] = &VectorSelector{
+					VectorSelector: t.VectorSelector,
+					Filters:        t.Filters,
+					BatchSize:      t.BatchSize,
+					Shard:          i,
+					NumShards:      numShards,
+				}
+			}
+			*e = Coalesce{Exprs: exprs}
+			return true
+		case *parser.Call:
+			var (
+				ms   *MatrixSelector
+				marg int
+			)
+			// If several arguments are matrix selectors, the last one wins.
+			for i := range t.Args {
+				if arg, ok := t.Args[i].(*MatrixSelector); ok {
+					ms = arg
+					marg = i
+				}
+			}
+			if ms == nil {
+				return false
+			}
+
+			exprs := make([]parser.Expr, numShards)
+			for i := range exprs {
+				// Give every shard its own argument slice. Reusing t.Args would
+				// make all calls share one backing array, so each write below
+				// would overwrite the selector of the previous shards.
+				args := make([]parser.Expr, len(t.Args))
+				copy(args, t.Args)
+				args[marg] = &MatrixSelector{
+					MatrixSelector: ms.MatrixSelector,
+					OriginalString: ms.OriginalString,
+					Shard:          i,
+					NumShards:      numShards,
+				}
+				exprs[i] = &parser.Call{
+					Func:     t.Func,
+					Args:     args,
+					PosRange: t.PosRange,
+				}
+			}
+			*e = Coalesce{Exprs: exprs}
+		default:
+			return true
+		}
+		return true
+	})
+	return expr, nil
+}
diff --git a/logicalplan/plan.go b/logicalplan/plan.go
@@ -28,6 +28,7 @@ var (
 var DefaultOptimizers = []Optimizer{
 	SortMatchers{},
 	MergeSelectsOptimizer{},
+	CoalesceOptimizer{},
 }
 
 type Plan interface {
@@ -183,9 +184,9 @@ func replaceSelectors(plan parser.Expr) parser.Expr {
 	traverse(&plan, func(current *parser.Expr) {
 		switch t := (*current).(type) {
 		case *parser.MatrixSelector:
-			*current = &MatrixSelector{MatrixSelector: t, OriginalString: t.String()}
+			*current = &MatrixSelector{MatrixSelector: t, OriginalString: t.String(), Shard: 0, NumShards: 1}
 		case *parser.VectorSelector:
-			*current = &VectorSelector{VectorSelector: t}
+			*current = &VectorSelector{VectorSelector: t, Shard: 0, NumShards: 1}
 		}
 	})
 	return plan
@@ -414,6 +415,9 @@ type VectorSelector struct {
 	*parser.VectorSelector
 	Filters   []*labels.Matcher
 	BatchSize int64
+
+	Shard     int
+	NumShards int
 }
 
 func (f VectorSelector) String() string {
@@ -435,6 +439,9 @@ type MatrixSelector struct {
 
 	// Needed because this operator is used in the distributed mode
 	OriginalString string
+
+	Shard     int
+	NumShards int
 }
 
 func (f MatrixSelector) String() string {
diff --git a/logicalplan/plan_test.go b/logicalplan/plan_test.go
@@ -32,8 +32,6 @@ var closedParenthesis = regexp.MustCompile(`\s+\)`)
 // by testMatrixSelector that has a overridden string method?
 func renderExprTree(expr parser.Expr) string {
 	switch t := expr.(type) {
-	case *parser.NumberLiteral:
-		return fmt.Sprint(t.Val)
 	case *VectorSelector:
 		var b strings.Builder
 		base := t.VectorSelector.String()
@@ -49,8 +47,6 @@ func renderExprTree(expr parser.Expr) string {
 			return b.String()
 		}
 		return base
-	case *MatrixSelector:
-		return t.String()
 	case *parser.BinaryExpr:
 		var b strings.Builder
 		b.WriteString(renderExprTree(t.LHS))
@@ -105,8 +101,6 @@ func renderExprTree(expr parser.Expr) string {
 		b.WriteString(renderExprTree(t.Expr))
 		b.WriteRune(')')
 		return b.String()
-	case *parser.StepInvariantExpr:
-		return renderExprTree(t.Expr)
 	default:
 		return t.String()
 	}
diff --git a/query/options.go b/query/options.go
@@ -5,6 +5,7 @@ package query
 
 import (
 	"context"
+	"runtime"
 	"time"
 
 	"github.com/prometheus/prometheus/promql/parser"
@@ -36,6 +37,14 @@ func (o *Options) NumSteps() int {
 	return int(totalSteps)
 }
 
+// NumShards returns half of runtime.GOMAXPROCS(0), but never less than 1.
+func (o *Options) NumShards() int {
+	if half := runtime.GOMAXPROCS(0) / 2; half >= 1 {
+		return half
+	}
+	return 1
+}
+
 func (o *Options) IsInstantQuery() bool {
 	return o.NumSteps() == 1
 }