Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 50 additions & 50 deletions pkg/planner/cardinality/testdata/cardinality_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,69 +26,69 @@
"End": 900,
"Count": 763,
"MinEst": 759,
"MaxEst": 2258
"MaxEst": 763
},
{
"Start": 900,
"End": 950,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 67,
"MinEst": 13,
"MaxEst": 247
},
{
"Start": 950,
"End": 1000,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 62,
"MinEst": 13,
"MaxEst": 226
},
{
"Start": 1000,
"End": 1050,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 57,
"MinEst": 13,
"MaxEst": 205
},
{
"Start": 1050,
"End": 1100,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 52,
"MinEst": 13,
"MaxEst": 184
},
{
"Start": 1150,
"End": 1200,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 41,
"MinEst": 13,
"MaxEst": 143
},
{
"Start": 1200,
"End": 1300,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 59,
"MinEst": 13,
"MaxEst": 215
},
{
"Start": 1300,
"End": 1400,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 38,
"MinEst": 13,
"MaxEst": 131
},
{
"Start": 1400,
"End": 1500,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 18,
"MinEst": 13,
"MaxEst": 48
},
{
"Start": 1500,
"End": 1600,
"Count": 13,
"MinEst": 9,
"MinEst": 13,
"MaxEst": 1508
},
{
Expand All @@ -101,16 +101,16 @@
{
"Start": 800,
"End": 1000,
"Count": 763,
"MinEst": 759,
"MaxEst": 2258
"Count": 873,
"MinEst": 763,
"MaxEst": 1221
},
{
"Start": 900,
"End": 1500,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 381,
"MinEst": 13,
"MaxEst": 1502
},
{
"Start": 300,
Expand All @@ -122,23 +122,23 @@
{
"Start": 200,
"End": 300,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 122,
"MinEst": 13,
"MaxEst": 467
},
{
"Start": 100,
"End": 200,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 101,
"MinEst": 13,
"MaxEst": 383
},
{
"Start": 200,
"End": 400,
"Count": 763,
"MinEst": 759,
"MaxEst": 2258
"Count": 872,
"MinEst": 763,
"MaxEst": 1217
},
{
"Start": 200,
Expand All @@ -150,23 +150,23 @@
{
"Start": 0,
"End": 100,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 80,
"MinEst": 13,
"MaxEst": 299
},
{
"Start": -100,
"End": 100,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 132,
"MinEst": 13,
"MaxEst": 507
},
{
"Start": -100,
"End": 0,
"Count": 13,
"MinEst": 9,
"MaxEst": 1508
"Count": 60,
"MinEst": 13,
"MaxEst": 216
}
]
},
Expand Down
75 changes: 44 additions & 31 deletions pkg/statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -1107,12 +1107,20 @@ func (hg *Histogram) OutOfRangeRowCount(
return DefaultRowEst(0)
}

// oneValue assumes "one value qualifies", and is used as a lower bound.
oneValue := float64(0)
if histNDV > 0 {
oneValue = max(1, hg.NotNullCount()/max(float64(histNDV), outOfRangeBetweenRate)) // avoid inaccurate selectivity caused by small NDV
}

// Step 1: Calculate "one value"
// oneValue assumes "one value qualifies", and is used as a lower bound.
// outOfRangeBetweenRate (default == 100) avoids an artificially low NDV.
// TODO: If we have a large number of added rows, the NDV may be underestimated.
histNDV = max(histNDV, 1)
oneValue := hg.NotNullCount() / float64(histNDV)
if float64(histNDV) < outOfRangeBetweenRate {
// If NDV is low, it may no longer be representative of the data since ANALYZE
// was last run. Use a default value against realtimeRowCount.
// If NDV is not representative, then hg.NotNullCount may not be either.
oneValue = max(min(oneValue, float64(realtimeRowCount)/outOfRangeBetweenRate), 1.0)
}

// Step 2: If modifications are not allowed, return the one value.
// In OptObjectiveDeterminate mode, we can't rely on real time statistics, so default to assuming
// one value qualifies.
allowUseModifyCount := sctx.GetSessionVars().GetOptObjective() != vardef.OptObjectiveDeterminate
Expand Down Expand Up @@ -1177,14 +1185,24 @@ func (hg *Histogram) OutOfRangeRowCount(
// but deleted from the other, resulting in qualifying out of range rows even though
// realtimeRowCount is less than histogram count
addedRows := hg.AbsRowCountDifference(realtimeRowCount)
// percentInHist is the percentage of rows that were included in the histogram.
// This is used to scale back the out-of-range estimate.
percentInHist := hg.NotNullCount() / hg.TotalRowCount()
addedOutOfRangePct := min(1.0-percentInHist, 0.5)
totalPercent := min(leftPercent*0.5+rightPercent*0.5, 1.0)
// Assume on average, half of newly added rows are within the histogram range, and the other
// half are distributed out of range according to the diagram in the function description.
avgRowCount := (addedRows * addedOutOfRangePct) * totalPercent

// maxTotalPercent is the maximum out of range percentage that is used for MaxEst.
maxTotalPercent := min(leftPercent+rightPercent, 1.0)

estRows := oneValue
skewRatio := sctx.GetSessionVars().RiskRangeSkewRatio
sctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptRiskRangeSkewRatio)
if totalPercent > 0 {
// Multiplying addedRows by 0.5 encodes the assumption that 50% of "addedRows" are inside
// the histogram range, and 50% (0.5) are out-of-range. Users can adjust this
// magic number by setting the session variable `tidb_opt_risk_range_skew_ratio`.
addedRowMultiplier := 0.5
if skewRatio > 0 {
addedRowMultiplier = skewRatio
}
estRows = (addedRows * addedRowMultiplier) * totalPercent
}

// We may have missed the true lowest/highest values due to sampling OR there could be a delay in
// updates to modifyCount (meaning modifyCount is incorrectly set to 0). So ensure we always
Expand All @@ -1198,28 +1216,23 @@ func (hg *Histogram) OutOfRangeRowCount(
// modifyCount (since outOfRangeBetweenRate has a default value of 100).
addedRows = max(addedRows, float64(realtimeRowCount)/outOfRangeBetweenRate)
}
maxAddedRows := addedRows
if maxTotalPercent > 0 {
// Always apply maxTotalPercent to maxAddedRows (matching old behavior where addedRows was always scaled)
maxAddedRows *= maxTotalPercent
}

skewRatio := sctx.GetSessionVars().RiskRangeSkewRatio
sctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptRiskRangeSkewRatio)
minEst := min(estRows, oneValue)
if skewRatio > 0 {
// Add "ratio" of the maximum row count that could be out of range, i.e. all newly added rows
result := CalculateSkewRatioCounts(avgRowCount, addedRows, skewRatio)
result.Est = max(result.Est, oneValue)
result.MinEst = 1
result.MaxEst = max(result.Est, addedRows)
return result
result = CalculateSkewRatioCounts(minEst, maxAddedRows, skewRatio)
} else {
result.MinEst = minEst
result.Est = estRows
}
result.Est = max(result.Est, oneValue)
result.MaxEst = max(result.Est, maxAddedRows)

// Use oneValue as lower bound and provide meaningful min/max estimates
finalEst := max(avgRowCount, oneValue)
// Maximum could be as high as all added rows.
maxEst := max(finalEst, addedRows)

return RowEstimate{
Est: finalEst,
MinEst: 1, // Assume a minimum of 1 row qualifies
MaxEst: maxEst,
}
return result
}

// Copy deep copies the histogram.
Expand Down
14 changes: 7 additions & 7 deletions tests/integrationtest/r/explain_union_scan.result
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ Limit root offset:0, count:10
└─HashJoin root left outer join, left side:Limit, equal:[eq(explain_union_scan.city.province_id, explain_union_scan.city.province_id)]
├─Limit(Build) root offset:0, count:10
│ └─IndexJoin root inner join, inner:UnionScan, outer key:explain_union_scan.city.id, inner key:explain_union_scan.city.id, equal cond:eq(explain_union_scan.city.id, explain_union_scan.city.id)
│ ├─UnionScan(Build) root gt(explain_union_scan.city.province_id, 1), lt(explain_union_scan.city.province_id, 100)
│ │ └─TableReader root data:Selection
│ │ └─Selection cop[tikv] gt(explain_union_scan.city.province_id, 1), lt(explain_union_scan.city.province_id, 100)
│ │ └─TableFullScan cop[tikv] table:t1 keep order:false
│ └─UnionScan(Probe) root
│ ├─UnionScan(Build) root
│ │ └─TableReader root data:TableFullScan
│ │ └─TableFullScan cop[tikv] table:t2 keep order:false
│ └─UnionScan(Probe) root gt(explain_union_scan.city.province_id, 1), lt(explain_union_scan.city.province_id, 100)
│ └─IndexLookUp root
│ ├─IndexRangeScan(Build) cop[tikv] table:t2, index:PRIMARY(id) range: decided by [eq(explain_union_scan.city.id, explain_union_scan.city.id)], keep order:false
│ └─TableRowIDScan(Probe) cop[tikv] table:t2 keep order:false
│ ├─IndexRangeScan(Build) cop[tikv] table:t1, index:PRIMARY(id) range: decided by [eq(explain_union_scan.city.id, explain_union_scan.city.id)], keep order:false
│ └─Selection(Probe) cop[tikv] gt(explain_union_scan.city.province_id, 1), lt(explain_union_scan.city.province_id, 100)
│ └─TableRowIDScan cop[tikv] table:t1 keep order:false
└─UnionScan(Probe) root gt(explain_union_scan.city.province_id, 1), lt(explain_union_scan.city.province_id, 100), not(isnull(explain_union_scan.city.province_id))
└─TableReader root data:Selection
└─Selection cop[tikv] gt(explain_union_scan.city.province_id, 1), lt(explain_union_scan.city.province_id, 100), not(isnull(explain_union_scan.city.province_id))
Expand Down
6 changes: 3 additions & 3 deletions tests/integrationtest/r/imdbload.result
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,9 @@ TableReader 34260.33 root data:Selection
└─TableFullScan 528337.00 cop[tikv] table:aka_title keep order:false
explain format = 'brief' select * from aka_title where kind_id > 7;
id estRows task access object operator info
IndexLookUp 1027.81 root
├─IndexRangeScan(Build) 1027.81 cop[tikv] table:aka_title, index:aka_title_idx_kindid(kind_id) range:(7,+inf], keep order:false
└─TableRowIDScan(Probe) 1027.81 cop[tikv] table:aka_title keep order:false
TableReader 106389.00 root data:Selection
└─Selection 106389.00 cop[tikv] gt(imdbload.aka_title.kind_id, 7)
└─TableFullScan 528337.00 cop[tikv] table:aka_title keep order:false
explain format = 'brief' select * from keyword where ((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'));
id estRows task access object operator info
IndexLookUp 901.00 root
Expand Down
Loading