Skip to content

Commit

Permalink
Merge pull request #750 from cheney-lin/dev/round
Browse files Browse the repository at this point in the history
fix(sysadvisor): fix provision logic of dedicated_cores region
  • Loading branch information
xu282934741 authored Dec 31, 2024
2 parents ce5047c + e7355da commit bbf3ec5
Show file tree
Hide file tree
Showing 16 changed files with 145 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -828,8 +828,8 @@ func TestAdvisorUpdate(t *testing.T) {
wantInternalCalculationResult: types.InternalCPUCalculationResult{
PoolEntries: map[string]map[int]int{
commonstate.PoolNameReserve: {-1: 2},
commonstate.PoolNameShare: {-1: 84},
commonstate.PoolNameReclaim: {-1: 8},
commonstate.PoolNameShare: {-1: 82},
commonstate.PoolNameReclaim: {-1: 10},
"isolation-pod1": {-1: 2},
},
},
Expand Down Expand Up @@ -959,8 +959,8 @@ func TestAdvisorUpdate(t *testing.T) {
wantInternalCalculationResult: types.InternalCPUCalculationResult{
PoolEntries: map[string]map[int]int{
commonstate.PoolNameReserve: {-1: 2},
commonstate.PoolNameShare: {-1: 90},
commonstate.PoolNameReclaim: {-1: 4},
commonstate.PoolNameShare: {-1: 88},
commonstate.PoolNameReclaim: {-1: 6},
},
},
wantHeadroom: resource.Quantity{},
Expand Down Expand Up @@ -1074,8 +1074,8 @@ func TestAdvisorUpdate(t *testing.T) {
wantInternalCalculationResult: types.InternalCPUCalculationResult{
PoolEntries: map[string]map[int]int{
commonstate.PoolNameReserve: {-1: 2},
commonstate.PoolNameShare: {-1: 90},
commonstate.PoolNameReclaim: {-1: 4},
commonstate.PoolNameShare: {-1: 88},
commonstate.PoolNameReclaim: {-1: 6},
},
},
wantHeadroom: resource.Quantity{},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ func (fake *FakeRegion) Type() configapi.QoSRegionType {
return fake.regionType
}

func (fake *FakeRegion) GetMetaInfo() string {
return "fake"
}

func (fake *FakeRegion) OwnerPoolName() string {
return fake.ownerPoolName
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ type ProvisionPolicy interface {
Update() error
// GetControlKnobAdjusted returns the latest legal control knob value
GetControlKnobAdjusted() (types.ControlKnob, error)

GetMetaInfo() string
}

type InitFunc func(regionName string, regionType configapi.QoSRegionType, ownerPoolName string,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (p *PolicyBase) GetControlKnobAdjusted() (types.ControlKnob, error) {
return p.controlKnobAdjusted.Clone(), nil

case configapi.QoSRegionTypeIsolation:
return map[configapi.ControlKnobName]types.ControlKnobValue{
return map[configapi.ControlKnobName]types.ControlKnobItem{
configapi.ControlKnobNonReclaimedCPURequirementUpper: {
Value: p.ResourceUpperBound,
Action: types.ControlKnobActionNone,
Expand All @@ -91,3 +91,7 @@ func (p *PolicyBase) GetControlKnobAdjusted() (types.ControlKnob, error) {
return nil, fmt.Errorf("unsupported region type %v", p.regionType)
}
}

func (p *PolicyBase) GetMetaInfo() string {
return fmt.Sprintf("[regionName: %s, regionType: %s, ownerPoolName: %s, NUMAs: %v]", p.regionName, p.regionType, p.ownerPoolName, p.bindingNumas.String())
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func (p *PolicyCanonical) Update() error {
}

p.controlKnobAdjusted = types.ControlKnob{
configapi.ControlKnobNonReclaimedCPURequirement: types.ControlKnobValue{
configapi.ControlKnobNonReclaimedCPURequirement: types.ControlKnobItem{
Value: cpuEstimation,
Action: types.ControlKnobActionNone,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ func (p *PolicyNone) Update() error
func (p *PolicyNone) GetControlKnobAdjusted() (types.ControlKnob, error) {
return types.InvalidControlKnob, nil
}
func (p *PolicyNone) GetMetaInfo() string { return "" }
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,14 @@ func (p *PolicyRama) Update() error {

controller, ok := p.controllers[metricName]
if !ok {
controller = helper.NewPIDController(metricName, params)
controller = helper.NewPIDController(metricName, params, p.GetMetaInfo())
p.controllers[metricName] = controller
}

controller.SetEssentials(p.ResourceEssentials)
cpuAdjusted := controller.Adjust(cpuSize, indicator.Target, indicator.Current)

general.InfoS("[qosaware-cpu-rama] pid adjust result", "regionName", p.regionName, "metricName", metricName, "cpuAdjusted", cpuAdjusted, "last cpu size", cpuSize)
general.InfoS("[qosaware-cpu-rama] pid adjust result", "meta", p.GetMetaInfo(), "metricName", metricName, "cpuAdjusted", cpuAdjusted, "last cpu size", cpuSize)

if cpuAdjusted > cpuAdjustedRaw {
cpuAdjustedRaw = cpuAdjusted
Expand All @@ -104,12 +104,10 @@ func (p *PolicyRama) Update() error {
}
}

general.Infof("[qosaware-cpu-rama] ReclaimOverlap=%v, region=%v", p.ControlEssentials.ReclaimOverlap, p.regionName)

cpuAdjustedRestricted := cpuAdjustedRaw

p.controlKnobAdjusted = types.ControlKnob{
configapi.ControlKnobNonReclaimedCPURequirement: types.ControlKnobValue{
configapi.ControlKnobNonReclaimedCPURequirement: types.ControlKnobItem{
Value: cpuAdjustedRestricted,
Action: types.ControlKnobActionNone,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ type QoSRegion interface {
GetStatus() types.RegionStatus
// GetControlEssentials returns the latest control essentials
GetControlEssentials() types.ControlEssentials

GetMetaInfo() string
}

// GetRegionBasicMetricTags returns metric tag slice of region info and status
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,15 @@ type internalHeadroomPolicy struct {
}

type provisionPolicyResult struct {
msg string
essentials types.ResourceEssentials
regulatorOptions regulator.RegulatorOptions
controlKnobValueRegulators map[v1alpha1.ControlKnobName]regulator.Regulator
}

func newProvisionPolicyResult(essentials types.ResourceEssentials, regulatorOptions regulator.RegulatorOptions) *provisionPolicyResult {
func newProvisionPolicyResult(essentials types.ResourceEssentials, regulatorOptions regulator.RegulatorOptions, msg string) *provisionPolicyResult {
return &provisionPolicyResult{
msg: msg,
essentials: essentials,
regulatorOptions: regulatorOptions,
controlKnobValueRegulators: make(map[v1alpha1.ControlKnobName]regulator.Regulator),
Expand All @@ -102,26 +104,20 @@ func (r *provisionPolicyResult) setEssentials(essentials types.ResourceEssential

// regulateControlKnob is to regulate control knob with current and last one
// todo: current only regulate control knob value, it will also regulate action in the future
func (r *provisionPolicyResult) regulateControlKnob(currentControlKnob types.ControlKnob, lastControlKnob *types.ControlKnob) {
if lastControlKnob != nil {
for name, knob := range *lastControlKnob {
reg, ok := r.controlKnobValueRegulators[name]
if !ok || reg == nil {
reg = r.newRegulator(name)
}

reg.SetLatestControlKnobValue(knob)
r.controlKnobValueRegulators[name] = reg
}
}

func (r *provisionPolicyResult) regulateControlKnob(currentControlKnob, effectiveControlKnob types.ControlKnob) {
klog.InfoS("[provisionPolicyResult]", "region", r.msg,
"currentControlKnob", currentControlKnob, "effectiveControlKnob", effectiveControlKnob)
for name, knob := range currentControlKnob {
reg, ok := r.controlKnobValueRegulators[name]
if !ok || reg == nil {
reg = r.newRegulator(name)
}

reg.Regulate(knob)
effectiveKnobItem, ok := effectiveControlKnob[name]
if ok {
reg.Regulate(knob, &effectiveKnobItem)
} else {
reg.Regulate(knob, nil)
}
r.controlKnobValueRegulators[name] = reg
}
}
Expand All @@ -140,9 +136,9 @@ func (r *provisionPolicyResult) newRegulator(name v1alpha1.ControlKnobName) regu
// getControlKnob is to get final control knob from regulators
func (r *provisionPolicyResult) getControlKnob() types.ControlKnob {
controlKnob := make(types.ControlKnob)
for name, r := range r.controlKnobValueRegulators {
controlKnob[name] = types.ControlKnobValue{
Value: float64(r.GetRequirement()),
for name, regulator := range r.controlKnobValueRegulators {
controlKnob[name] = types.ControlKnobItem{
Value: float64(regulator.GetRequirement()),
Action: types.ControlKnobActionNone,
}
}
Expand Down Expand Up @@ -230,6 +226,11 @@ func NewQoSRegionBase(name string, ownerPoolName string, regionType v1alpha1.QoS
MaxRampUpStep: conf.MaxRampUpStep,
MaxRampDownStep: conf.MaxRampDownStep,
MinRampDownPeriod: conf.MinRampDownPeriod,
NeedHTAligned: func() bool {
return machine.SmtActive() &&
!conf.GetDynamicConfiguration().AllowSharedCoresOverlapReclaimedCores &&
regionType == v1alpha1.QoSRegionTypeShare
},
},

metaReader: metaReader,
Expand Down Expand Up @@ -289,6 +290,16 @@ func (r *QoSRegionBase) Clear() {
r.containerTopologyAwareAssignment = make(types.TopologyAwareAssignment)
}

func (r *QoSRegionBase) GetMetaInfo() string {
r.Lock()
defer r.Unlock()
return r.getMetaInfo()
}

func (r *QoSRegionBase) getMetaInfo() string {
return fmt.Sprintf("[regionName: %s, regionType: %s, ownerPoolName: %s, NUMAs: %v]", r.name, r.regionType, r.ownerPoolName, r.bindingNumas.String())
}

func (r *QoSRegionBase) GetBindingNumas() machine.CPUSet {
r.Lock()
defer r.Unlock()
Expand Down Expand Up @@ -626,7 +637,7 @@ func (r *QoSRegionBase) getProvisionControlKnob() map[types.CPUProvisionPolicyNa
{Key: metricTagKeyControlKnobAction, Val: string(value.Action)},
}...)

klog.InfoS("[qosaware-cpu] get raw control knob", "region", r.name, "policy", internal.name,
klog.InfoS("[qosaware-cpu] get raw control knob", "meta", r.getMetaInfo(), "policy", internal.name,
"knob", name, "action", value.Action, "value", value.Value)
}
}
Expand All @@ -636,7 +647,7 @@ func (r *QoSRegionBase) getProvisionControlKnob() map[types.CPUProvisionPolicyNa

// regulateProvisionControlKnob regulate provision control knob for each provision policy
func (r *QoSRegionBase) regulateProvisionControlKnob(originControlKnob map[types.CPUProvisionPolicyName]types.ControlKnob,
lastControlKnob *types.ControlKnob,
effectiveControlKnob types.ControlKnob,
) {
provisionPolicyResults := make(map[types.CPUProvisionPolicyName]*provisionPolicyResult)
firstValidPolicy := types.CPUProvisionPolicyNone
Expand All @@ -660,13 +671,13 @@ func (r *QoSRegionBase) regulateProvisionControlKnob(originControlKnob map[types

policyResult, ok := r.provisionPolicyResults[internal.name]
if !ok || policyResult == nil {
policyResult = newProvisionPolicyResult(r.ResourceEssentials, r.cpuRegulatorOptions)
policyResult.regulateControlKnob(controlKnob, lastControlKnob)
policyResult = newProvisionPolicyResult(r.ResourceEssentials, r.cpuRegulatorOptions, r.getMetaInfo())
policyResult.regulateControlKnob(controlKnob, effectiveControlKnob)
} else {
policyResult.setEssentials(r.ResourceEssentials)
// only set regulator last cpu requirement for first valid policy
if internal.name == firstValidPolicy {
policyResult.regulateControlKnob(controlKnob, lastControlKnob)
policyResult.regulateControlKnob(controlKnob, effectiveControlKnob)
} else {
policyResult.regulateControlKnob(controlKnob, nil)
}
Expand All @@ -685,8 +696,8 @@ func (r *QoSRegionBase) regulateProvisionControlKnob(originControlKnob map[types
{Key: metricTagKeyControlKnobName, Val: string(knob)},
{Key: metricTagKeyControlKnobAction, Val: string(value.Action)},
}...)
klog.InfoS("[qosaware-cpu] get regulated control knob", "region", r.name, "policy", policy, "knob", knob,
"action", value.Action, "value", value.Value)
klog.InfoS("[qosaware-cpu] get regulated control knob", "region", r.name, "bindingNumas", r.bindingNumas.String(),
"policy", policy, "knob", knob, "action", value.Action, "value", value.Value)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,12 @@ func (r *QoSRegionDedicatedNumaExclusive) TryUpdateProvision() {
rawControlKnobs := r.getProvisionControlKnob()

// regulate control knobs
r.regulateProvisionControlKnob(rawControlKnobs, &r.ControlKnobs)
r.regulateProvisionControlKnob(rawControlKnobs, r.getEffectiveControlKnobs())
}

func (r *QoSRegionDedicatedNumaExclusive) updateProvisionPolicy() {
r.ControlEssentials = types.ControlEssentials{
ControlKnobs: r.getControlKnobs(),
ControlKnobs: r.getEffectiveControlKnobs(),
ReclaimOverlap: true,
}

Expand Down Expand Up @@ -174,7 +174,7 @@ out:
r.idle.Store(idle)
}

func (r *QoSRegionDedicatedNumaExclusive) getControlKnobs() types.ControlKnob {
func (r *QoSRegionDedicatedNumaExclusive) getEffectiveControlKnobs() types.ControlKnob {
reclaimedCPUSize := 0
if reclaimedInfo, ok := r.metaReader.GetPoolInfo(commonstate.PoolNameReclaim); ok {
for _, numaID := range r.bindingNumas.ToSliceInt() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,12 @@ func (r *QoSRegionShare) TryUpdateProvision() {
restrictedControlKnobs := r.restrictProvisionControlKnob(rawControlKnobs)

// regulate control knobs
r.regulateProvisionControlKnob(restrictedControlKnobs, &r.ControlKnobs)
r.regulateProvisionControlKnob(restrictedControlKnobs, r.getEffectiveControlKnobs())
}

func (r *QoSRegionShare) updateProvisionPolicy() {
r.ControlEssentials = types.ControlEssentials{
ControlKnobs: r.getControlKnobs(),
ControlKnobs: r.getEffectiveControlKnobs(),
ReclaimOverlap: r.AllowSharedCoresOverlapReclaimedCores,
}

Expand Down Expand Up @@ -197,7 +197,7 @@ func (r *QoSRegionShare) restrictProvisionControlKnob(originControlKnob map[type
return restrictedControlKnob
}

func (r *QoSRegionShare) getControlKnobs() types.ControlKnob {
func (r *QoSRegionShare) getEffectiveControlKnobs() types.ControlKnob {
regionInfo, ok := r.metaReader.GetRegionInfo(r.name)
if ok {
if _, existed := regionInfo.ControlKnobMap[configapi.ControlKnobNonReclaimedCPURequirement]; existed {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ func TestRestrictProvisionControlKnob(t *testing.T) {
MaxLowerGap: pointer.Float64(1),
},
},
originControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobValue{Value: 8}}, "p2": {"c1": types.ControlKnobValue{Value: 10}}},
wantControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobValue{Value: 9}}, "p2": {"c1": types.ControlKnobValue{Value: 10}}},
originControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobItem{Value: 8}}, "p2": {"c1": types.ControlKnobItem{Value: 10}}},
wantControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobItem{Value: 9}}, "p2": {"c1": types.ControlKnobItem{Value: 10}}},
},
{
name: "upper ref",
Expand All @@ -231,8 +231,8 @@ func TestRestrictProvisionControlKnob(t *testing.T) {
MaxLowerGap: pointer.Float64(1),
},
},
originControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobValue{Value: 16}}, "p2": {"c1": types.ControlKnobValue{Value: 10}}},
wantControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobValue{Value: 14}}, "p2": {"c1": types.ControlKnobValue{Value: 10}}},
originControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobItem{Value: 16}}, "p2": {"c1": types.ControlKnobItem{Value: 10}}},
wantControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobItem{Value: 14}}, "p2": {"c1": types.ControlKnobItem{Value: 10}}},
},
}

Expand Down
Loading

0 comments on commit bbf3ec5

Please sign in to comment.