feature: migrate contextual logging #1613
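This PR replaces package-level klog calls (klog.ErrorS, klog.InfoS, klog.V(n).Infof) with contextual logging: a logger is obtained from the context via klog.FromContext and is then used directly or passed down explicitly. The sketch below illustrates only the general pattern and is not code from this PR; the package layout, the doWork function, and the key/value pairs are assumptions for illustration.

package main

import (
	"context"

	"k8s.io/klog/v2"
)

// doWork uses the contextual style: it reads the logger that its caller
// attached to the context instead of calling klog's package-level helpers.
func doWork(ctx context.Context) error {
	logger := klog.FromContext(ctx)
	logger.V(2).Info("starting work", "step", "example")
	return nil
}

func main() {
	// The caller attaches a logger (optionally enriched with key/value pairs)
	// to the context so callees can retrieve it with klog.FromContext.
	logger := klog.Background().WithValues("component", "descheduler")
	ctx := klog.NewContext(context.Background(), logger)
	if err := doWork(ctx); err != nil {
		logger.Error(err, "work failed")
	}
}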

Open · wants to merge 1 commit into base: master
4 changes: 2 additions & 2 deletions cmd/descheduler/app/options/options.go
@@ -127,7 +127,7 @@ func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) {
rs.SecureServing.AddFlags(fs)
}

func (rs *DeschedulerServer) Apply() error {
func (rs *DeschedulerServer) Apply(logger klog.Logger) error {
err := features.DefaultMutableFeatureGate.SetFromMap(rs.FeatureGates)
if err != nil {
return err
@@ -138,7 +138,7 @@ func (rs *DeschedulerServer) Apply() error {
var loopbackClientConfig *restclient.Config
var secureServing *apiserver.SecureServingInfo
if err := rs.SecureServing.ApplyTo(&secureServing, &loopbackClientConfig); err != nil {
klog.ErrorS(err, "failed to apply secure server configuration")
logger.Error(err, "failed to apply secure server configuration")
return err
}

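Apply now takes the logger as an explicit parameter (klog.Logger is an alias for logr.Logger, so any logr-compatible logger works). A minimal call-site sketch, assuming ctx already carries a logger, mirroring what server.go below does:

logger := klog.FromContext(ctx)
if err := rs.Apply(logger); err != nil {
	logger.Error(err, "failed to apply")
	return err
}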
17 changes: 10 additions & 7 deletions cmd/descheduler/app/server.go
@@ -43,9 +43,10 @@ import (

// NewDeschedulerCommand creates a *cobra.Command object with default parameters
func NewDeschedulerCommand(out io.Writer) *cobra.Command {
logger := klog.FromContext(context.TODO())
s, err := options.NewDeschedulerServer()
if err != nil {
klog.ErrorS(err, "unable to initialize server")
logger.Error(err, "unable to initialize server")
}

featureGate := featuregate.NewFeatureGate()
@@ -60,17 +61,18 @@ func NewDeschedulerCommand(out io.Writer) *cobra.Command {
if err := logsapi.ValidateAndApply(logConfig, featureGate); err != nil {
return err
}
descheduler.SetupPlugins()
descheduler.SetupPlugins(cmd.Context())
return nil
},
RunE: func(cmd *cobra.Command, args []string) error {
if err = s.Apply(); err != nil {
klog.ErrorS(err, "failed to apply")
logger := klog.FromContext(cmd.Context())
if err = s.Apply(logger); err != nil {
logger.Error(err, "failed to apply")
return err
}

if err = Run(cmd.Context(), s); err != nil {
klog.ErrorS(err, "failed to run descheduler server")
logger.Error(err, "failed to run descheduler server")
return err
}

@@ -89,6 +91,7 @@ func NewDeschedulerCommand(out io.Writer) *cobra.Command {

func Run(rootCtx context.Context, rs *options.DeschedulerServer) error {
ctx, done := signal.NotifyContext(rootCtx, syscall.SIGINT, syscall.SIGTERM)
logger := klog.FromContext(ctx)

pathRecorderMux := mux.NewPathRecorderMux("descheduler")
if !rs.DisableMetrics {
@@ -99,13 +102,13 @@ func Run(rootCtx context.Context, rs *options.DeschedulerServer) error {

stoppedCh, _, err := rs.SecureServingInfo.Serve(pathRecorderMux, 0, ctx.Done())
if err != nil {
klog.Fatalf("failed to start secure server: %v", err)
logger.Error(err, "failed to start secure server")
return err
}

err = tracing.NewTracerProvider(ctx, rs.Tracing.CollectorEndpoint, rs.Tracing.TransportCert, rs.Tracing.ServiceName, rs.Tracing.ServiceNamespace, rs.Tracing.SampleRate, rs.Tracing.FallbackToNoOpProviderOnError)
if err != nil {
klog.ErrorS(err, "failed to create tracer provider")
logger.Error(err, "failed to create tracer provider")
}
defer tracing.Shutdown(ctx)

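RunE reads the logger with klog.FromContext(cmd.Context()); if nothing was attached to that context, klog.FromContext falls back to the global klog logger. A hedged sketch of one way to attach a logger to the command's context, assuming the command is executed with ExecuteContext (this wiring is illustrative, not part of this PR):

package main

import (
	"context"
	"os"

	"github.com/spf13/cobra"
	"k8s.io/klog/v2"
)

func main() {
	cmd := &cobra.Command{
		Use: "descheduler",
		RunE: func(cmd *cobra.Command, args []string) error {
			// Same retrieval pattern as this PR uses in RunE.
			klog.FromContext(cmd.Context()).Info("running")
			return nil
		},
	}

	// Attach a named logger to the root context; cobra makes it available
	// through cmd.Context() inside RunE when ExecuteContext is used.
	ctx := klog.NewContext(context.Background(), klog.Background().WithName("descheduler"))
	if err := cmd.ExecuteContext(ctx); err != nil {
		os.Exit(1)
	}
}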
37 changes: 21 additions & 16 deletions pkg/descheduler/descheduler.go
@@ -198,10 +198,11 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
defer func(loopStartDuration time.Time) {
metrics.DeschedulerLoopDuration.With(map[string]string{}).Observe(time.Since(loopStartDuration).Seconds())
}(time.Now())
logger := klog.FromContext(ctx)

// if len is still <= 1 error out
if len(nodes) <= 1 {
klog.V(1).InfoS("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
logger.V(1).Info("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
return fmt.Errorf("the cluster size is 0 or 1")
}

@@ -210,7 +211,7 @@
// So evicting pods while running multiple strategies in a row has the same cumulative effect
// as evicting pods for real.
if d.rs.DryRun {
klog.V(3).Infof("Building a cached client from the cluster for the dry run")
logger.V(3).Info("Building a cached client from the cluster for the dry run")
// Create a new cache so we start from scratch without any leftovers
fakeClient := fakeclientset.NewSimpleClientset()
// simulate a pod eviction by deleting a pod
@@ -240,13 +241,13 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
client = d.rs.Client
}

klog.V(3).Infof("Setting up the pod evictor")
logger.V(3).Info("Setting up the pod evictor")
d.podEvictor.SetClient(client)
d.podEvictor.ResetCounters()

d.runProfiles(ctx, client, nodes)

klog.V(1).InfoS("Number of evictions/requests", "totalEvicted", d.podEvictor.TotalEvicted(), "evictionRequests", d.podEvictor.TotalEvictionRequests())
logger.V(1).Info("Number of evictions/requests", "totalEvicted", d.podEvictor.TotalEvicted(), "evictionRequests", d.podEvictor.TotalEvictionRequests())

return nil
}
@@ -259,8 +260,10 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interfac
ctx, span = tracing.Tracer().Start(ctx, "runProfiles")
defer span.End()
var profileRunners []profileRunner
logger := klog.FromContext(ctx)
for _, profile := range d.deschedulerPolicy.Profiles {
currProfile, err := frameworkprofile.NewProfile(
ctx,
profile,
pluginregistry.PluginRegistry,
frameworkprofile.WithClientSet(client),
@@ -270,7 +273,7 @@
frameworkprofile.WithMetricsCollector(d.metricsCollector),
)
if err != nil {
klog.ErrorS(err, "unable to create a profile", "profile", profile.Name)
logger.Error(err, "unable to create a profile", "profile", profile.Name)
continue
}
profileRunners = append(profileRunners, profileRunner{profile.Name, currProfile.RunDeschedulePlugins, currProfile.RunBalancePlugins})
@@ -281,7 +284,7 @@
status := profileR.descheduleEPs(ctx, nodes)
if status != nil && status.Err != nil {
span.AddEvent("failed to perform deschedule operations", trace.WithAttributes(attribute.String("err", status.Err.Error()), attribute.String("profile", profileR.name), attribute.String("operation", tracing.DescheduleOperation)))
klog.ErrorS(status.Err, "running deschedule extension point failed with error", "profile", profileR.name)
logger.Error(status.Err, "running deschedule extension point failed with error", "profile", profileR.name)
continue
}
}
@@ -291,7 +294,7 @@
status := profileR.balanceEPs(ctx, nodes)
if status != nil && status.Err != nil {
span.AddEvent("failed to perform balance operations", trace.WithAttributes(attribute.String("err", status.Err.Error()), attribute.String("profile", profileR.name), attribute.String("operation", tracing.BalanceOperation)))
klog.ErrorS(status.Err, "running balance extension point failed with error", "profile", profileR.name)
logger.Error(status.Err, "running balance extension point failed with error", "profile", profileR.name)
continue
}
}
@@ -302,6 +305,7 @@ func Run(ctx context.Context, rs *options.DeschedulerServer) error {
ctx, span = tracing.Tracer().Start(ctx, "Run")
defer span.End()
metrics.Register()
logger := klog.FromContext(ctx)

clientConnection := rs.ClientConnection
if rs.KubeconfigFile != "" && clientConnection.Kubeconfig == "" {
@@ -314,7 +318,7 @@
rs.Client = rsclient
rs.EventClient = eventClient

deschedulerPolicy, err := LoadPolicyConfig(rs.PolicyConfigFile, rs.Client, pluginregistry.PluginRegistry)
deschedulerPolicy, err := LoadPolicyConfig(ctx, rs.PolicyConfigFile, rs.Client, pluginregistry.PluginRegistry)
if err != nil {
return err
}
@@ -324,7 +328,7 @@

// Add k8s compatibility warnings to logs
if err := validateVersionCompatibility(rs.Client.Discovery(), version.Get()); err != nil {
klog.Warning(err.Error())
logger.Error(err, "validate version error")
}

evictionPolicyGroupVersion, err := eutils.SupportEviction(rs.Client)
@@ -350,11 +354,11 @@
}

if rs.LeaderElection.LeaderElect && rs.DryRun {
klog.V(1).Info("Warning: DryRun is set to True. You need to disable it to use Leader Election.")
logger.V(1).Info("Warning: DryRun is set to True. You need to disable it to use Leader Election.")
}

if rs.LeaderElection.LeaderElect && !rs.DryRun {
if err := NewLeaderElection(runFn, rsclient, &rs.LeaderElection, ctx); err != nil {
if err := NewLeaderElection(ctx, runFn, rsclient, &rs.LeaderElection); err != nil {
span.AddEvent("Leader Election Failure", trace.WithAttributes(attribute.String("err", err.Error())))
return fmt.Errorf("leaderElection: %w", err)
}
@@ -416,6 +420,7 @@ func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action cor
}

func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string) error {
logger := klog.FromContext(ctx)
var span trace.Span
ctx, span = tracing.Tracer().Start(ctx, "RunDeschedulerStrategies")
defer span.End()
@@ -450,11 +455,11 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer

if deschedulerPolicy.MetricsCollector.Enabled {
go func() {
klog.V(2).Infof("Starting metrics collector")
logger.V(2).Info("Starting metrics collector")
descheduler.metricsCollector.Run(ctx)
klog.V(2).Infof("Stopped metrics collector")
logger.V(2).Info("Stopped metrics collector")
}()
klog.V(2).Infof("Waiting for metrics collector to sync")
logger.V(2).Info("Waiting for metrics collector to sync")
if err := wait.PollWithContext(ctx, time.Second, time.Minute, func(context.Context) (done bool, err error) {
return descheduler.metricsCollector.HasSynced(), nil
}); err != nil {
@@ -470,14 +475,14 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
nodes, err := nodeutil.ReadyNodes(sCtx, rs.Client, descheduler.sharedInformerFactory.Core().V1().Nodes().Lister(), nodeSelector)
if err != nil {
sSpan.AddEvent("Failed to detect ready nodes", trace.WithAttributes(attribute.String("err", err.Error())))
klog.Error(err)
logger.Error(err, "Failed to retrieve ready nodes from the cluster")
cancel()
return
}
err = descheduler.runDeschedulerLoop(sCtx, nodes)
if err != nil {
sSpan.AddEvent("Failed to run descheduler loop", trace.WithAttributes(attribute.String("err", err.Error())))
klog.Error(err)
logger.Error(err, "Failed to execute the descheduler loop with the retrieved nodes")
cancel()
return
}