
Commit 7729980

Improve controller logging coverage and fix tests
* route every controller reconcile path through the shared logging helpers, ensuring consistent outcome/errors/logic-gate instrumentation
* update controller unit tests to pass explicit loggers after the helper signature changes

Signed-off-by: Maximilian Rink <[email protected]>
1 parent 0770c07 commit 7729980

14 files changed: 222 additions, 77 deletions

controllers/logging_helpers.go

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+package controllers
+
+import (
+    "github.com/go-logr/logr"
+    "github.com/metal3-io/cluster-api-provider-metal3/baremetal"
+)
+
+// logReconcileOutcome centralizes logging for controller reconciliation lifecycle events.
+// It ensures every reconciliation emits a completion log on the debug level and records
+// failures uniformly so that callers do not have to duplicate error logging.
+func logReconcileOutcome(logger logr.Logger, controllerName string, rerr *error) {
+    if rerr == nil {
+        return
+    }
+
+    if *rerr != nil {
+        // Controller-runtime already logs returned errors; avoid duplicate noise here.
+        return
+    }
+
+    logger.V(baremetal.VerbosityLevelDebug).Info("Reconciliation completed", "controller", controllerName)
+}
+
+// logLogicGate makes it easy to record decision points at debug level while keeping
+// a consistent verbosity across controllers.
+func logLogicGate(logger logr.Logger, message string, keysAndValues ...any) {
+    logger.V(baremetal.VerbosityLevelDebug).Info(message, keysAndValues...)
+}
+
+// logAndReturnError logs the supplied error with the provided message and then returns it.
+// This keeps the call sites compact while ensuring every bubbling error is captured.
+func logAndReturnError(logger logr.Logger, message string, err error, keysAndValues ...any) error {
+    if err == nil {
+        return nil
+    }
+
+    logger.Error(err, message, keysAndValues...)
+    return err
+}
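
A minimal usage sketch, for orientation only: the ExampleReconciler type, its Client/Log fields, and the "example-controller" name are illustrative assumptions rather than code from this commit; the helper calls mirror the pattern the controllers below adopt.

func (r *ExampleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, rerr error) {
    logger := r.Log.WithName("example-controller").WithValues("resource", req.NamespacedName)
    // Deferred so a debug-level completion message is emitted on every successful return path.
    defer logReconcileOutcome(logger, "example-controller", &rerr)

    obj := &infrav1.Metal3Cluster{}
    if err := r.Client.Get(ctx, req.NamespacedName, obj); err != nil {
        if apierrors.IsNotFound(err) {
            // Decision point recorded at debug verbosity via the shared helper.
            logLogicGate(logger, "resource not found, skipping")
            return ctrl.Result{}, nil
        }
        // Error path: captured once with context before bubbling up.
        return ctrl.Result{}, logAndReturnError(logger, "failed to fetch resource", err)
    }
    return ctrl.Result{}, nil
}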

controllers/metal3cluster_controller.go

Lines changed: 27 additions & 7 deletions
@@ -74,16 +74,18 @@ type Metal3ClusterReconciler struct {
 // and what is in the Metal3Cluster.Spec.
 func (r *Metal3ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, rerr error) {
     clusterLog := log.Log.WithName(clusterControllerName).WithValues("metal3-cluster", req.NamespacedName)
+    defer logReconcileOutcome(clusterLog, clusterControllerName, &rerr)
 
     // Fetch the Metal3Cluster instance
     metal3Cluster := &infrav1.Metal3Cluster{}
 
     if err := r.Client.Get(ctx, req.NamespacedName, metal3Cluster); err != nil {
         if apierrors.IsNotFound(err) {
+            logLogicGate(clusterLog, "Metal3Cluster resource not found in cache, skipping")
             return ctrl.Result{}, nil
         }
 
-        return ctrl.Result{}, err
+        return ctrl.Result{}, errors.Wrap(err, "Failed to get Metal3Cluster")
     }
 
     // This is checking if default values are changed or not if the default
@@ -121,6 +123,7 @@ func (r *Metal3ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reques
     // Fetch the Cluster.
     cluster, err := util.GetOwnerCluster(ctx, r.Client, metal3Cluster.ObjectMeta)
     if err != nil {
+        clusterLog.Error(err, "Unable to get owner cluster")
         invalidConfigError := capierrors.InvalidConfigurationClusterError
         metal3Cluster.Status.FailureReason = &invalidConfigError
         metal3Cluster.Status.FailureMessage = ptr.To("Unable to get owner cluster")
@@ -134,6 +137,7 @@ func (r *Metal3ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reques
     }
     if cluster == nil {
         clusterLog.Info("Waiting for Cluster Controller to set OwnerRef on Metal3Cluster")
+        logLogicGate(clusterLog, "Owner cluster not yet set, requeueing")
         return ctrl.Result{}, nil
     }
 
@@ -142,6 +146,10 @@ func (r *Metal3ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reques
     // Return early if Metal3Cluster or Cluster is paused.
     var isPaused, requeue bool
     if isPaused, requeue, err = paused.EnsurePausedCondition(ctx, r.Client, cluster, metal3Cluster); err != nil || isPaused || requeue {
+        if err != nil {
+            clusterLog.Error(err, "Failed to evaluate paused condition")
+        }
+        logLogicGate(clusterLog, "Pause condition active", "isPaused", isPaused, "requeue", requeue)
         return ctrl.Result{Requeue: requeue, RequeueAfter: requeueAfter}, nil
     }
 
@@ -158,13 +166,14 @@ func (r *Metal3ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reques
 
     // Handle deleted clusters
     if !metal3Cluster.DeletionTimestamp.IsZero() {
+        logLogicGate(clusterLog, "Metal3Cluster has deletion timestamp, entering delete reconciliation")
         v1beta2conditions.Set(metal3Cluster, metav1.Condition{
             Type:   infrav1.Metal3ClusterReadyV1Beta2Condition,
             Status: metav1.ConditionFalse,
             Reason: infrav1.Metal3ClusterDeletingV1Beta2Reason,
         })
         var res ctrl.Result
-        res, err = reconcileDelete(ctx, clusterMgr)
+        res, err = reconcileDelete(ctx, clusterMgr, clusterLog)
         // Requeue if the reconcile failed because the ClusterCache was locked for
         // the current cluster because of concurrent access.
         if errors.Is(err, clustercache.ErrClusterNotConnected) {
@@ -175,7 +184,8 @@ func (r *Metal3ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reques
     }
 
     // Handle non-deleted clusters
-    res, err := reconcileNormal(ctx, clusterMgr)
+    logLogicGate(clusterLog, "Metal3Cluster is active, entering normal reconciliation")
+    res, err := reconcileNormal(ctx, clusterMgr, clusterLog)
     // Requeue if the reconcile failed because the ClusterCache was locked for
     // the current cluster because of concurrent access.
     if errors.Is(err, clustercache.ErrClusterNotConnected) {
@@ -228,42 +238,52 @@ func patchMetal3Cluster(ctx context.Context, patchHelper *v1beta1patch.Helper, m
     return patchHelper.Patch(ctx, metal3Cluster, options...)
 }
 
-func reconcileNormal(ctx context.Context, clusterMgr baremetal.ClusterManagerInterface) (ctrl.Result, error) { //nolint:unparam
+func reconcileNormal(ctx context.Context, clusterMgr baremetal.ClusterManagerInterface, logger logr.Logger) (ctrl.Result, error) { //nolint:unparam
     // If the Metal3Cluster doesn't have finalizer, add it.
+    logLogicGate(logger, "Ensuring finalizer exists on Metal3Cluster")
     clusterMgr.SetFinalizer()
 
     // Create the Metal3 cluster (no-op)
+    logLogicGate(logger, "Invoking cluster creation/update flow")
     if err := clusterMgr.Create(ctx); err != nil {
-        return ctrl.Result{}, err
+        return ctrl.Result{}, errors.Wrap(err, "Failed during Metal3Cluster create")
     }
 
     // Set APIEndpoints so the Cluster API Cluster Controller can pull it
     if err := clusterMgr.UpdateClusterStatus(); err != nil {
         return ctrl.Result{}, errors.Wrap(err, "failed to get ip for the API endpoint")
     }
+    logLogicGate(logger, "Successfully reconciled Metal3Cluster status")
 
     return ctrl.Result{}, nil
 }
 
 func reconcileDelete(ctx context.Context,
-    clusterMgr baremetal.ClusterManagerInterface) (ctrl.Result, error) {
+    clusterMgr baremetal.ClusterManagerInterface,
+    logger logr.Logger,
+) (ctrl.Result, error) {
     // Verify that no metal3machine depend on the metal3cluster
+    logLogicGate(logger, "Checking for descendant resources before delete")
     descendants, err := clusterMgr.CountDescendants(ctx)
     if err != nil {
-        return ctrl.Result{}, err
+        return ctrl.Result{}, errors.Wrap(err, "Failed to count descendants")
     }
     if descendants > 0 {
         // Requeue so we can check the next time to see if there are still any
         // descendants left.
+        logLogicGate(logger, "Descendants still present, will requeue delete", "descendantCount", descendants)
         return ctrl.Result{Requeue: true, RequeueAfter: requeueAfter}, nil
     }
 
+    logLogicGate(logger, "No descendants found, deleting Metal3Cluster resources")
     if err := clusterMgr.Delete(); err != nil {
         return ctrl.Result{}, errors.Wrap(err, "failed to delete Metal3Cluster")
     }
 
     // Cluster is deleted so remove the finalizer.
+    logLogicGate(logger, "Removing Metal3Cluster finalizer")
     clusterMgr.UnsetFinalizer()
+    logLogicGate(logger, "Delete reconciliation completed")
 
     return ctrl.Result{}, nil
 }

controllers/metal3cluster_controller_test.go

Lines changed: 5 additions & 2 deletions
@@ -22,6 +22,7 @@ import (
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
     "github.com/pkg/errors"
+    "k8s.io/klog/v2/klogr"
 )
 
 var _ = Describe("Metal3Cluster controller", func() {
@@ -73,7 +74,8 @@ var _ = Describe("Metal3Cluster controller", func() {
             m.EXPECT().
                 Create(context.TODO()).Return(returnedError)
 
-            res, err := reconcileNormal(context.TODO(), m)
+            testLog := klogr.New()
+            res, err := reconcileNormal(context.TODO(), m, testLog)
 
             if tc.ExpectError {
                 Expect(err).To(HaveOccurred())
@@ -139,7 +141,8 @@ var _ = Describe("Metal3Cluster controller", func() {
                 returnedError,
             )
 
-            res, err := reconcileDelete(context.TODO(), m)
+            testLog := klogr.New()
+            res, err := reconcileDelete(context.TODO(), m, testLog)
 
             if tc.ExpectError {
                 Expect(err).To(HaveOccurred())

controllers/metal3data_controller.go

Lines changed: 21 additions & 7 deletions
@@ -63,15 +63,17 @@ type Metal3DataReconciler struct {
 // Reconcile handles Metal3Data events.
 func (r *Metal3DataReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, rerr error) {
     metadataLog := r.Log.WithName(dataControllerName).WithValues("metal3-data", req.NamespacedName)
+    defer logReconcileOutcome(metadataLog, dataControllerName, &rerr)
 
     // Fetch the Metal3Data instance.
     metal3Data := &infrav1.Metal3Data{}
 
     if err := r.Client.Get(ctx, req.NamespacedName, metal3Data); err != nil {
         if apierrors.IsNotFound(err) {
+            logLogicGate(metadataLog, "Metal3Data resource not found, skipping")
             return ctrl.Result{}, nil
         }
-        return ctrl.Result{}, err
+        return ctrl.Result{}, errors.Wrap(err, "Failed to fetch Metal3Data")
     }
     helper, err := v1beta1patch.NewHelper(metal3Data, r.Client)
     if err != nil {
@@ -86,11 +88,11 @@ func (r *Metal3DataReconciler) Reconcile(ctx context.Context, req ctrl.Request)
             metadataLog.Info("Metal3Data no longer exists, skipping patch")
             return
         }
-        metadataLog.Info("Failed to check if Metal3Data exists, attempting patch")
+        metadataLog.Error(err, "Failed to verify Metal3Data existence prior to patching")
     }
     err = helper.Patch(ctx, metal3Data)
     if err != nil {
-        metadataLog.Info("failed to Patch Metal3Data")
+        metadataLog.Error(err, "Failed to patch Metal3Data")
         rerr = err
     }
 }()
@@ -114,6 +116,7 @@ func (r *Metal3DataReconciler) Reconcile(ctx context.Context, req ctrl.Request)
     // Return early if the Metadata or Cluster is paused.
     if annotations.IsPaused(cluster, metal3Data) {
         metadataLog.Info("reconciliation is paused for this object")
+        logLogicGate(metadataLog, "Pause annotation detected on Metal3Data or cluster")
         return ctrl.Result{Requeue: true, RequeueAfter: requeueAfter}, nil
     }
 }
@@ -126,12 +129,14 @@ func (r *Metal3DataReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 
     // Handle deletion of Metal3Data
     if !metal3Data.ObjectMeta.DeletionTimestamp.IsZero() {
+        logLogicGate(metadataLog, "Metal3Data marked for deletion")
         // Check if the Metal3DataClaim is gone. We cannot clean up until it is.
         err := r.Client.Get(ctx, types.NamespacedName{Name: metal3Data.Spec.Claim.Name, Namespace: metal3Data.Spec.Claim.Namespace}, &infrav1.Metal3DataClaim{})
         if err != nil {
             if apierrors.IsNotFound(err) {
-                return r.reconcileDelete(ctx, metadataMgr)
+                return r.reconcileDelete(ctx, metadataMgr, metadataLog)
             }
+            metadataLog.Error(err, "Failed to check Metal3DataClaim during delete")
             return ctrl.Result{}, err
         }
         // Requeue until Metal3DataClaim is gone.
@@ -140,31 +145,40 @@ func (r *Metal3DataReconciler) Reconcile(ctx context.Context, req ctrl.Request)
     }
 
     // Handle non-deleted machines
-    return r.reconcileNormal(ctx, metadataMgr)
+    logLogicGate(metadataLog, "Metal3Data active, entering normal reconciliation")
+    return r.reconcileNormal(ctx, metadataMgr, metadataLog)
 }
 
 func (r *Metal3DataReconciler) reconcileNormal(ctx context.Context,
     metadataMgr baremetal.DataManagerInterface,
+    metadataLog logr.Logger,
 ) (ctrl.Result, error) {
     // If the Metal3Data doesn't have finalizer, add it.
+    logLogicGate(metadataLog, "Ensuring Metal3Data finalizer is present")
     metadataMgr.SetFinalizer()
 
     err := metadataMgr.Reconcile(ctx)
     if err != nil {
-        return checkReconcileError(err, "Failed to create secrets")
+        metadataLog.Error(err, "Failed to reconcile Metal3Data secrets")
+        return checkReconcileError(metadataLog, err, "Failed to create secrets")
     }
+    logLogicGate(metadataLog, "Metal3Data reconcile completed")
     return ctrl.Result{}, nil
 }
 
 func (r *Metal3DataReconciler) reconcileDelete(ctx context.Context,
     metadataMgr baremetal.DataManagerInterface,
+    metadataLog logr.Logger,
 ) (ctrl.Result, error) {
+    logLogicGate(metadataLog, "Releasing IP address leases before delete")
     err := metadataMgr.ReleaseLeases(ctx)
     if err != nil {
-        return checkReconcileError(err, "Failed to release IP address leases")
+        metadataLog.Error(err, "Failed to release leases")
+        return checkReconcileError(metadataLog, err, "Failed to release IP address leases")
     }
 
     metadataMgr.UnsetFinalizer()
+    logLogicGate(metadataLog, "Removed Metal3Data finalizer after cleanup")
 
     return ctrl.Result{}, nil
 }
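
The reworked checkReconcileError helper that these call sites now pass metadataLog into is defined outside the hunks shown here. As a rough sketch of the shape those calls imply — the transientError type is hypothetical and only stands in for whatever requeue-style error the project actually uses — it could look like:

// transientError is a hypothetical stand-in for the project's requeue-style error type;
// it exists only so this sketch is self-contained.
type transientError struct{ after time.Duration }

func (e *transientError) Error() string { return "transient condition, retry later" }

// checkReconcileError (sketch): turn transient conditions into a quiet requeue and wrap
// hard failures with context, matching the (logger, err, message) call sites added above.
func checkReconcileError(logger logr.Logger, err error, message string) (ctrl.Result, error) {
    if err == nil {
        return ctrl.Result{}, nil
    }
    var transient *transientError
    if errors.As(err, &transient) {
        // The logger records the requeue decision; hard failures are already logged
        // at the call sites shown above, so they are not logged a second time here.
        logLogicGate(logger, "Requeueing after transient error", "cause", err.Error())
        return ctrl.Result{Requeue: true, RequeueAfter: transient.after}, nil
    }
    return ctrl.Result{}, errors.Wrap(err, message)
}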

controllers/metal3data_controller_test.go

Lines changed: 3 additions & 2 deletions
@@ -30,6 +30,7 @@ import (
     "github.com/pkg/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/types"
+    "k8s.io/klog/v2/klogr"
     "k8s.io/utils/ptr"
     clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
     ctrl "sigs.k8s.io/controller-runtime"
@@ -303,7 +304,7 @@ var _ = Describe("Metal3Data manager", func() {
                 m.EXPECT().Reconcile(context.TODO()).Return(nil)
             }
 
-            res, err := dataReconcile.reconcileNormal(context.TODO(), m)
+            res, err := dataReconcile.reconcileNormal(context.TODO(), m, klogr.New())
             gomockCtrl.Finish()
 
             if tc.ExpectError {
@@ -364,7 +365,7 @@ var _ = Describe("Metal3Data manager", func() {
                 m.EXPECT().UnsetFinalizer()
             }
 
-            res, err := dataReconcile.reconcileDelete(context.TODO(), m)
+            res, err := dataReconcile.reconcileDelete(context.TODO(), m, klogr.New())
             gomockCtrl.Finish()
 
             if tc.ExpectError {
