@@ -40,6 +40,7 @@ import (
4040 "sigs.k8s.io/controller-runtime/pkg/client"
4141
4242 "github.com/medik8s/poison-pill/api/v1alpha1"
43+ machinev1beta1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1"
4344)
4445
4546const (
@@ -89,6 +90,7 @@ type PoisonPillRemediationReconciler struct {
8990//+kubebuilder:rbac:groups=poison-pill.medik8s.io,resources=poisonpillremediations/status,verbs=get;update;patch
9091//+kubebuilder:rbac:groups=poison-pill.medik8s.io,resources=poisonpillremediations/finalizers,verbs=update
9192//+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
93+ //+kubebuilder:rbac:groups=machine.openshift.io,resources=machines,verbs=get;list;watch
9294
9395func (r * PoisonPillRemediationReconciler ) Reconcile (ctx context.Context , req ctrl.Request ) (ctrl.Result , error ) {
9496 r .logger = r .Log .WithValues ("poisonpillremediation" , req .NamespacedName )
@@ -280,8 +282,21 @@ func (r *PoisonPillRemediationReconciler) restoreNode(nodeToRestore *v1.Node) (c
280282 return ctrl.Result {RequeueAfter : reconcileInterval }, nil
281283}
282284
283- // getNodeByMachine returns the node object referenced by machine
285+ // getNodeFromPpr returns the unhealthy node reported in the given ppr
284286func (r * PoisonPillRemediationReconciler ) getNodeFromPpr (ppr * v1alpha1.PoisonPillRemediation ) (* v1.Node , error ) {
287+ //PPR could be created by either machine based controller (e.g. MHC) or
288+ //by a node based controller (e.g. NHC). This assumes that machine based controller
289+ //will create the ppr with machine owner reference
290+
291+ for _ , ownerRef := range ppr .OwnerReferences {
292+ if ownerRef .Kind == "Machine" {
293+ r .logger .Info ("assuming the unhealthy resource is a machine" )
294+ return r .getNodeFromMachine (ownerRef , ppr .Namespace )
295+ }
296+ }
297+
298+ r .logger .Info ("assuming the unhealthy resource is a node" )
299+ //since we didn't find a machine owner ref, we assume that ppr name is the unhealthy node name
285300 node := & v1.Node {}
286301 key := client.ObjectKey {
287302 Name : ppr .Name ,
@@ -295,6 +310,40 @@ func (r *PoisonPillRemediationReconciler) getNodeFromPpr(ppr *v1alpha1.PoisonPil
295310 return node , nil
296311}
297312
313+ func (r * PoisonPillRemediationReconciler ) getNodeFromMachine (ref metav1.OwnerReference , ns string ) (* v1.Node , error ) {
314+ machine := & machinev1beta1.Machine {}
315+ machineKey := client.ObjectKey {
316+ Name : ref .Name ,
317+ Namespace : ns ,
318+ }
319+
320+ if err := r .Client .Get (context .Background (), machineKey , machine ); err != nil {
321+ r .logger .Error (err , "failed to get machine from PoisonPillRemediation CR owner ref" ,
322+ "machine name" , machineKey .Name , "namespace" , machineKey .Namespace )
323+ return nil , err
324+ }
325+
326+ if machine .Status .NodeRef == nil {
327+ err := errors .New ("nodeRef is nil" )
328+ r .logger .Error (err , "failed to retrieve node from the unhealthy machine" )
329+ return nil , err
330+ }
331+
332+ node := & v1.Node {}
333+ key := client.ObjectKey {
334+ Name : machine .Status .NodeRef .Name ,
335+ Namespace : machine .Status .NodeRef .Namespace ,
336+ }
337+
338+ if err := r .ApiReader .Get (context .Background (), key , node ); err != nil {
339+ r .logger .Error (err , "failed to retrieve node from the unhealthy machine" ,
340+ "node name" , node .Name , "machine name" , machine .Name )
341+ return nil , err
342+ }
343+
344+ return node , nil
345+ }
346+
298347func (r * PoisonPillRemediationReconciler ) markNodeAsUnschedulable (node * v1.Node ) (ctrl.Result , error ) {
299348 node .Spec .Unschedulable = true
300349 r .logger .Info ("Marking node as unschedulable" , "node name" , node .Name )
0 commit comments