Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: refresh node metadata on stale nodes #225

Merged
merged 1 commit into from
Aug 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 42 additions & 17 deletions cloud/linode/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
"github.com/linode/linode-cloud-controller-manager/cloud/linode/client"
)

// Timing defaults for the node controller.
const (
// informerResyncPeriod is the resync period passed to the node informer when
// this controller registers its Add/Update event handlers.
informerResyncPeriod = 1 * time.Minute
// defaultMetadataTTL is the metadata TTL used by newNodeController unless
// LINODE_METADATA_TTL is set to a positive integer number of seconds.
defaultMetadataTTL = 300 * time.Second
)

type nodeController struct {
sync.RWMutex

Expand All @@ -38,10 +43,10 @@
}

func newNodeController(kubeclient kubernetes.Interface, client client.Client, informer v1informers.NodeInformer) *nodeController {
timeout := 300
timeout := defaultMetadataTTL

Check warning on line 46 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L46

Added line #L46 was not covered by tests
if raw, ok := os.LookupEnv("LINODE_METADATA_TTL"); ok {
if t, _ := strconv.Atoi(raw); t > 0 {
timeout = t
timeout = time.Duration(t) * time.Second

Check warning on line 49 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L49

Added line #L49 was not covered by tests
}
}

Expand All @@ -50,24 +55,36 @@
instances: newInstances(client),
kubeclient: kubeclient,
informer: informer,
ttl: time.Duration(timeout) * time.Second,
ttl: timeout,

Check warning on line 58 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L58

Added line #L58 was not covered by tests
metadataLastUpdate: make(map[string]time.Time),
queue: workqueue.NewDelayingQueue(),
}
}

func (s *nodeController) Run(stopCh <-chan struct{}) {
if _, err := s.informer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
node, ok := obj.(*v1.Node)
if !ok {
return
}

klog.Infof("NodeController will handle newly created node (%s) metadata", node.Name)
s.queue.Add(node)
if _, err := s.informer.Informer().AddEventHandlerWithResyncPeriod(
cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
node, ok := obj.(*v1.Node)
if !ok {
return

Check warning on line 70 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L65-L70

Added lines #L65 - L70 were not covered by tests
}

klog.Infof("NodeController will handle newly created node (%s) metadata", node.Name)
s.queue.Add(node)

Check warning on line 74 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L73-L74

Added lines #L73 - L74 were not covered by tests
},
UpdateFunc: func(oldObj, newObj interface{}) {
node, ok := newObj.(*v1.Node)
if !ok {
return

Check warning on line 79 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L76-L79

Added lines #L76 - L79 were not covered by tests
}

klog.Infof("NodeController will handle newly updated node (%s) metadata", node.Name)
s.queue.Add(node)

Check warning on line 83 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L82-L83

Added lines #L82 - L83 were not covered by tests
},
},
}); err != nil {
informerResyncPeriod,
); err != nil {

Check warning on line 87 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L87

Added line #L87 was not covered by tests
klog.Errorf("NodeController can't handle newly created node's metadata. %s", err)
}

Expand Down Expand Up @@ -125,19 +142,27 @@
}

func (s *nodeController) handleNode(ctx context.Context, node *v1.Node) error {
klog.Infof("NodeController handling node (%s) metadata", node.Name)
klog.V(3).InfoS("NodeController handling node metadata",
"node", klog.KObj(node))

Check warning on line 146 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L145-L146

Added lines #L145 - L146 were not covered by tests

lastUpdate := s.LastMetadataUpdate(node.Name)

uuid, foundLabel := node.Labels[annotations.AnnLinodeHostUUID]
configuredPrivateIP, foundAnnotation := node.Annotations[annotations.AnnLinodeNodePrivateIP]
if foundLabel && foundAnnotation && time.Since(lastUpdate) < s.ttl {

metaAge := time.Since(lastUpdate)
if foundLabel && foundAnnotation && metaAge < s.ttl {
klog.V(3).InfoS("Skipping refresh, ttl not reached",
"node", klog.KObj(node),
"ttl", s.ttl,
"metadata_age", metaAge,
)

Check warning on line 159 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L153-L159

Added lines #L153 - L159 were not covered by tests
return nil
}

linode, err := s.instances.lookupLinode(ctx, node)
if err != nil {
klog.Infof("instance lookup error: %s", err.Error())
klog.V(1).ErrorS(err, "Instance lookup error")

Check warning on line 165 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L165

Added line #L165 was not covered by tests
return err
}

Expand Down Expand Up @@ -182,7 +207,7 @@
_, err = s.kubeclient.CoreV1().Nodes().Update(ctx, n, metav1.UpdateOptions{})
return err
}); err != nil {
klog.Infof("node update error: %s", err.Error())
klog.V(1).ErrorS(err, "Node update error")

Check warning on line 210 in cloud/linode/node_controller.go

View check run for this annotation

Codecov / codecov/patch

cloud/linode/node_controller.go#L210

Added line #L210 was not covered by tests
return err
}

Expand Down
Loading