Merge pull request #19 from utilitywarehouse/replace-old-hc
Replace old healthcheck
ffilippopoulos authored Sep 7, 2021
2 parents 4b8fb4e + de5ed81 commit 89c5ba0
Showing 5 changed files with 24 additions and 26 deletions.
9 changes: 6 additions & 3 deletions deploy/example/namespace/daemonset.yaml
@@ -41,12 +41,15 @@ spec:
           capabilities:
             add:
               - 'NET_ADMIN'
-        readinessProbe:
+        livenessProbe:
           httpGet:
             path: /healthz
             port: readiness-port
-          periodSeconds: 10
-          failureThreshold: 1
+          periodSeconds: 30
+          failureThreshold: 6
+          initialDelaySeconds: 10
+          successThreshold: 1
+          timeoutSeconds: 1
       volumes:
       - name: var-lib-semaphore-wireguard
         hostPath:
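With these values the kubelet probes /healthz over HTTP on the container's readiness-port every 30 seconds and restarts the container only after 6 consecutive failures, i.e. roughly 3 minutes of the endpoint reporting unavailable. The stanza as applied reads roughly as follows (a sketch; indentation and the readiness-port name follow the existing container spec):

        livenessProbe:
          httpGet:
            path: /healthz
            port: readiness-port
          initialDelaySeconds: 10
          periodSeconds: 30
          timeoutSeconds: 1
          failureThreshold: 6
          successThreshold: 1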
13 changes: 0 additions & 13 deletions kube/node_watcher.go
@@ -29,8 +29,6 @@ type NodeWatcher struct {
 	store        cache.Store
 	controller   cache.Controller
 	eventHandler NodeEventHandler
-	ListHealthy  bool
-	WatchHealthy bool
 }

 // NewNodeWatcher returns a new node watcher.
@@ -53,9 +51,6 @@ func (nw *NodeWatcher) Init() {
 			if err != nil {
 				log.Logger.Error("nw: list error", "err", err)
 				metrics.IncNodeWatcherFailures(nw.clusterName, "list")
-				nw.ListHealthy = false
-			} else {
-				nw.ListHealthy = true
 			}
 			return l, err
 		},
@@ -64,9 +59,6 @@ func (nw *NodeWatcher) Init() {
 			if err != nil {
 				log.Logger.Error("nw: watch error", "err", err)
 				metrics.IncNodeWatcherFailures(nw.clusterName, "watch")
-				nw.WatchHealthy = false
-			} else {
-				nw.WatchHealthy = true
 			}
 			return w, err
 		},
@@ -117,8 +109,3 @@ func (nw *NodeWatcher) List() ([]*v1.Node, error) {
 	}
 	return nodes, nil
 }
-
-// Healthy is true when both list and watch handlers are running without errors.
-func (nw *NodeWatcher) Healthy() bool {
-	return nw.ListHealthy && nw.WatchHealthy
-}
6 changes: 5 additions & 1 deletion main.go
@@ -162,8 +162,12 @@ func listenAndServe(runners []*Runner) {
 	mux := http.NewServeMux()
 	mux.Handle("/metrics", promhttp.Handler())
 	mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
+		// If we reach the listenAndServe func, the wg devices should have
+		// been initialised and should be running. One could use the runners'
+		// initialised flag for a liveness probe to kick the deployment
+		// after some time.
 		for _, r := range runners {
-			if !r.Healthy() {
+			if !r.initialised {
 				w.WriteHeader(http.StatusServiceUnavailable)
 				return
 			}
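The probe semantics are simple: /healthz answers 503 Service Unavailable while any runner has not finished initialising and 200 OK once all have, so the liveness probe above makes the kubelet restart the container if initialisation never completes. A minimal standalone sketch of the same behaviour (the runner type, port, and wiring here are illustrative, not the project's):

	package main

	import (
		"log"
		"net/http"
	)

	// runner is a stand-in for the project's Runner type; only the
	// initialised flag matters for the health endpoint.
	type runner struct{ initialised bool }

	func main() {
		runners := []*runner{{initialised: true}}
		http.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
			// Report unavailable while any runner has not completed its
			// initialisation; otherwise fall through, which answers 200 OK.
			for _, r := range runners {
				if !r.initialised {
					w.WriteHeader(http.StatusServiceUnavailable)
					return
				}
			}
		})
		log.Fatal(http.ListenAndServe(":8080", nil)) // listen address is illustrative
	}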
4 changes: 4 additions & 0 deletions metrics/metrics.go
@@ -225,6 +225,7 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
 	}
 }
 
+// SyncPeerAttempt increases the counter for attempts to sync wg peers
 func SyncPeerAttempt(device string, err error) {
 	s := "1"
 	if err != nil {
@@ -236,18 +237,21 @@ func SyncPeerAttempt(device string, err error) {
 	}).Inc()
 }
 
+// IncSyncQueueFullFailures increases sync queue failures counter
 func IncSyncQueueFullFailures(device string) {
 	syncQueueFullFailures.With(prometheus.Labels{
 		"device": device,
 	}).Inc()
 }
 
+// IncSyncRequeue increases requeue counter
 func IncSyncRequeue(device string) {
 	syncRequeue.With(prometheus.Labels{
 		"device": device,
 	}).Inc()
 }
 
+// IncNodeWatcherFailures increases node watcher failures counter
 func IncNodeWatcherFailures(c, v string) {
 	nodeWatcherFailures.With(prometheus.Labels{
 		"cluster": c,
18 changes: 9 additions & 9 deletions runner.go
@@ -53,6 +53,7 @@ type Runner struct {
 	nodeWatcher *kube.NodeWatcher
 	peers       map[string]Peer
 	canSync     bool // Flag to allow updating wireguard peers only after initial node watcher sync
+	initialised bool // Flag to turn on after the successful initialisation of the runner to report healthy
 	annotations RunnerAnnotations
 	sync        chan struct{}
 	stop        chan struct{}
@@ -65,6 +66,7 @@ func newRunner(client, watchClient kubernetes.Interface, nodeName, wgDeviceName,
 		podSubnet:   podSubnet,
 		peers:       make(map[string]Peer),
 		canSync:     false,
+		initialised: false,
 		annotations: constructRunnerAnnotations(localClusterName, remoteClusterName),
 		sync:        make(chan struct{}),
 		stop:        make(chan struct{}),
@@ -100,17 +102,20 @@ func (r *Runner) Run() error {
 	if err := r.device.EnsureLinkUp(); err != nil {
 		return err
 	}
+	// Static route to the whole subnet cidr
+	if err := r.device.AddRouteToNet(r.podSubnet); err != nil {
+		return err
+	}
+	// At this point the runner should be considered successfully initialised
+	r.initialised = true
+
 	go r.nodeWatcher.Run()
 	// wait for node watcher to sync. TODO: atm dummy and could run forever
 	// if node cache fails to sync
 	stopCh := make(chan struct{})
 	if ok := cache.WaitForNamedCacheSync("nodeWatcher", stopCh, r.nodeWatcher.HasSynced); !ok {
 		return fmt.Errorf("failed to wait for nodes cache to sync")
 	}
-	// Static route to the whole subnet cidr
-	if err := r.device.AddRouteToNet(r.podSubnet); err != nil {
-		return err
-	}
 	r.canSync = true
 	r.enqueuePeersSync()
 	return nil
@@ -308,8 +313,3 @@ func (r *Runner) onPeerNodeDelete(node *v1.Node) {
 	}
 	r.enqueuePeersSync()
 }
-
-// Healthy is true if the node watcher is reporting healthy.
-func (r *Runner) Healthy() bool {
-	return r.nodeWatcher.Healthy()
-}
