Skip to content

Commit

Permalink
add check to access agent readiness endpoint
Browse files Browse the repository at this point in the history
This is intended as a minimal change to fix

signadot/signadot#5172

tested with warp: it causes restarts and we are able
to connect, whereas previously it hung

still to consider:

- do we need to make this visible in status?
- do we need to make the check period configurable?

add check to agent-metrics endpoints in tp monitor
  • Loading branch information
scott-cotton committed Dec 20, 2024
1 parent d5518e4 commit ed05c99
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 11 deletions.
39 changes: 29 additions & 10 deletions internal/locald/rootmanager/tp_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package rootmanager

import (
"fmt"
"net/http"
"time"

"log/slog"
Expand Down Expand Up @@ -128,16 +129,34 @@ func (mon *tpMonitor) checkTunnelProxyAccess(ctx context.Context) bool {
restartSvcs = true
}
}
if !restartSvcs {
// the grpc check for connecting to the tunnel proxy does not suffice
// because it has built-in retries and may re-use a connection while
// we are unable to establish a new connection. So, we also check
// that a fresh HTTP connection can reach the agent-metrics endpoint
cli := &http.Client{
Transport: &http.Transport{},
Timeout: 10 * time.Second,
}
resp, err := cli.Get("http://agent-metrics.signadot.svc:9090/metrics")
if err != nil {
mon.log.Error("unable to reach agent-metrics, restarting services", "error", err)
restartSvcs = true
} else {
resp.Body.Close()
}
}
if !restartSvcs {
mon.starting = false
return true
}

if restartSvcs {
// Restart localnet
mon.root.stopLocalnetService()
mon.root.runLocalnetService(ctx, mon.tpLocalAddr, mon.ipMap)
// Restart localnet
mon.root.stopLocalnetService()
mon.root.runLocalnetService(ctx, mon.tpLocalAddr, mon.ipMap)

// Restart etc hosts
mon.root.stopEtcHostsService()
mon.root.runEtcHostsService(ctx, mon.tpLocalAddr, mon.ipMap)
}
mon.starting = false
return true
// Restart etc hosts
mon.root.stopEtcHostsService()
mon.root.runEtcHostsService(ctx, mon.tpLocalAddr, mon.ipMap)
return false
}
2 changes: 1 addition & 1 deletion internal/locald/sandboxmanager/sdk.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (

var (
ErrSandboxManagerUnavailable = errors.New(
"sandboxmanager is not running, start it with \"signadot local connect\"")
`sandboxmanager is not running, start it with "signadot local connect"`)
)

func GetStatus() (*sbmapi.StatusResponse, error) {
Expand Down

0 comments on commit ed05c99

Please sign in to comment.