From 0b75508242e443c4cac32ef0ac90f8783a1e5767 Mon Sep 17 00:00:00 2001 From: Zygimantas Date: Thu, 7 Nov 2024 15:35:18 +0100 Subject: [PATCH] fix(provider): improve recognition of exit codes currently, both DO and Docker providers do not wait for the container to exit before returning an exit code. this introduces a condition, where the container hasn't exited yet (e.g. with an error), but we already fetch its stdout and exit code (which at that point is 0). this commit makes the providers wait for the exec container to finish and only then return the response. (cherry picked from commit 4a670527d3ccfa408a44530125dd386fda0151af) # Conflicts: # core/provider/digitalocean/task.go # core/provider/docker/task.go # cosmos/node/init.go --- core/provider/digitalocean/task.go | 88 ++++++++++++++++++++++++++++-- core/provider/docker/task.go | 29 +++++++++- cosmos/node/genesis.go | 24 ++++++-- cosmos/node/init.go | 15 ++++- cosmos/node/keys.go | 6 +- 5 files changed, 149 insertions(+), 13 deletions(-) diff --git a/core/provider/digitalocean/task.go b/core/provider/digitalocean/task.go index 42b8777..9e9ff72 100644 --- a/core/provider/digitalocean/task.go +++ b/core/provider/digitalocean/task.go @@ -346,16 +346,39 @@ func (p *Provider) RunCommand(ctx context.Context, taskName string, command []st defer resp.Close() - execInspect, err := dockerClient.ContainerExecInspect(ctx, exec.ID) + lastExitCode := 0 + + err = util.WaitForCondition(ctx, 10*time.Second, 100*time.Millisecond, func() (bool, error) { + execInspect, err := dockerClient.ContainerExecInspect(ctx, exec.ID) + if err != nil { + return false, err + } + + if execInspect.Running { + return false, nil + } + + lastExitCode = execInspect.ExitCode + + return true, nil + }) + if err != nil { - return "", "", 0, err + p.logger.Error("failed to wait for exec", zap.Error(err), zap.String("taskName", taskName)) + return "", "", lastExitCode, err } var stdout, stderr bytes.Buffer _, err = stdcopy.StdCopy(&stdout, &stderr, 
resp.Reader) +<<<<<<< HEAD +======= + if err != nil { + return "", "", lastExitCode, err + } +>>>>>>> 4a67052 (fix(provider): improve recognition of exit codes) - return stdout.String(), stderr.String(), execInspect.ExitCode, nil + return stdout.String(), stderr.String(), lastExitCode, nil } func (p *Provider) RunCommandWhileStopped(ctx context.Context, taskName string, definition provider.TaskDefinition, command []string) (string, string, int, error) { @@ -425,16 +448,73 @@ func (p *Provider) RunCommandWhileStopped(ctx context.Context, taskName string, defer resp.Close() - execInspect, err := dockerClient.ContainerExecInspect(ctx, exec.ID) + lastExitCode := 0 + + err = util.WaitForCondition(ctx, 10*time.Second, 100*time.Millisecond, func() (bool, error) { + execInspect, err := dockerClient.ContainerExecInspect(ctx, exec.ID) + if err != nil { + return false, err + } + + if execInspect.Running { + return false, nil + } + + lastExitCode = execInspect.ExitCode + + return true, nil + }) + if err != nil { +<<<<<<< HEAD return "", "", 0, err +======= + p.logger.Error("failed to wait for exec", zap.Error(err), zap.String("taskName", taskName)) + return "", "", lastExitCode, err +>>>>>>> 4a67052 (fix(provider): improve recognition of exit codes) } var stdout, stderr bytes.Buffer _, err = stdcopy.StdCopy(&stdout, &stderr, resp.Reader) +<<<<<<< HEAD return stdout.String(), stderr.String(), execInspect.ExitCode, nil +======= + return stdout.String(), stderr.String(), lastExitCode, err +} + +func startContainerWithBlock(ctx context.Context, dockerClient *dockerclient.Client, containerID string) error { + // start container + if err := dockerClient.ContainerStart(ctx, containerID, types.ContainerStartOptions{}); err != nil { + return err + } + + // cancel container after a minute + waitCtx, cancel := context.WithTimeout(ctx, 3*time.Minute) + defer cancel() + ticker := time.NewTicker(100 * time.Millisecond) + for { + select { + case <-waitCtx.Done(): + return fmt.Errorf("error 
waiting for container to start: %v", waitCtx.Err()) + case <-ticker.C: + container, err := dockerClient.ContainerInspect(ctx, containerID) + if err != nil { + return err + } + + // if the container is running, we're done + if container.State.Running { + return nil + } + + if container.State.Status == "exited" && container.State.ExitCode != 0 { + return fmt.Errorf("container exited with status %d", container.State.ExitCode) + } + } + } +>>>>>>> 4a67052 (fix(provider): improve recognition of exit codes) } func (p *Provider) pullImage(ctx context.Context, dockerClient *dockerclient.Client, image string) error { diff --git a/core/provider/docker/task.go b/core/provider/docker/task.go index 7685536..fac2e8d 100644 --- a/core/provider/docker/task.go +++ b/core/provider/docker/task.go @@ -216,16 +216,39 @@ func (p *Provider) RunCommand(ctx context.Context, id string, command []string) defer resp.Close() - execInspect, err := p.dockerClient.ContainerExecInspect(ctx, exec.ID) + lastExitCode := 0 + + err = util.WaitForCondition(ctx, 10*time.Second, 100*time.Millisecond, func() (bool, error) { + execInspect, err := p.dockerClient.ContainerExecInspect(ctx, exec.ID) + if err != nil { + return false, err + } + + if execInspect.Running { + return false, nil + } + + lastExitCode = execInspect.ExitCode + + return true, nil + }) + if err != nil { - return "", "", 0, err + p.logger.Error("failed to wait for exec", zap.Error(err), zap.String("id", id)) + return "", "", lastExitCode, err } var stdout, stderr bytes.Buffer _, err = stdcopy.StdCopy(&stdout, &stderr, resp.Reader) +<<<<<<< HEAD +======= + if err != nil { + return "", "", lastExitCode, err + } +>>>>>>> 4a67052 (fix(provider): improve recognition of exit codes) - return stdout.String(), stderr.String(), execInspect.ExitCode, nil + return stdout.String(), stderr.String(), lastExitCode, nil } func (p *Provider) RunCommandWhileStopped(ctx context.Context, id string, definition provider.TaskDefinition, command []string) (string, 
string, int, error) { diff --git a/cosmos/node/genesis.go b/cosmos/node/genesis.go index 8101ace..65a6fcf 100644 --- a/cosmos/node/genesis.go +++ b/cosmos/node/genesis.go @@ -76,7 +76,11 @@ func (n *Node) AddGenesisAccount(ctx context.Context, address string, genesisAmo n.logger.Debug("add-genesis-account", zap.String("stdout", stdout), zap.String("stderr", stderr), zap.Int("exitCode", exitCode)) if err != nil { - return err + return fmt.Errorf("failed to add genesis account: %w", err) + } + + if exitCode != 0 { + return fmt.Errorf("failed to add genesis account (exitcode=%d): %s", exitCode, stderr) } return nil @@ -103,11 +107,15 @@ func (n *Node) GenerateGenTx(ctx context.Context, genesisSelfDelegation types.Co stdout, stderr, exitCode, err := n.Task.RunCommand(ctx, command) n.logger.Debug("gentx", zap.String("stdout", stdout), zap.String("stderr", stderr), zap.Int("exitCode", exitCode)) + if err != nil { + return fmt.Errorf("failed to generate genesis transaction: %w", err) + } + if exitCode != 0 { - return fmt.Errorf("failed to generate genesis transaction: %s (exitcode=%d)", stderr, exitCode) + return fmt.Errorf("failed to generate genesis transaction (exitcode=%d): %s", exitCode, stderr) } - return err + return nil } // CollectGenTxs collects the genesis transactions from the node and create a finalized genesis file @@ -125,7 +133,15 @@ func (n *Node) CollectGenTxs(ctx context.Context) error { stdout, stderr, exitCode, err := n.Task.RunCommand(ctx, n.BinCommand(command...)) n.logger.Debug("collect-gentxs", zap.String("stdout", stdout), zap.String("stderr", stderr), zap.Int("exitCode", exitCode)) - return err + if err != nil { + return fmt.Errorf("failed to collect genesis transactions: %w", err) + } + + if exitCode != 0 { + return fmt.Errorf("failed to collect genesis transactions (exitcode=%d): %s", exitCode, stderr) + } + + return nil } // OverwriteGenesisFile overwrites the genesis file on the node with the provided genesis file diff --git 
a/cosmos/node/init.go b/cosmos/node/init.go index 80fde22..37ba662 100644 --- a/cosmos/node/init.go +++ b/cosmos/node/init.go @@ -2,6 +2,11 @@ package node import ( "context" +<<<<<<< HEAD +======= + "fmt" + +>>>>>>> 4a67052 (fix(provider): improve recognition of exit codes) "go.uber.org/zap" ) @@ -13,5 +18,13 @@ func (n *Node) InitHome(ctx context.Context) error { stdout, stderr, exitCode, err := n.Task.RunCommand(ctx, n.BinCommand([]string{"init", n.Definition.Name, "--chain-id", chainConfig.ChainId}...)) n.logger.Debug("init home", zap.String("stdout", stdout), zap.String("stderr", stderr), zap.Int("exitCode", exitCode)) - return err + if err != nil { + return fmt.Errorf("failed to init home: %w", err) + } + + if exitCode != 0 { + return fmt.Errorf("failed to init home (exit code %d): %s", exitCode, stderr) + } + + return nil } diff --git a/cosmos/node/keys.go b/cosmos/node/keys.go index f132d0a..13dd8bb 100644 --- a/cosmos/node/keys.go +++ b/cosmos/node/keys.go @@ -57,12 +57,16 @@ func (n *Node) KeyBech32(ctx context.Context, name, bech string) (string, error) command = append(command, "--bech", bech) } - stdout, stderr, _, err := n.Task.RunCommand(ctx, command) + stdout, stderr, exitCode, err := n.Task.RunCommand(ctx, command) n.logger.Debug("show key", zap.String("name", name), zap.String("stdout", stdout), zap.String("stderr", stderr)) if err != nil { return "", fmt.Errorf("failed to show key %q (stderr=%q): %w", name, stderr, err) } + if exitCode != 0 { + return "", fmt.Errorf("failed to show key %q (exitcode=%d): %s", name, exitCode, stderr) + } + return util.CleanDockerOutput(stdout), nil }