Skip to content

Commit 87022df

Browse files
authored
fix(pod-replacement): exclude container requirement (#1004)
Pod replacement operates at the pod level and should not require container information. The feature was failing because the injector expected target containers to be specified, but pod replacement works directly with pods identified by their IP address. This fix adds logic to detect when running the pod-replacement command and skips the container validation, creating a single config without container dependencies instead. Jira: CHAOSPLT-1330
1 parent c0d629e commit 87022df

File tree

4 files changed

+70
-24
lines changed

4 files changed

+70
-24
lines changed

api/v1beta1/pod_replacement.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55

66
package v1beta1
77

8-
import "strconv"
8+
import (
9+
"strconv"
10+
11+
chaostypes "github.com/DataDog/chaos-controller/types"
12+
)
913

1014
// PodReplacementSpec represents a pod replacement disruption
1115
type PodReplacementSpec struct {
@@ -27,7 +31,7 @@ func (s *PodReplacementSpec) Validate() error {
2731
// GenerateArgs generates injection or cleanup pod arguments for the given spec
2832
func (s *PodReplacementSpec) GenerateArgs() []string {
2933
args := []string{
30-
"pod-replacement",
34+
chaostypes.DisruptionKindPodReplacement,
3135
"inject",
3236
}
3337

cli/injector/main.go

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,16 @@ func initConfig() {
222222
// assign the parsed level to disruptionArgs so the new value persists after this method returns
223223
disruptionArgs.Level = chaostypes.DisruptionLevel(disruptionLevelRaw)
224224

225+
// check whether we're running the pod-replacement command, which doesn't need containers
226+
isPodReplacement := false
227+
228+
for _, arg := range os.Args {
229+
if arg == chaostypes.DisruptionKindPodReplacement {
230+
isPodReplacement = true
231+
break
232+
}
233+
}
234+
225235
switch disruptionArgs.Level {
226236
case chaostypes.DisruptionLevelPod:
227237
// check for container ID flag
@@ -231,33 +241,39 @@ func initConfig() {
231241
return
232242
}
233243

234-
for containerName, containerID := range disruptionArgs.TargetContainers {
235-
// retrieve container info
236-
ctn, err := container.New(containerID, containerName)
237-
if err != nil {
238-
log.Fatalw("can't create container object", tags.ErrorKey, err)
244+
if !isPodReplacement {
245+
// Every disruption other than pod replacement needs container information; pod replacement operates at the pod level and skips this step
246+
for containerName, containerID := range disruptionArgs.TargetContainers {
247+
// retrieve container info
248+
ctn, err := container.New(containerID, containerName)
249+
if err != nil {
250+
log.Fatalw("can't create container object", tags.ErrorKey, err)
239251

240-
return
241-
}
252+
return
253+
}
242254

243-
log.Infow("injector targeting container", tags.ContainerIDKey, containerID, tags.ContainerNameKey, containerName)
255+
log.Infow("injector targeting container", tags.ContainerIDKey, containerID, tags.ContainerNameKey, containerName)
244256

245-
pid := ctn.PID()
257+
pid := ctn.PID()
246258

247-
// keep pid for later if this is a chaos handler container
248-
if disruptionArgs.OnInit && ctn.Name() == chaosInitContName {
249-
handlerPID = pid
250-
}
259+
// keep pid for later if this is a chaos handler container
260+
if disruptionArgs.OnInit && ctn.Name() == chaosInitContName {
261+
handlerPID = pid
262+
}
251263

252-
ctns = append(ctns, ctn)
253-
pids = append(pids, pid)
254-
}
264+
ctns = append(ctns, ctn)
265+
pids = append(pids, pid)
266+
}
267+
} else {
268+
// check for pod IP flag
269+
if disruptionArgs.TargetPodIP == "" {
270+
log.Fatal("--target-pod-ip flag must be passed when --level=pod")
255271

256-
// check for pod IP flag
257-
if disruptionArgs.TargetPodIP == "" {
258-
log.Fatal("--target-pod-ip flag must be passed when --level=pod")
272+
return
273+
}
259274

260-
return
275+
pids = []uint32{1}
276+
ctns = []container.Container{nil}
261277
}
262278
case chaostypes.DisruptionLevelNode:
263279
pids = []uint32{1}

cli/injector/pod_replacement.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,16 @@ package main
88
import (
99
"strconv"
1010

11+
"github.com/spf13/cobra"
12+
1113
"github.com/DataDog/chaos-controller/api/v1beta1"
1214
"github.com/DataDog/chaos-controller/injector"
1315
"github.com/DataDog/chaos-controller/o11y/tags"
14-
"github.com/spf13/cobra"
16+
chaostypes "github.com/DataDog/chaos-controller/types"
1517
)
1618

1719
var podReplacementCmd = &cobra.Command{
18-
Use: "pod-replacement",
20+
Use: chaostypes.DisruptionKindPodReplacement,
1921
Short: "Pod replacement subcommands",
2022
Run: injectAndWait,
2123
PreRun: func(cmd *cobra.Command, args []string) {

examples/demo.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,30 @@ spec:
217217
requests:
218218
memory: 16Mi
219219
cpu: 10m
220+
## Create a second container to ensure that pod replacement works as expected
221+
- name: storage-monitor-2
222+
image: alpine:latest
223+
command: ["/bin/sh"]
224+
args:
225+
- -c
226+
- |
227+
echo "Storage monitoring container started"
228+
# Create a test file to demonstrate persistence
229+
echo "demo-storage-$(date)" > /mnt/shared/storage-info.txt
230+
# Keep container running and periodically update the file
231+
while true; do
232+
echo "$(date): Storage is available" >> /mnt/shared/heartbeat.log
233+
sleep 30
234+
done
235+
volumeMounts:
236+
- mountPath: /mnt/shared
237+
name: shared-storage
238+
resources:
239+
limits:
240+
memory: 16Mi
241+
cpu: 10m
242+
requests:
243+
memory: 16Mi
220244
volumeClaimTemplates:
221245
- metadata:
222246
name: shared-storage

0 commit comments

Comments
 (0)