Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions chart/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,21 @@ data:
{{- end }}
serviceAccount: {{ .Values.injector.serviceAccount | quote }}
chaosNamespace: {{ .Values.chaosNamespace | quote }}
{{- if .Values.injector.networkDisruption.allowedHosts }}
networkDisruption:
hostResolveInterval: {{ .Values.injector.networkDisruption.hostResolveInterval | quote }}
{{- if .Values.injector.networkDisruption.allowedHosts }}
allowedHosts:
{{- range $index, $allowedHost := .Values.injector.networkDisruption.allowedHosts }}
{{ $v := printf "%s;%v;%s;%s" ($allowedHost.host | default "") ($allowedHost.port | default "") ($allowedHost.protocol | default "") ($allowedHost.flow | default "") -}}
- {{ tpl $v $ }}
{{- end }}
{{- end }}
{{- end }}
{{- if .Values.injector.networkDisruption.dnsPodResolvConf }}
dnsPodResolvConf: {{ .Values.injector.networkDisruption.dnsPodResolvConf | quote }}
{{- end }}
{{- if .Values.injector.networkDisruption.dnsNodeResolvConf }}
dnsNodeResolvConf: {{ .Values.injector.networkDisruption.dnsNodeResolvConf | quote }}
{{- end }}
handler:
image: {{ template "chaos-controller.format-image" deepCopy .Values.global.chaos.defaultImage | merge .Values.global.oci | merge .Values.handler.image }}
enabled: {{ .Values.handler.enabled }}
Expand Down
2 changes: 2 additions & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ injector:
# port: 81
# protocol: tcp
# flow: ingress
# dnsPodResolvConf: "/etc/resolv.conf" # (optional) path for pod DNS resolv.conf
# dnsNodeResolvConf: "/mnt/host/etc/resolv.conf" # (optional) path for node DNS resolv.conf
Comment on lines +135 to +136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The indentation looks a little off?

handler:
image:
repo: chaos-handler
Expand Down
11 changes: 10 additions & 1 deletion cli/injector/network_disruption.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ var networkDisruptionCmd = &cobra.Command{
hostResolveInterval, _ := cmd.Flags().GetDuration("host-resolve-interval")
methods, _ := cmd.Flags().GetStringArray("method")
paths, _ := cmd.Flags().GetStringArray("path")
dnsPodResolvConf, _ := cmd.Flags().GetString("dns-pod-resolv-conf")
dnsNodeResolvConf, _ := cmd.Flags().GetString("dns-node-resolv-conf")

// prepare injectors
for i, config := range configs {
Expand Down Expand Up @@ -80,7 +82,12 @@ var networkDisruptionCmd = &cobra.Command{
}

// generate injector
inj, err := injector.NewNetworkDisruptionInjector(spec, injector.NetworkDisruptionInjectorConfig{Config: config, HostResolveInterval: hostResolveInterval})
inj, err := injector.NewNetworkDisruptionInjector(spec, injector.NetworkDisruptionInjectorConfig{
Config: config,
HostResolveInterval: hostResolveInterval,
DNSPodResolvConf: dnsPodResolvConf,
DNSNodeResolvConf: dnsNodeResolvConf,
})
if err != nil {
log.Fatalw("error initializing the network disruption injector: %w", err)
}
Expand All @@ -103,4 +110,6 @@ func init() {
networkDisruptionCmd.Flags().Duration("host-resolve-interval", time.Minute, "Interval to resolve hostnames")
networkDisruptionCmd.Flags().StringArray("method", []string{}, "Filter by http method: GET, DELETE, POST, CREATE, PUT, HEAD, PATCH, CONNECT, OPTIONS or TRACE")
networkDisruptionCmd.Flags().StringArray("path", []string{v1beta1.DefaultHTTPPathFilter}, "Filter by path and must not exceed 100 characters")
networkDisruptionCmd.Flags().String("dns-pod-resolv-conf", "", "Path to pod DNS resolv.conf file (defaults to /etc/resolv.conf)")
networkDisruptionCmd.Flags().String("dns-node-resolv-conf", "", "Path to node DNS resolv.conf file (defaults to /mnt/host/etc/resolv.conf)")
}
14 changes: 14 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ type Toleration struct {
type injectorNetworkDisruptionConfig struct {
AllowedHosts []string `json:"allowedHosts" yaml:"allowedHosts"`
HostResolveInterval time.Duration `json:"hostResolveInterval" yaml:"hostResolveInterval"`
DNSPodResolvConf string `json:"dnsPodResolvConf" yaml:"dnsPodResolvConf"`
DNSNodeResolvConf string `json:"dnsNodeResolvConf" yaml:"dnsNodeResolvConf"`
}

type handlerConfig struct {
Expand Down Expand Up @@ -338,6 +340,18 @@ func New(client corev1client.ConfigMapInterface, logger *zap.SugaredLogger, osAr
return cfg, err
}

mainFS.StringVar(&cfg.Injector.NetworkDisruption.DNSPodResolvConf, "injector-network-disruption-dns-pod-resolv-conf", "/etc/resolv.conf", "Path to pod DNS resolv.conf file")

if err := viper.BindPFlag("injector.networkDisruption.dnsPodResolvConf", mainFS.Lookup("injector-network-disruption-dns-pod-resolv-conf")); err != nil {
return cfg, err
}

mainFS.StringVar(&cfg.Injector.NetworkDisruption.DNSNodeResolvConf, "injector-network-disruption-dns-node-resolv-conf", "/mnt/host/etc/resolv.conf", "Path to node DNS resolv.conf file")

if err := viper.BindPFlag("injector.networkDisruption.dnsNodeResolvConf", mainFS.Lookup("injector-network-disruption-dns-node-resolv-conf")); err != nil {
return cfg, err
}

mainFS.BoolVar(&cfg.Handler.Enabled, "handler-enabled", false, "Enable the chaos handler for on-init disruptions")

if err := viper.BindPFlag("handler.enabled", mainFS.Lookup("handler-enabled")); err != nil {
Expand Down
77 changes: 77 additions & 0 deletions docs/network_disruption/hosts-and-services.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,12 @@ apiVersion: chaos.datadoghq.com/v1beta1
kind: Disruption
metadata:
name: network-disruption-istio
namespace: chaos-demo
spec:
level: pod
selector:
app: my-service
count: 1
network:
drop: 50
hosts:
Expand Down Expand Up @@ -322,6 +324,81 @@ network:
- **`pod-fallback-node`** (default): Use when you want resilience - try pod DNS first but fall back to node DNS if it fails
- **`node-fallback-pod`**: Use when node DNS is preferred but you want pod DNS as a backup

#### Customizing resolv.conf Paths

By default, the chaos-controller uses these locations for resolv.conf files:

- **Pod DNS**: `/etc/resolv.conf`
- **Node DNS**: `/mnt/host/etc/resolv.conf`

These defaults are set in the controller configuration. Some Kubernetes distributions or node configurations may use different locations for resolv.conf. You can override the defaults using Helm values:

**Helm Configuration:**

Override resolv.conf paths in your Helm values:

```yaml
injector:
networkDisruption:
# Path for pod DNS resolv.conf
# Default: /etc/resolv.conf
dnsPodResolvConf: "/run/systemd/resolve/resolv.conf"

# Path for node DNS resolv.conf
# Default: /mnt/host/etc/resolv.conf
dnsNodeResolvConf: "/mnt/host/run/systemd/resolve/stub-resolv.conf"
```

**Behavior:**
- Defaults are defined in the controller configuration (can be overridden via ConfigMap, environment variables, or CLI flags)
- The specified resolv.conf file must exist and be readable
- Configuration is passed to injector pods as command-line arguments
- Logging will indicate which resolv.conf file was loaded

**Example: Full Helm configuration for systemd-resolved nodes**

```yaml
# values.yaml
injector:
networkDisruption:
hostResolveInterval: 1m

# For nodes using systemd-resolved (Ubuntu, Debian, etc.)
dnsNodeResolvConf: "/mnt/host/run/systemd/resolve/stub-resolv.conf"

# For pods with custom DNS configuration
dnsPodResolvConf: "/run/systemd/resolve/resolv.conf"
```

**How it works:**

When you deploy with these Helm values:
1. The controller reads the configuration from the ConfigMap
2. For each network disruption, the controller creates an injector pod with CLI arguments:
```bash
/chaos-injector network-disruption \
--dns-pod-resolv-conf /run/systemd/resolve/resolv.conf \
--dns-node-resolv-conf /mnt/host/run/systemd/resolve/stub-resolv.conf \
...
```
3. The injector uses these paths for DNS resolution
4. Logs will show which resolv.conf files were loaded:
```
INFO loaded pod DNS configuration resolv_conf_path=/run/systemd/resolve/resolv.conf nameservers=[8.8.8.8, 8.8.4.4]
INFO loaded node DNS configuration resolv_conf_path=/mnt/host/run/systemd/resolve/stub-resolv.conf nameservers=[10.0.0.1]
```

You can verify the paths used by an injector pod:
```bash
kubectl describe pod chaos-injector-xxxxx -n chaos-engineering | grep dns-
```

**Use cases:**
- **systemd-resolved**: Nodes using systemd-resolved may have resolv.conf at `/run/systemd/resolve/resolv.conf` or `/run/systemd/resolve/stub-resolv.conf`
- **NetworkManager**: Some distributions use `/run/NetworkManager/resolv.conf`
- **Custom Kubernetes distributions**: Distributions like k3s, microk8s, or OpenShift may use non-standard paths
- **Custom DNS configurations**: Environments with custom DNS setups that require specific resolv.conf locations

### Some special cases

Cluster IPs can also be specified to target the relevant pods.
Expand Down
7 changes: 2 additions & 5 deletions injector/ipresolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,8 @@ func resolveHost(client network.DNSClient, host string, dnsStrategy string) ([]*
// if no IP has been parsed, fallback on a hostname
// and try to resolve it by using the container resolv.conf file
var resolvedIPs []net.IP
if dnsStrategy != "" {
resolvedIPs, err = client.ResolveWithStrategy(host, dnsStrategy)
} else {
resolvedIPs, err = client.Resolve(host)
}

resolvedIPs, err = client.ResolveWithStrategy(host, dnsStrategy)

if err != nil {
return nil, fmt.Errorf("can't resolve the given host with the configured dns resolver: %w", err)
Expand Down
10 changes: 9 additions & 1 deletion injector/network_disruption.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ type NetworkDisruptionInjectorConfig struct {
DNSClient network.DNSClient
HostResolveInterval time.Duration
BPFConfigInformer ebpf.ConfigInformer
DNSPodResolvConf string
DNSNodeResolvConf string
}

// tcServiceFilter describes a tc filter, representing the service filtered and its priority
Expand Down Expand Up @@ -144,7 +146,13 @@ func NewNetworkDisruptionInjector(spec v1beta1.NetworkDisruptionSpec, config Net
}

if config.DNSClient == nil {
config.DNSClient = network.NewDNSClient()
// Create DNS client with custom resolv.conf paths if provided
dnsConfig := network.DNSClientConfig{
PodResolvConfPath: config.DNSPodResolvConf,
NodeResolvConfPath: config.DNSNodeResolvConf,
Logger: config.Log,
}
config.DNSClient = network.NewDNSClient(dnsConfig)
}

if spec.HasHTTPFilters() && config.BPFConfigInformer == nil {
Expand Down
26 changes: 14 additions & 12 deletions injector/network_disruption_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ import (
"strings"
"time"

"github.com/stretchr/testify/mock"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/watch"
kubernetes "k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/testing"

"github.com/DataDog/chaos-controller/api"
"github.com/DataDog/chaos-controller/api/v1beta1"
"github.com/DataDog/chaos-controller/cgroup"
Expand All @@ -24,15 +32,9 @@ import (
"github.com/DataDog/chaos-controller/netns"
"github.com/DataDog/chaos-controller/network"
chaostypes "github.com/DataDog/chaos-controller/types"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/stretchr/testify/mock"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/watch"
kubernetes "k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/testing"
)

const (
Expand Down Expand Up @@ -143,7 +145,7 @@ var _ = Describe("Failure", func() {

// dns
dns = network.NewDNSClientMock(GinkgoT())
dns.EXPECT().Resolve("kubernetes.default").Return([]net.IP{net.ParseIP("192.168.0.254")}, nil).Maybe()
dns.EXPECT().ResolveWithStrategy("kubernetes", "default").Return([]net.IP{net.ParseIP("192.168.0.254")}, nil).Maybe()

// container
ctn = container.NewContainerMock(GinkgoT())
Expand Down Expand Up @@ -398,7 +400,7 @@ var _ = Describe("Failure", func() {
},
}

dns.EXPECT().Resolve("testhost").Return([]net.IP{net.ParseIP(testHostIP), net.ParseIP(testHostIPTwo)}, nil).Once()
dns.EXPECT().ResolveWithStrategy("testhost", "").Return([]net.IP{net.ParseIP(testHostIP), net.ParseIP(testHostIPTwo)}, nil).Once()
})

It("should not raise an error", func() {
Expand All @@ -409,7 +411,7 @@ var _ = Describe("Failure", func() {
tc.AssertCalled(GinkgoT(), "AddFilter", []string{"lo", "eth0", "eth1"}, "1:0", "", nilIPNet, buildSingleIPNet(testHostIP), 0, 80, network.TCP, network.ConnStateUndefined, "1:4")
tc.AssertCalled(GinkgoT(), "AddFilter", []string{"lo", "eth0", "eth1"}, "1:0", "", nilIPNet, buildSingleIPNet(testHostIPTwo), 0, 80, network.TCP, network.ConnStateUndefined, "1:4")

dns.EXPECT().Resolve("testhost").Return([]net.IP{net.ParseIP(testHostIPTwo), net.ParseIP(testHostIPThree)}, nil).Maybe()
dns.EXPECT().ResolveWithStrategy("testhost", "").Return([]net.IP{net.ParseIP(testHostIPTwo), net.ParseIP(testHostIPThree)}, nil).Maybe()
time.Sleep(time.Second) // Wait for changed IPs to be caught by the hostWatcher

tc.AssertCalled(GinkgoT(), "DeleteFilter", "lo", uint32(0))
Expand All @@ -422,7 +424,7 @@ var _ = Describe("Failure", func() {
tc.AssertCalled(GinkgoT(), "AddFilter", []string{"lo", "eth0", "eth1"}, "1:0", "", nilIPNet, buildSingleIPNet(testHostIP), 0, 80, network.TCP, network.ConnStateUndefined, "1:4")
tc.AssertCalled(GinkgoT(), "AddFilter", []string{"lo", "eth0", "eth1"}, "1:0", "", nilIPNet, buildSingleIPNet(testHostIPTwo), 0, 80, network.TCP, network.ConnStateUndefined, "1:4")

dns.EXPECT().Resolve("testhost").Return([]net.IP{net.ParseIP(testHostIP), net.ParseIP(testHostIPTwo), net.ParseIP(testHostIPThree)}, nil).Maybe()
dns.EXPECT().ResolveWithStrategy("testhost", "").Return([]net.IP{net.ParseIP(testHostIP), net.ParseIP(testHostIPTwo), net.ParseIP(testHostIPThree)}, nil).Maybe()
time.Sleep(time.Second) // Wait for changed IPs to be caught by the hostWatcher

tc.AssertNotCalled(GinkgoT(), "DeleteFilter")
Expand All @@ -433,7 +435,7 @@ var _ = Describe("Failure", func() {
tc.AssertCalled(GinkgoT(), "AddFilter", []string{"lo", "eth0", "eth1"}, "1:0", "", nilIPNet, buildSingleIPNet(testHostIP), 0, 80, network.TCP, network.ConnStateUndefined, "1:4")
tc.AssertCalled(GinkgoT(), "AddFilter", []string{"lo", "eth0", "eth1"}, "1:0", "", nilIPNet, buildSingleIPNet(testHostIPTwo), 0, 80, network.TCP, network.ConnStateUndefined, "1:4")

dns.EXPECT().Resolve("testhost").Return([]net.IP{net.ParseIP(testHostIP)}, nil).Maybe()
dns.EXPECT().ResolveWithStrategy("testhost", "").Return([]net.IP{net.ParseIP(testHostIP)}, nil).Maybe()
time.Sleep(time.Second) // Wait for changed IPs to be caught by the hostWatcher

tc.AssertCalled(GinkgoT(), "DeleteFilter", "lo", uint32(0))
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ func main() {
Labels: cfg.Injector.Labels,
Tolerations: cfg.Injector.Tolerations,
NetworkDisruptionAllowedHosts: cfg.Injector.NetworkDisruption.AllowedHosts,
DNSPodResolvConf: cfg.Injector.NetworkDisruption.DNSPodResolvConf,
DNSNodeResolvConf: cfg.Injector.NetworkDisruption.DNSNodeResolvConf,
ImagePullSecrets: cfg.Injector.ImagePullSecrets,
LogLevel: cfg.Injector.LogLevel,
},
Expand Down
Loading