diff --git a/charts/terway/templates/clusterrole.yaml b/charts/terway/templates/clusterrole.yaml index 9a80759c..5726e805 100644 --- a/charts/terway/templates/clusterrole.yaml +++ b/charts/terway/templates/clusterrole.yaml @@ -98,11 +98,5 @@ rules: - network.alibabacloud.com resources: - '*' - verbs: - - '*' - - apiGroups: - - alibabacloud.com - resources: - - '*' verbs: - '*' \ No newline at end of file diff --git a/charts/terway/templates/daemonset.yaml b/charts/terway/templates/daemonset.yaml index a0eb6a89..35304bf3 100644 --- a/charts/terway/templates/daemonset.yaml +++ b/charts/terway/templates/daemonset.yaml @@ -1,59 +1,4 @@ --- -{{if .Values.enableIPvlan }} -apiVersion: batch/v1 -kind: Job -metadata: - name: terway-preflight -spec: - ttlSecondsAfterFinished: 300 - backoffLimit: 10 - template: - spec: - hostNetwork: true - priorityClassName: system-node-critical - tolerations: - - operator: "Exists" - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: type - operator: NotIn - values: - - virtual-kubelet - - key: kubernetes.io/arch - operator: In - values: - - amd64 - - arm64 - - key: kubernetes.io/os - operator: In - values: - - linux - containers: - - name: preflight - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" - command: - - sh - - "-ce" - - "cilium preflight register-crd" - volumeMounts: - - name: configvolume - mountPath: /etc/eni - restartPolicy: OnFailure - serviceAccountName: terway - volumes: - - name: configvolume - configMap: - name: eni-config - items: - - key: eni_conf - path: eni.json - - key: 10-terway.conf - path: 10-terway.conf -{{end}} ---- apiVersion: apps/v1 kind: DaemonSet @@ -106,40 +51,43 @@ spec: command: - /bin/init.sh env: - - name: TERWAY_DAEMON_MODE - value: "{{.Values.daemonMode}}" - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: DISABLE_POLICY - valueFrom: - 
configMapKeyRef: - name: eni-config - key: disable_network_policy - optional: true + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: DISABLE_POLICY + valueFrom: + configMapKeyRef: + name: eni-config + key: disable_network_policy + optional: true volumeMounts: - - name: config - mountPath: /etc/eni - - mountPath: /var-run-eni - name: var-run-eni - - name: configvolume - mountPath: /tmp/eni - - name: cni-bin - mountPath: /opt/cni/bin/ - - name: cni - mountPath: /etc/cni/net.d/ - - mountPath: /lib/modules - name: lib-modules - - mountPath: /host - name: host-root - - mountPath: /var/run/ - name: eni-run + - name: configvolume + mountPath: /etc/eni + - mountPath: /var-run-eni + name: var-run-eni + - name: cni-bin + mountPath: /opt/cni/bin/ + - name: cni-config-project + mountPath: /etc/cni/net.d/ + - mountPath: /lib/modules + name: lib-modules + - mountPath: /host + name: host-root + - mountPath: /var/run/ + name: eni-run containers: - name: terway image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: IfNotPresent - command: [ "/usr/bin/terwayd", "-log-level", "info", "-daemon-mode", "{{.Values.daemonMode}}" ] + command: + - "/usr/bin/terwayd" + - "-log-level" + - "info" + - "-daemon-mode" + - "{{.Values.daemonMode}}" + - "-config" + - "/etc/eni/eni_conf" securityContext: capabilities: add: @@ -164,27 +112,25 @@ spec: fieldRef: fieldPath: metadata.namespace volumeMounts: - - name: config - mountPath: /etc/eni - readOnly: true - - mountPath: /var/run/ - name: eni-run - - mountPath: /lib/modules - name: lib-modules - - mountPath: /var/lib/cni/networks - name: cni-networks - - mountPath: /var/lib/cni/terway - name: cni-terway - - mountPath: /etc/cni/net.d - name: cni - readOnly: true - - mountPath: /host-etc-net.d - name: host-cni - - mountPath: /var/lib/kubelet/device-plugins - name: device-plugin-path - - name: addon-token - mountPath: "/var/addon" - readOnly: true + - name: 
configvolume + mountPath: /etc/eni + readOnly: true + - mountPath: /var/run/ + name: eni-run + - mountPath: /lib/modules + name: lib-modules + - mountPath: /var/lib/cni/terway + name: cni-terway + - mountPath: /etc/cni/net.d + name: cni-config-project + readOnly: true + - mountPath: /host-etc-net.d + name: cni-config + - mountPath: /var/lib/kubelet/device-plugins + name: device-plugin-path + - name: addon-token + mountPath: "/var/addon" + readOnly: true - name: policy image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: IfNotPresent @@ -218,12 +164,6 @@ spec: fieldPath: metadata.namespace - name: CILIUM_CNI_CHAINING_MODE value: terway-chainer - - name: IN_CLUSTER_LOADBALANCE - valueFrom: - configMapKeyRef: - name: eni-config - key: in_cluster_loadbalance - optional: true securityContext: privileged: false capabilities: @@ -233,7 +173,7 @@ spec: - SYS_ADMIN - NET_RAW - SYS_MODULE -{{if .Values.enableIPvlan }} +{{- if .Values.enableIPvlan }} - CHOWN - KILL - IPC_LOCK @@ -263,12 +203,15 @@ spec: host: 127.0.0.1 periodSeconds: 10 volumeMounts: + - mountPath: /etc/eni + name: configvolume + readOnly: true - mountPath: /var-run-eni name: var-run-eni - mountPath: /lib/modules name: lib-modules - mountPath: /etc/cni/net.d - name: cni + name: cni-config-project readOnly: true # volumes use by cilium - mountPath: /sys/fs @@ -279,71 +222,62 @@ spec: - mountPath: /run/xtables.lock name: xtables-lock volumes: - - name: config - emptyDir: {} - - name: var-run-eni - emptyDir: { } - - name: configvolume - configMap: - name: eni-config - items: null - - name: cni-bin - hostPath: - path: /opt/cni/bin - type: "Directory" - - name: host-cni - hostPath: - path: /etc/cni/net.d - - name: cni - emptyDir: {} - - name: eni-run - hostPath: - path: /var/run/ - type: "Directory" - - name: lib-modules - hostPath: - path: /lib/modules - - name: cni-networks - hostPath: - path: /var/lib/cni/networks - - name: cni-terway - hostPath: - path: 
/var/lib/cni/terway - - name: device-plugin-path - hostPath: - path: /var/lib/kubelet/device-plugins - type: "Directory" - - name: host-root - hostPath: - path: / - type: "Directory" - - name: addon-token - secret: - secretName: addon.network.token - items: - - key: addon.token.config - path: token-config - optional: true - - name: alibaba-addon-secret - secret: - secretName: alibaba-addon-secret - optional: true - # used by cilium - # To keep state between restarts / upgrades - - hostPath: - path: /var/run/cilium - type: DirectoryOrCreate - name: cilium-run - # To keep state between restarts / upgrades for bpf maps - - hostPath: - path: /sys/fs/ - type: DirectoryOrCreate - name: sys-fs - # To access iptables concurrently with other processes (e.g. kube-proxy) - - hostPath: - path: /run/xtables.lock - type: FileOrCreate - name: xtables-lock + - name: var-run-eni + emptyDir: { } + - name: configvolume + configMap: + name: eni-config + items: null + - name: cni-bin + hostPath: + path: /opt/cni/bin + type: "Directory" + - name: cni-config + hostPath: + path: /etc/cni/net.d + - name: cni-config-project + emptyDir: { } + - name: eni-run + hostPath: + path: /var/run/ + type: "Directory" + - name: lib-modules + hostPath: + path: /lib/modules + - name: cni-terway + hostPath: + path: /var/lib/cni/terway + - name: device-plugin-path + hostPath: + path: /var/lib/kubelet/device-plugins + type: "Directory" + - name: host-root + hostPath: + path: / + type: "Directory" + - name: addon-token + secret: + secretName: addon.network.token + items: + - key: addon.token.config + path: token-config + optional: true + # used by cilium + # To keep state between restarts / upgrades + - hostPath: + path: /var/run/cilium + type: DirectoryOrCreate + name: cilium-run + # To keep state between restarts / upgrades for bpf maps + - hostPath: + path: /sys/fs/ + type: DirectoryOrCreate + name: sys-fs + # To access iptables concurrently with other processes (e.g. 
kube-proxy) + - hostPath: + path: /run/xtables.lock + type: FileOrCreate + name: xtables-lock --- diff --git a/cmd/terway-cli/cni.go b/cmd/terway-cli/cni.go index bc2806d2..a29f0df3 100644 --- a/cmd/terway-cli/cni.go +++ b/cmd/terway-cli/cni.go @@ -24,11 +24,10 @@ type switchDataPathV2Func func() bool var _switchDataPathV2 switchDataPathV2Func const ( - dataPathDefault = "" - dataPathVeth = "veth" - dataPathIPvlan = "ipvlan" - dataPathV2 = "datapathv2" - nodeCapabilityDatapath = "datapath" + dataPathDefault = "" + dataPathVeth = "veth" + dataPathIPvlan = "ipvlan" + dataPathV2 = "datapathv2" ) const ( @@ -102,7 +101,7 @@ func processCNIConfig(cmd *cobra.Command, args []string) error { } } - return nil + return storeRuntimeConfig(nodeCapabilitiesFile, cniJSON) } func processInput(files []string) error { @@ -192,7 +191,7 @@ func mergeConfigList(configs [][]byte, f *feature) (string, error) { } switch pluginType { - case "cilium-cni": + case pluginTypeCilium: // make sure cilium-cni is behind terway if !ebpfSupport { continue @@ -205,7 +204,7 @@ func mergeConfigList(configs [][]byte, f *feature) (string, error) { return "", err } - case "terway": + case pluginTypeTerway: if plugin.Exists("network_policy_provider") { networkPolicyProvider, ok = plugin.Path("network_policy_provider").Data().(string) if !ok { @@ -227,10 +226,11 @@ func mergeConfigList(configs [][]byte, f *feature) (string, error) { switch strings.ToLower(virtualType) { case dataPathVeth, dataPathDefault: + datapath = dataPathVeth + // only for terway-eniip if ebpfSupport && networkPolicyProvider == NetworkPolicyProviderEBPF { requireEBPFChainer = true - datapath = dataPathVeth } case dataPathIPvlan: requireIPvlan = true @@ -242,7 +242,7 @@ func mergeConfigList(configs [][]byte, f *feature) (string, error) { if requireIPvlan && !_switchDataPathV2() { fmt.Printf("keep ipvlan mode %v %v\n", requireIPvlan, !_switchDataPathV2()) - _, err = plugin.Set("IPVlan", "eniip_virtual_type") + _, err = 
plugin.Set(dataPathIPvlan, "eniip_virtual_type") if err != nil { return "", err } @@ -254,11 +254,6 @@ func mergeConfigList(configs [][]byte, f *feature) (string, error) { } datapath = dataPathV2 - - err = nodecap.WriteNodeCapabilities(nodeCapabilityDatapath, dataPathV2) - if err != nil { - return "", err - } } if edtSupport { @@ -272,9 +267,9 @@ func mergeConfigList(configs [][]byte, f *feature) (string, error) { } } } else { + datapath = dataPathVeth if ebpfSupport && networkPolicyProvider == NetworkPolicyProviderEBPF { requireEBPFChainer = true - datapath = dataPathVeth } } } @@ -312,3 +307,28 @@ func mountHostBpf() error { _, _ = fmt.Fprint(os.Stdout, string(out)) return nil } + +func storeRuntimeConfig(filePath string, container *gabs.Container) error { + store := nodecap.NewFileNodeCapabilities(filePath) + err := store.Load() + if err != nil { + return err + } + + // write back current runtime config + for _, plugin := range container.Path("plugins").Children() { + if plugin.Path("type").Data().(string) != pluginTypeTerway { + continue + } + if plugin.Exists("network_policy_provider") { + networkPolicyProvider := plugin.Path("network_policy_provider").Data().(string) + store.Set(nodecap.NodeCapabilityNetworkPolicyProvider, networkPolicyProvider) + } + if plugin.Exists("eniip_virtual_type") { + datapath := plugin.Path("eniip_virtual_type").Data().(string) + store.Set(nodecap.NodeCapabilityDataPath, datapath) + } + } + + return store.Save() +} diff --git a/cmd/terway-cli/cni_linux.go b/cmd/terway-cli/cni_linux.go index 7a485292..ab07dd9a 100644 --- a/cmd/terway-cli/cni_linux.go +++ b/cmd/terway-cli/cni_linux.go @@ -17,7 +17,7 @@ func switchDataPathV2() bool { return false } - prevDatapath := nodecap.GetNodeCapabilities(nodeCapabilityDatapath) + prevDatapath := nodecap.GetNodeCapabilities(nodecap.NodeCapabilityDataPath) if prevDatapath == dataPathV2 { fmt.Println("datapath is already v2") return true diff --git a/cmd/terway-cli/common.go 
b/cmd/terway-cli/common.go new file mode 100644 index 00000000..3cce1978 --- /dev/null +++ b/cmd/terway-cli/common.go @@ -0,0 +1,79 @@ +package main + +import ( + "os" + "path/filepath" +) + +const ( + pluginTypeTerway = "terway" + pluginTypeCilium = "cilium-cni" +) +const eniConfBasePath = "/etc/eni" + +type TerwayConfig struct { + enableNetworkPolicy bool + enableInClusterLB bool + + eniConfig []byte + cniConfig []byte + cniConfigList []byte +} + +// getAllConfig ready terway configmap mounted on path +func getAllConfig(base string) (*TerwayConfig, error) { + cfg := &TerwayConfig{ + enableNetworkPolicy: true, + } + + r, err := os.ReadFile(filepath.Join(base, "10-terway.conf")) + if err != nil { + // this file must exist + return nil, err + } + + cfg.cniConfig = r + + r, err = os.ReadFile(filepath.Join(base, "10-terway.conflist")) + if err != nil { + if !os.IsNotExist(err) { + return nil, err + } + } else { + cfg.cniConfigList = r + } + + r, err = os.ReadFile(filepath.Join(base, "disable_network_policy")) + if err != nil { + if !os.IsNotExist(err) { + return nil, err + } + // default enable policy + } else { + switch string(r) { + case "false", "0", "": + cfg.enableNetworkPolicy = true + default: + cfg.enableNetworkPolicy = false + } + } + + r, err = os.ReadFile(filepath.Join(base, "eni_conf")) + if err != nil { + // this file must exist + return nil, err + } + cfg.eniConfig = r + + r, err = os.ReadFile(filepath.Join(base, "in_cluster_loadbalance")) + if err != nil { + if !os.IsNotExist(err) { + return nil, err + } + } + if string(r) == "true" { + cfg.enableInClusterLB = true + } + + return cfg, nil +} diff --git a/cmd/terway-cli/common_test.go b/cmd/terway-cli/common_test.go new file mode 100644 index 00000000..6ee9da36 --- /dev/null +++ b/cmd/terway-cli/common_test.go @@ -0,0 +1,135 @@ +package main + +import ( + "io/fs" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestReadyENIConfig(t *testing.T) { + testCases := 
[]struct { + name string + files map[string]string + expectedConfig *TerwayConfig + expectedError error + }{ + { + name: "basic", + files: map[string]string{ + "10-terway.conf": "cni_config", + "eni_conf": "eni_config", + "in_cluster_loadbalance": "true", + }, + expectedConfig: &TerwayConfig{ + enableNetworkPolicy: true, + enableInClusterLB: true, + eniConfig: []byte("eni_config"), + cniConfig: []byte("cni_config"), + }, + }, + { + name: "disable network policy", + files: map[string]string{ + "10-terway.conf": "cni_config", + "eni_conf": "eni_config", + "disable_network_policy": "true", + }, + expectedConfig: &TerwayConfig{ + enableNetworkPolicy: false, + eniConfig: []byte("eni_config"), + cniConfig: []byte("cni_config"), + }, + }, + { + name: "enable network policy with empty file", + files: map[string]string{ + "10-terway.conf": "cni_config", + "eni_conf": "eni_config", + "disable_network_policy": "", + }, + expectedConfig: &TerwayConfig{ + enableNetworkPolicy: true, + eniConfig: []byte("eni_config"), + cniConfig: []byte("cni_config"), + }, + }, + + { + name: "with conflist", + files: map[string]string{ + "10-terway.conf": "cni_config", + "10-terway.conflist": "cni_config_list", + "eni_conf": "eni_config", + }, + expectedConfig: &TerwayConfig{ + enableNetworkPolicy: true, + eniConfig: []byte("eni_config"), + cniConfig: []byte("cni_config"), + cniConfigList: []byte("cni_config_list"), + }, + }, + { + name: "missing 10-terway.conf", + files: map[string]string{ + "eni_conf": "eni_config", + }, + expectedError: &fs.PathError{}, + }, + { + name: "missing eni_conf", + files: map[string]string{ + "10-terway.conf": "cni_config", + }, + expectedError: &fs.PathError{}, + }, + { + name: "error reading disable_network_policy", + files: map[string]string{ + "10-terway.conf": "cni_config", + "eni_conf": "eni_config", + "disable_network_policy": "invalid content", + }, + expectedConfig: &TerwayConfig{ + enableNetworkPolicy: false, // Because any non-false, 0, or empty value 
disables + eniConfig: []byte("eni_config"), + cniConfig: []byte("cni_config"), + }, + }, + { + name: "error reading in_cluster_loadbalance", + files: map[string]string{ + "10-terway.conf": "cni_config", + "eni_conf": "eni_config", + }, + expectedConfig: &TerwayConfig{ + enableNetworkPolicy: true, + enableInClusterLB: false, + eniConfig: []byte("eni_config"), + cniConfig: []byte("cni_config"), + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tempDir := t.TempDir() + for filename, content := range tc.files { + filePath := filepath.Join(tempDir, filename) + err := os.WriteFile(filePath, []byte(content), 0644) + assert.NoError(t, err) + } + + cfg, err := getAllConfig(tempDir) + + if tc.expectedError != nil { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tc.expectedConfig, cfg) + } + }) + } +} diff --git a/cmd/terway-cli/main.go b/cmd/terway-cli/main.go index 22944edb..a7acc9f5 100644 --- a/cmd/terway-cli/main.go +++ b/cmd/terway-cli/main.go @@ -94,7 +94,7 @@ var ( ) func init() { - rootCmd.AddCommand(listCmd, showCmd, mappingCmd, executeCmd, metadataCmd, cniCmd, nodeconfigCmd) + rootCmd.AddCommand(listCmd, showCmd, mappingCmd, executeCmd, metadataCmd, cniCmd, nodeconfigCmd, policyCmd) } func main() { diff --git a/cmd/terway-cli/node.go b/cmd/terway-cli/node.go index 9dd58013..8e1b1dd2 100644 --- a/cmd/terway-cli/node.go +++ b/cmd/terway-cli/node.go @@ -2,6 +2,7 @@ package main import ( "context" + "encoding/json" "fmt" "os" "time" @@ -75,11 +76,18 @@ var eniCfg *daemon.Config func getENIConfig(cmd *cobra.Command, args []string) error { var err error - eniCfg, err = daemon.GetConfigFromFileWithMerge("/etc/eni/eni.json", nil) + terwayConfig, err := getAllConfig(eniConfBasePath) if err != nil { return err } + cfg := daemon.Config{} + err = json.Unmarshal(terwayConfig.eniConfig, &cfg) + if err != nil { + return err + } + eniCfg = &cfg + fmt.Printf("eni config: %+v\n", eniCfg) return nil } diff --git 
a/cmd/terway-cli/policy.go b/cmd/terway-cli/policy.go new file mode 100644 index 00000000..0c0d0afd --- /dev/null +++ b/cmd/terway-cli/policy.go @@ -0,0 +1,334 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "strings" + "syscall" + + "github.com/Jeffail/gabs/v2" + "github.com/samber/lo" + "github.com/spf13/cobra" + + "github.com/AliyunContainerService/terway/pkg/utils/nodecap" + "github.com/AliyunContainerService/terway/types" +) + +type PolicyConfig struct { + Datapath string + EnableNetworkPolicy bool + PolicyProvider string + ExclusiveENI bool + HealthCheckPort string + IPv6 bool + InClusterLoadBalance bool +} + +type CNIConfig struct { + HubbleEnabled bool `json:"cilium_enable_hubble,omitempty"` + HubbleMetrics string `json:"cilium_hubble_metrics,omitempty"` + HubbleListenAddress string `json:"cilium_hubble_listen_address,omitempty"` + HubbleMetricServer string `json:"cilium_hubble_metrics_server,omitempty"` + CiliumExtraArgs string `json:"cilium_args,omitempty"` // legacy way. 
should move to config map +} + +var policyCmd = &cobra.Command{ + Use: "policy", + SilenceUsage: true, + Run: func(cmd *cobra.Command, args []string) { + err := initPolicy(cmd, args) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "failed to init policy: %v\n", err) + os.Exit(1) + } + }, +} + +func getPolicyConfig(capFilePath string) (*PolicyConfig, error) { + cfg := &PolicyConfig{} + + _, err := os.Stat(capFilePath) + if err != nil { + // file must exist + return nil, err + } + + store := nodecap.NewFileNodeCapabilities(capFilePath) + err = store.Load() + if err != nil { + return nil, err + } + + if store.Get(nodecap.NodeCapabilityIPv6) == ("true") { + cfg.IPv6 = true + } + + if store.Get(nodecap.NodeCapabilityExclusiveENI) == string(types.ExclusiveENIOnly) { + cfg.ExclusiveENI = true + } + cfg.Datapath = strings.ToLower(store.Get(nodecap.NodeCapabilityDataPath)) // normalize case: mergeConfigList also lower-cases eniip_virtual_type before matching + cfg.PolicyProvider = store.Get(nodecap.NodeCapabilityNetworkPolicyProvider) + + cfg.HealthCheckPort = os.Getenv("FELIX_HEALTHPORT") + if cfg.HealthCheckPort == "" { + cfg.HealthCheckPort = "9099" + } + + cm, err := getAllConfig(eniConfBasePath) + if err != nil { + return nil, err + } + cfg.EnableNetworkPolicy = cm.enableNetworkPolicy + cfg.InClusterLoadBalance = cm.enableInClusterLB + + return cfg, nil +} + +func initPolicy(cmd *cobra.Command, args []string) error { + cfg, err := getPolicyConfig("/var-run-eni/node_capabilities") + if err != nil { + return err + } + if cfg.ExclusiveENI { + return runExclusiveENI(cfg) + } + + switch cfg.Datapath { + case dataPathDefault, dataPathVeth: + if cfg.PolicyProvider != NetworkPolicyProviderEBPF { + if cfg.EnableNetworkPolicy { + return runCalico(cfg) + } + err = cleanUPFelix() + if err != nil { + return err + } + return runSocat(cfg) + } + fmt.Println("enable ebpf provider, run cilium") + fallthrough + case dataPathIPvlan, dataPathV2: + return runCilium(cfg) + } + + return fmt.Errorf("unsupported datapath %q", cfg.Datapath) // no case matched: fail loudly instead of exiting with success +} + +func runExclusiveENI(cfg *PolicyConfig) error { + err := configENIOnlyMasq("iptables") + if err 
!= nil { + return err + } + if cfg.IPv6 { + err = configENIOnlyMasq("ip6tables") + if err != nil { + return err + } + } + + return runSocat(cfg) +} + +func runCalico(cfg *PolicyConfig) error { + args := []string{ + "calico-felix", + } + env := os.Environ() + env = append(env, + "FELIX_IPTABLESBACKEND=NFT", + "FELIX_LOGSEVERITYSYS=none", + "FELIX_LOGSEVERITYSCREEN=info", + "CALICO_NETWORKING_BACKEND=none", + "CLUSTER_TYPE=k8s,aliyun", + "CALICO_DISABLE_FILE_LOGGING=true", + "FELIX_DATASTORETYPE=kubernetes", + "FELIX_FELIXHOSTNAME="+os.Getenv("NODENAME"), + "FELIX_IPTABLESREFRESHINTERVAL=60", + "FELIX_IPV6SUPPORT=true", + "WAIT_FOR_DATASTORE=true", + "NO_DEFAULT_POOLS=true", + "FELIX_DEFAULTENDPOINTTOHOSTACTION=ACCEPT", + "FELIX_HEALTHENABLED=true", + "FELIX_LOGFILEPATH=/dev/null", + "FELIX_BPFENABLED=false", + "FELIX_XDPENABLED=false", + "FELIX_BPFCONNECTTIMELOADBALANCINGENABLED=false", + "FELIX_BPFKUBEPROXYIPTABLESCLEANUPENABLED=false", + ) + + binary, err := exec.LookPath("calico-felix") + if err != nil { + return fmt.Errorf("calico-felix is not installed %w", err) + } + err = syscall.Exec(binary, args, env) + return err +} + +func runCilium(cfg *PolicyConfig) error { + extraArgs, err := parsePolicyConfig() + if err != nil { + return err + } + + args := []string{ + "cilium-agent", + "--cni-chaining-mode=terway-chainer", + "--tunnel=disabled", + "--enable-ipv4-masquerade=false", + "--enable-ipv6-masquerade=false", + "--disable-envoy-version-check=true", + "--enable-endpoint-routes=true", + "--ipv4-range=169.254.10.0/30", + "--ipv6-range=fe80:2400:3200:baba::/30", + "--enable-local-node-route=false", + "--enable-endpoint-health-checking=false", + "--enable-health-checking=false", + "--enable-service-topology=true", + "--disable-cnp-status-updates=true", + "--k8s-heartbeat-timeout=0", + "--enable-session-affinity=true", + "--install-iptables-rules=false", + "--enable-l7-proxy=false", + "--ipam=cluster-pool", + "--enable-runtime-device-detection=true", + 
// --enable-policy is appended exactly once below, as "default" or "never" + "--agent-health-port=" + cfg.HealthCheckPort, + } + if cfg.EnableNetworkPolicy { + args = append(args, "--enable-policy=default") + } else { + args = append(args, "--enable-policy=never") + args = append(args, "--labels=k8s:io\\.kubernetes\\.pod\\.namespace") + } + + switch cfg.Datapath { + case dataPathIPvlan: + args = append(args, "--datapath-mode=ipvlan") + case dataPathV2: + args = append(args, "--datapath-mode=veth") + default: + args = append(args, "--kube-proxy-replacement=disabled") + } + + if cfg.InClusterLoadBalance { + args = append(args, "--enable-in-cluster-loadbalance=true") + } + + args = append(args, extraArgs...) + env := os.Environ() + binary, err := exec.LookPath("cilium-agent") + if err != nil { + return fmt.Errorf("cilium-agent is not installed %w", err) + } + err = syscall.Exec(binary, args, env) + return err +} + +func parsePolicyConfig() ([]string, error) { + cni, err := os.ReadFile(cniFilePath) + if err != nil { + return nil, err + } + + cniJSON, err := gabs.ParseJSON(cni) + if err != nil { + return nil, err + } + + return policyConfig(cniJSON) +} + +func policyConfig(container *gabs.Container) ([]string, error) { + var ciliumArgs []string + for _, plugin := range container.Path("plugins").Children() { + if pluginType, _ := plugin.Path("type").Data().(string); pluginType != pluginTypeTerway { // comma-ok: skip entries without a string "type" instead of panicking + continue + } + h := &CNIConfig{} + + err := json.Unmarshal(plugin.Bytes(), h) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal args: %w", err) + } + + if h.HubbleEnabled { + if h.HubbleMetrics == "" { + h.HubbleMetrics = "drop" + } + if h.HubbleListenAddress == "" { + h.HubbleListenAddress = ":4244" + } + if h.HubbleMetricServer == "" { + h.HubbleMetricServer = ":9091" + } + ciliumArgs = append(ciliumArgs, []string{ + "--enable-hubble=true", + "--hubble-disable-tls=true", + "--hubble-metrics=" + h.HubbleMetrics, + "--hubble-listen-address=" + h.HubbleListenAddress, + "--hubble-metrics-server=" + 
h.HubbleMetricServer, + }...) + } + + // parse extra args + ciliumArgs = append(ciliumArgs, extractArgs(h.CiliumExtraArgs)...) + } + + return ciliumArgs, nil +} + +func extractArgs(in string) []string { + return lo.FilterMap(strings.Split(in, "--"), func(item string, index int) (string, bool) { + if strings.TrimSpace(item) == "" { + return "", false + } + return "--" + strings.TrimSpace(item), true + }) +} + +func configENIOnlyMasq(ipt string) error { + binary, err := exec.LookPath("bash") + if err != nil { + return fmt.Errorf("bash is not installed %w", err) + } + cmd := exec.Command(binary, "-cx", "source uninstall_policy.sh;masq_eni_only "+ipt) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err = cmd.Run() + if err != nil { + return fmt.Errorf("eni only masq failed: %w", err) + } + return nil +} + +func cleanUPFelix() error { + binary, err := exec.LookPath("bash") + if err != nil { + return fmt.Errorf("bash is not installed %w", err) + } + cmd := exec.Command(binary, "-c", "source uninstall_policy.sh;cleanup_felix") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + _ = cmd.Run() + return nil +} + +func runSocat(cfg *PolicyConfig) error { + port := cfg.HealthCheckPort + if port == "" { + port = "9099" + } + args := []string{ + "socat", + fmt.Sprintf("TCP-LISTEN:%s,bind=127.0.0.1,fork,reuseaddr", port), + "system:'sleep 2;kill -9 $SOCAT_PID 2>/dev/null'", + } + env := os.Environ() + binary, err := exec.LookPath("socat") + if err != nil { + return fmt.Errorf("socat is not installed %w", err) + } + return syscall.Exec(binary, args, env) +} diff --git a/cmd/terway-cli/policy_test.go b/cmd/terway-cli/policy_test.go new file mode 100644 index 00000000..2455c225 --- /dev/null +++ b/cmd/terway-cli/policy_test.go @@ -0,0 +1,31 @@ +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_extractArgs(t *testing.T) { + type args struct { + in string + } + tests := []struct { + name string + args args + want []string + }{ + { + 
name: "test1", + args: args{ + in: "--foo=bar --baz=\"aa bb\"", + }, + want: []string{"--foo=bar", "--baz=\"aa bb\""}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, extractArgs(tt.args.in), "extractArgs(%v)", tt.args.in) + }) + } +} diff --git a/cmd/terway/main.go b/cmd/terway/main.go index 9faa727b..806b2e2c 100644 --- a/cmd/terway/main.go +++ b/cmd/terway/main.go @@ -23,7 +23,6 @@ var ( log = ctrl.Log.WithName("setup") ) -const defaultConfigPath = "/etc/eni/eni.json" const defaultSocketPath = "/var/run/eni/eni.socket" const debugSocketPath = "unix:///var/run/eni/eni_debug.socket" @@ -31,6 +30,7 @@ var ( logLevel string daemonMode string readonlyListen string + configFilePath string ) func main() { @@ -40,6 +40,7 @@ func main() { fs.StringVar(&daemonMode, "daemon-mode", "VPC", "terway network mode") fs.StringVar(&logLevel, "log-level", "info", "terway log level") fs.StringVar(&readonlyListen, "readonly-listen", utils.NormalizePath(debugSocketPath), "terway readonly listen") + fs.StringVar(&configFilePath, "config", "/etc/eni/eni.json", "terway config file") ctrl.RegisterFlags(fs) err := fs.Parse(os.Args[1:]) @@ -56,7 +57,7 @@ func main() { log.Info(version.Version) ctx := ctrl.SetupSignalHandler() - err = daemon.Run(ctx, utils.NormalizePath(defaultSocketPath), readonlyListen, utils.NormalizePath(defaultConfigPath), daemonMode) + err = daemon.Run(ctx, utils.NormalizePath(defaultSocketPath), readonlyListen, utils.NormalizePath(configFilePath), daemonMode) if err != nil { klog.Fatal(err) diff --git a/init.sh b/init.sh index 8e293197..4ed107a2 100755 --- a/init.sh +++ b/init.sh @@ -7,15 +7,11 @@ set -o nounset cp -f /usr/bin/terway /opt/cni/bin/ chmod +x /opt/cni/bin/terway -if [ "$TERWAY_DAEMON_MODE" != "VPC" ]; then - cp -f /usr/bin/cilium-cni /opt/cni/bin/ - chmod +x /opt/cni/bin/cilium-cni -fi +cp -f /usr/bin/cilium-cni /opt/cni/bin/ +chmod +x /opt/cni/bin/cilium-cni # init cni config -cp /tmp/eni/eni_conf 
/etc/eni/eni.json - -terway-cli cni /tmp/eni/10-terway.conflist /tmp/eni/10-terway.conf --output /etc/cni/net.d/10-terway.conflist +terway-cli cni /etc/eni/10-terway.conflist /etc/eni/10-terway.conf --output /etc/cni/net.d/10-terway.conflist terway-cli nodeconfig node_capabilities=/var/run/eni/node_capabilities @@ -25,7 +21,7 @@ if [ ! -f "$node_capabilities" ]; then touch "$node_capabilities" fi -require_erdma=$(jq '.enable_erdma' -r /dev/null' - else - # shellcheck disable=SC2016 - exec socat TCP-LISTEN:9099,bind=127.0.0.1,fork,reuseaddr system:'sleep 2;kill -9 $SOCAT_PID 2>/dev/null' - fi -fi - -if grep -q "datapath *= *datapathv2" "$node_capabilities"; then - datapath_mode=veth -fi - -# kernel version has already checked in initContainer, so just determine whether plugin chaining exists -if [ "$virtyal_type" = "ipvlan" ] || [ "$virtyal_type" = "datapathv2" ]; then - # check kernel version & enable cilium - - # kernel version equal and above 4.19 - if { [ "$KERNEL_MAJOR_VERSION" -eq 4 ] && [ "$KERNEL_MINOR_VERSION" -ge 19 ]; } || - [ "$KERNEL_MAJOR_VERSION" -gt 4 ]; then - - extra_args=$(terway_config_val 'cilium_args') - if [ -z "$DISABLE_POLICY" ] || [ "$DISABLE_POLICY" = "false" ] || [ "$DISABLE_POLICY" = "0" ]; then - ENABLE_POLICY="default" - else - ENABLE_POLICY="never" - extra_args="${extra_args} --labels=k8s:io\\.kubernetes\\.pod\\.namespace " - fi - - if [[ $extra_args != *"bpf-map-dynamic-size-ratio"* ]]; then - extra_args="${extra_args} --bpf-map-dynamic-size-ratio=0.0025" - fi - - if [ "$(terway_config_val 'cilium_enable_hubble' | tr '[:upper:]' '[:lower:]')" = "true" ]; then - cilium_hubble_metrics=$(terway_config_val 'cilium_hubble_metrics') - cilium_hubble_metrics=${cilium_hubble_metrics:="drop"} - cilium_hubble_listen_address=$(terway_config_val 'cilium_hubble_listen_address') - cilium_hubble_listen_address=${cilium_hubble_listen_address:=":4244"} - cilium_hubble_metrics_server=$(terway_config_val 'cilium_hubble_metrics_server') - 
cilium_hubble_metrics_server=${cilium_hubble_metrics_server:=":9091"} - extra_args="${extra_args} --enable-hubble=true --hubble-disable-tls=true --hubble-metrics=${cilium_hubble_metrics}" - extra_args="${extra_args} --hubble-listen-address=${cilium_hubble_listen_address} --hubble-metrics-server=${cilium_hubble_metrics_server}" - echo "turning up hubble, passing args \"${extra_args}\"" - fi - - if [ "$IN_CLUSTER_LOADBALANCE" = "true" ]; then - extra_args="${extra_args} --enable-in-cluster-loadbalance=true " - echo "turning up in cluster loadbalance, passing args \"${extra_args}\"" - fi - - if bpftool -j feature probe | grep bpf_skb_ecn_set_ce; then - extra_args="${extra_args} --enable-bandwidth-manager=true " - fi - - echo "using cilium as network routing & policy" - - # shellcheck disable=SC2086 - exec cilium-agent --tunnel=disabled --enable-ipv4-masquerade=false --enable-ipv6-masquerade=false \ - --enable-policy=$ENABLE_POLICY \ - --agent-health-port=9099 --disable-envoy-version-check=true \ - --enable-local-node-route=false --ipv4-range=169.254.10.0/30 --ipv6-range=fe80:2400:3200:baba::/30 --enable-endpoint-health-checking=false \ - --enable-health-checking=false --enable-service-topology=true --disable-cnp-status-updates=true --k8s-heartbeat-timeout=0 --enable-session-affinity=true \ - --install-iptables-rules=false --enable-l7-proxy=false \ - --ipam=cluster-pool --datapath-mode=${datapath_mode} --enable-runtime-device-detection=true ${extra_args} - fi -fi -# shellcheck disable=SC1091 -source uninstall_policy.sh - -# check kernel version - -export FELIX_IPTABLESBACKEND=Auto -if (uname -r | grep -E "el7|an7" && [ "${KERNEL_MAJOR_VERSION}" -eq 3 ]) || (uname -r | grep -E "al7" && [ "${KERNEL_MAJOR_VERSION}" -eq 4 ]); then - export FELIX_IPTABLESBACKEND=Legacy -elif (uname -r | grep -E "el8|an8" && [ "${KERNEL_MAJOR_VERSION}" -ge 4 ]) || (uname -r | grep -E "al8|lifsea8" && [ "${KERNEL_MAJOR_VERSION}" -ge 5 ]); then - export FELIX_IPTABLESBACKEND=NFT - - # clean 
legacy rules if exist - cleanup_legacy -fi - -# default for veth -export FELIX_LOGSEVERITYSYS=none -export FELIX_LOGSEVERITYSCREEN=info -export CALICO_NETWORKING_BACKEND=none -export CLUSTER_TYPE=k8s,aliyun -export CALICO_DISABLE_FILE_LOGGING=true -# shellcheck disable=SC2154 -export CALICO_IPV4POOL_CIDR="${Network}" -export FELIX_IPTABLESREFRESHINTERVAL="${IPTABLESREFRESHINTERVAL:-60}" -export FELIX_IPV6SUPPORT=true -export WAIT_FOR_DATASTORE=true -export IP="" -export NO_DEFAULT_POOLS=true -export FELIX_DEFAULTENDPOINTTOHOSTACTION=ACCEPT -export FELIX_HEALTHENABLED=true -export FELIX_LOGFILEPATH=/dev/null -export FELIX_BPFENABLED=false -export FELIX_XDPENABLED=false -export FELIX_BPFCONNECTTIMELOADBALANCINGENABLED=false -export FELIX_BPFKUBEPROXYIPTABLESCLEANUPENABLED=false -exec 2>&1 -if [ -n "$NODENAME" ]; then - export FELIX_FELIXHOSTNAME="$NODENAME" -fi -if [ -n "$DATASTORE_TYPE" ]; then - export FELIX_DATASTORETYPE="$DATASTORE_TYPE" -fi - -if [ "$network_policy_provider" = "ebpf" ]; then - cleanup_felix - # kernel version equal and above 4.19 - if { [ "$KERNEL_MAJOR_VERSION" -eq 4 ] && [ "$KERNEL_MINOR_VERSION" -ge 19 ]; } || - [ "$KERNEL_MAJOR_VERSION" -gt 4 ]; then - - extra_args=$(terway_config_val 'cilium_args') - - if [ -z "$DISABLE_POLICY" ] || [ "$DISABLE_POLICY" = "false" ] || [ "$DISABLE_POLICY" = "0" ]; then - ENABLE_POLICY="default" - else - ENABLE_POLICY="never" - extra_args="${extra_args} --labels=k8s:io\\.kubernetes\\.pod\\.namespace " - fi - - if [ "$IN_CLUSTER_LOADBALANCE" = "true" ]; then - extra_args="${extra_args} --enable-in-cluster-loadbalance=true " - echo "turning up in cluster loadbalance, passing args \"${extra_args}\"" - fi - - # shellcheck disable=SC2086 - exec cilium-agent --kube-proxy-replacement=disabled --tunnel=disabled --enable-ipv4-masquerade=false --enable-ipv6-masquerade=false \ - --enable-policy=$ENABLE_POLICY \ - --agent-health-port=9099 --disable-envoy-version-check=true \ - --enable-local-node-route=false 
--ipv4-range=169.254.10.0/30 --ipv6-range=fe80:2400:3200:baba::/30 --enable-endpoint-health-checking=false \ - --enable-health-checking=false --enable-service-topology=true --disable-cnp-status-updates=true --k8s-heartbeat-timeout=0 --enable-session-affinity=true \ - --install-iptables-rules=false --enable-l7-proxy=false \ - --ipam=cluster-pool ${extra_args} - else - echo "unsupported kernel version" - exit 1 - fi -else - if [ -z "$DISABLE_POLICY" ] || [ "$DISABLE_POLICY" = "false" ] || [ "$DISABLE_POLICY" = "0" ]; then - exec calico-felix - fi -fi - -config_masquerade -cleanup_felix -# for health check -if [ "$FELIX_HEALTHPORT" != "" ]; then - # shellcheck disable=SC2016 - exec socat TCP-LISTEN:"$FELIX_HEALTHPORT",bind=127.0.0.1,fork,reuseaddr system:'sleep 2;kill -9 $SOCAT_PID 2>/dev/null' -else - # shellcheck disable=SC2016 - exec socat TCP-LISTEN:9099,bind=127.0.0.1,fork,reuseaddr system:'sleep 2;kill -9 $SOCAT_PID 2>/dev/null' -fi +terway-cli policy \ No newline at end of file diff --git a/policy/uninstall_policy.sh b/policy/uninstall_policy.sh index 59574fe2..0bed4ba7 100755 --- a/policy/uninstall_policy.sh +++ b/policy/uninstall_policy.sh @@ -1,5 +1,21 @@ #!/bin/sh +masq_eni_only() { + if ! "$1" -t nat -L terway-masq; then + # Create a new chain in nat table. + "$1" -t nat -N terway-masq + fi + + if ! "$1" -t nat -L POSTROUTING | grep -q terway-masq; then + # Append that chain to POSTROUTING table. + "$1" -t nat -A POSTROUTING -m comment --comment "terway:masq-outgoing" ! -o lo -j terway-masq + fi + + if ! "$1" -t nat -L terway-masq | grep -q MASQUERADE; then + "$1" -t nat -A terway-masq -j MASQUERADE + fi +} + cleanup_rules(){ # Set FORWARD action to ACCEPT so outgoing packets can go through POSTROUTING chains. echo "Setting default FORWARD action to ACCEPT..." 
@@ -33,34 +49,6 @@ cleanup_rules(){ "$1"-save -t filter | grep -e '--comment "cali:' | cut -c 3- | sed 's/^ *//;s/ *$//' | xargs -l1 "$1" -t filter -D } -config_masquerade() { - # Set the CALICO_IPV4POOL_CIDR environment variable to the appropriate CIDR for this cluster if Calico is adding the traffic. - if [ "$CALICO_IPV4POOL_CIDR" != "" ]; then - clusterCIDR=$CALICO_IPV4POOL_CIDR - - # Set up NAT rule so traffic gets masqueraded if it is going to any subnet other than cluster-cidr. - echo "Adding masquerade rule for traffic going from $clusterCIDR to ! $clusterCIDR" - - if ! iptables -t nat -L terway-brb-masq; then - # Create a new chain in nat table. - iptables -t nat -N terway-brb-masq - fi - - if ! iptables -t nat -L POSTROUTING | grep -q terway-brb; then - # Append that chain to POSTROUTING table. - iptables -t nat -A POSTROUTING -m comment --comment "terway:masq-outgoing" -j terway-brb-masq - fi - - if ! iptables -t nat -L terway-brb-masq | grep -q "$clusterCIDR"; then - # Add MASQUERADE rule for traffic from clusterCIDR to non-clusterCIDR. - if ! iptables -t nat -A terway-brb-masq -s "$clusterCIDR" ! -d "$clusterCIDR" -j MASQUERADE --random-fully; then - # fallback to no random-fully - iptables -t nat -A terway-brb-masq -s "$clusterCIDR" ! -d "$clusterCIDR" -j MASQUERADE - fi - fi - fi -} - cleanup_felix() { # Make sure ip_forward sysctl is set to allow ip forwarding. sysctl -w net.ipv4.ip_forward=1