From 69d1bb2b6619322a0a35981532e61258d9a60baf Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Tue, 29 Aug 2017 14:37:57 -0700 Subject: [PATCH 1/5] Support kerberos as option --- charts/README.md | 3 + charts/hdfs-datanode-k8s/README.md | 56 +++++++++++- .../templates/datanode-daemonset.yaml | 86 +++++++++++++++++-- charts/hdfs-datanode-k8s/values.yaml | 24 ++++++ charts/hdfs-namenode-k8s/README.md | 45 +++++++++- .../templates/namenode-statefulset.yaml | 46 ++++++++-- charts/hdfs-namenode-k8s/values.yaml | 19 ++++ 7 files changed, 262 insertions(+), 17 deletions(-) diff --git a/charts/README.md b/charts/README.md index 578b3b7..27af92a 100644 --- a/charts/README.md +++ b/charts/README.md @@ -11,3 +11,6 @@ the following order. `hdfs-namenode-k8s/README.md` for how to launch. 2. `hdfs-datanode-k8s`: Launches the hdfs datanode daemons. See `hdfs-datanode-k8s/README.md` for how to launch. + +Kerberos is supported. See the `kerberosEnabled` option in the namenode and +datanode charts. diff --git a/charts/hdfs-datanode-k8s/README.md b/charts/hdfs-datanode-k8s/README.md index a223695..50c2d37 100644 --- a/charts/hdfs-datanode-k8s/README.md +++ b/charts/hdfs-datanode-k8s/README.md @@ -9,13 +9,61 @@ HDFS `datanodes` running inside a kubernetes cluster. See the other chart for $ kubectl label node YOUR-MASTER-NAME hdfs-datanode-exclude=yes ``` - 2. Launch this helm chart, `hdfs-datanode-k8s`. + 2. (Skip this if you do not plan to enable Kerberos) + Prepare Kerberos setup, following the steps below. + + - Create a config map containg your Kerberos config file, if you have + not done this already as part of the namenode launch. The config + map will be mounted onto the namenode and datanode pods. + + ``` + $ kubectl create configmap kerberos-config --from-file=/etc/krb5.conf + ``` + + - Generate the principal account and password keytab file for your datanode + daemons. This is typically done in your Kerberos KDC host. 
For example, + if one of your datanodes will run on the k8s cluster node + kube-n2.mycompany.com, and your Kerberos realm is MYCOMPANY.COM, then + + ``` + $ kadmin.local -q "addprinc -randkey hdfs/kube-n2.mycompany.com@MYCOMPANY.COM" + $ kadmin.local -q "addprinc -randkey http/kube-n2.mycompany.com@MYCOMPANY.COM" + $ kadmin.local -q "ktadd -norandkey -k kube-n2.hdfs.keytab \ + hdfs/kube-n2.mycompany.com@MYCOMPANY.COM \ + http/kube-n2.mycompany.com@MYCOMPANY.COM" + ``` + Repeat the above for all of your other datanodes, applying different k8s + cluster node names. + + - Copy the keytab files to the k8s cluster nodes. The keytab files will be + mounted onto the datanode pods. (You may want to restrict which + pods can use `hostPath` using k8s `PodSecurityPolicy` and `RBAC` + to minimize exposure of the keytab files. See [reference]( + https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md)) + + ``` + $ ssh root@kube-n2.mycompany.com mkdir /hdfs-credentials + $ scp root@kube-n2.hdfs.keytab kube-n2.mycompany.com:/hdfs-credentials/hdfs.keytab + $ ssh root@kube-n2.mycompany.com chmod 0600 /hdfs-credentials/hdfs.keytab + ``` + Repeat the above for all of your other datanodes, applying different k8s + cluster node names. + + 3. Launch this helm chart, `hdfs-datanode-k8s`. ``` $ helm install -n my-hdfs-datanode hdfs-datanode-k8s ``` - 3. Confirm the daemons are launched. + If enabling Kerberos, specify necessary options. For instance, + + ``` + $ helm install -n my-hdfs-datanode \ + --set kerberosEnabled=true,kerberosRealm=MYCOMPANY.COM hdfs-datanode-k8s + ``` + The two variables above are required. For other variables, see values.yaml. + + 4. Confirm the daemons are launched. ``` $ kubectl get pods | grep hdfs-datanode- @@ -34,4 +82,6 @@ Note they run under the `default` namespace. ###Credits This chart is using public Hadoop docker images hosted by - [uhopper](https://hub.docker.com/u/uhopper/). 
+[uhopper](https://hub.docker.com/u/uhopper/). When Kerberos is enabled, +we also use `jsvc` in a public docker image hosted by +[mschlimb](https://hub.docker.com/r/mschlimb). diff --git a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml index 68df523..9d68054 100644 --- a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml +++ b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml @@ -32,6 +32,46 @@ spec: - name: datanode image: uhopper/hadoop-datanode:2.7.2 env: + # The following env vars are listed according to low-to-high precendence order. + # i.e. Whoever comes last will override the earlier value of the same variable. + {{- if .Values.kerberosEnabled }} + - name: CORE_CONF_hadoop_security_authentication + value: kerberos + - name: CORE_CONF_hadoop_security_authorization + value: "true" + - name: CORE_CONF_hadoop_rpc_protection + value: privacy + - name: HDFS_CONF_dfs_block_access_token_enable + value: "true" + - name: HDFS_CONF_dfs_encrypt_data_transfer + value: "true" + - name: HDFS_CONF_dfs_datanode_kerberos_principal + value: hdfs/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }} + - name: HDFS_CONF_dfs_datanode_kerberos_https_principal + value: http/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }} + - name: HDFS_CONF_dfs_web_authentication_kerberos_principal + value: http/_HOST@{{ required "A valid kerberosRealm entry required!" 
.Values.kerberosRealm }} + - name: HDFS_CONF_dfs_datanode_keytab_file + value: /etc/security/hdfs.keytab + {{- if .Values.jsvcEnabled }} + - name: HDFS_CONF_dfs_datanode_address + value: 0.0.0.0:1004 + - name: HDFS_CONF_dfs_datanode_http_address + value: 0.0.0.0:1006 + - name: HADOOP_SECURE_DN_USER + value: root + - name: JSVC_OUTFILE + value: /dev/stdout + - name: JSVC_ERRFILE + value: /dev/stderr + - name: JSVC_HOME + value: /jsvc-home + {{- end }} + {{- end }} + {{- range $key, $value := .Values.customHadoopConfig }} + - name: {{ $key | quote }} + value: {{ $value | quote }} + {{- end }} - name: CORE_CONF_fs_defaultFS value: hdfs://hdfs-namenode-0.hdfs-namenode.default.svc.cluster.local:8020 # The below uses two loops to make sure the last item does not have comma. It uses index 0 @@ -48,19 +88,16 @@ spec: /hadoop/dfs/data/{{ $index }} {{- end }} {{- end }} - # We now add custom hadoop configuration provided - {{- range $key, $value := .Values.customHadoopConfig }} - {{- if and (ne $key "HDFS_CONF_dfs_datanode_data_dir") (ne $key "CORE_CONF_fs_defaultFS") }} - - name: {{ $key | quote }} - value: {{ $value | quote }} - {{- end }} - {{- end }} livenessProbe: initialDelaySeconds: 30 httpGet: host: 127.0.0.1 path: / + {{- if and .Values.kerberosEnabled .Values.jsvcEnabled }} + port: 1006 + {{- else }} port: 50075 + {{- end }} securityContext: privileged: true volumeMounts: @@ -68,6 +105,29 @@ spec: - name: hdfs-data-{{ $index }} mountPath: /hadoop/dfs/data/{{ $index }} {{- end }} + {{- if .Values.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.kerberosConfigFileName }} + readOnly: true + - name: kerberos-keytab + mountPath: /etc/security/hdfs.keytab + readOnly: true + {{- if .Values.jsvcEnabled }} + - name: jsvc-home + mountPath: /jsvc-home + {{- end }} + {{- end }} + {{- if and .Values.kerberosEnabled .Values.jsvcEnabled }} + initContainers: + - name: copy-jsvc + # Pull by digest because the image doesn't have tags to 
pin. + image: mschlimb/jsvc@sha256:bf20eb9a319e9a2f87473d8da7418d21503a97528b932800b6b8417cd31e30ef + command: ['sh', '-c', 'cp /usr/bin/jsvc /jsvc-home/jsvc'] + volumeMounts: + - name: jsvc-home + mountPath: /jsvc-home + {{- end }} affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -82,3 +142,15 @@ spec: hostPath: path: {{ $path }} {{- end }} + {{- if .Values.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ .Values.kerberosConfigMap }} + - name: kerberos-keytab + hostPath: + path: {{ .Values.kerberosKeytabHostPath }} + {{- if .Values.jsvcEnabled }} + - name: jsvc-home + emptyDir: {} + {{- end }} + {{- end }} diff --git a/charts/hdfs-datanode-k8s/values.yaml b/charts/hdfs-datanode-k8s/values.yaml index f9736cb..608b1b3 100644 --- a/charts/hdfs-datanode-k8s/values.yaml +++ b/charts/hdfs-datanode-k8s/values.yaml @@ -31,3 +31,27 @@ dataNodeHostPath: customHadoopConfig: {} # Set variables through a hash where env variable is the key, e.g. # HDFS_CONF_dfs_datanode_use_datanode_hostname: "false" + +# Whether or not Kerberos support is enabled. +kerberosEnabled: false + +# Required to be non-empty if Kerberos is enabled. Specify your Kerberos realm name. +# This should match the realm name in your Kerberos config file. +kerberosRealm: "" + +# Effective only if Kerberos is enabled. Name of the k8s config map containing +# the kerberos config file. +kerberosConfigMap: kerberos-config + +# Effective only if Kerberos is enabled. Name of the kerberos config file inside +# the config map. +kerberosConfigFileName: krb5.conf + +# Effective only if Kerberos is enabled. Path of the kerberos keytab file on +# k8s cluster nodes. +kerberosKeytabHostPath: /hdfs-credentials/hdfs.keytab + +# Effective only if Kerberos is enabled. Enable protection of datanodes using +# the jsvc utility. 
See the reference doc at +# https://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-common/SecureMode.html#Secure_DataNode +jsvcEnabled: true diff --git a/charts/hdfs-namenode-k8s/README.md b/charts/hdfs-namenode-k8s/README.md index ff48bfe..98a7a7d 100644 --- a/charts/hdfs-namenode-k8s/README.md +++ b/charts/hdfs-namenode-k8s/README.md @@ -12,13 +12,54 @@ HDFS `namenode` running inside a kubernetes cluster. See the other chart for $ kubectl label nodes YOUR-HOST hdfs-namenode-selector=hdfs-namenode-0 ``` - 2. Launch this helm chart, `hdfs-namenode-k8s`. + 2. (Skip this if you do not plan to enable Kerberos) + Prepare Kerberos setup, following the steps below. + + - Create a config map containg your Kerberos config file. This will be + mounted onto the namenode and datanode pods. + + ``` + $ kubectl create configmap kerberos-config --from-file=/etc/krb5.conf + ``` + + - Generate the principal account and password keytab file for the namenode + daemon. This is typically done in your Kerberos KDC host. For example, + if the namenode will run on the k8s cluster node kube-n1.mycompany.com, + and your Kerberos realm is MYCOMPANY.COM, then + + ``` + $ kadmin.local -q "addprinc -randkey hdfs/kube-n1.mycompany.com@MYCOMPANY.COM" + $ kadmin.local -q "addprinc -randkey http/kube-n1.mycompany.com@MYCOMPANY.COM" + $ kadmin.local -q "ktadd -norandkey -k kube-n1.hdfs.keytab \ + hdfs/kube-n1.mycompany.com@MYCOMPANY.COM \ + http/kube-n1.mycompany.com@MYCOMPANY.COM" + ``` + + - Copy the keytab file to the k8s cluster node. This will be mounted + onto the namenode pod as `hostPath`. (You may want to restrict which + pods can use `hostPath` using k8s `PodSecurityPolicy` and `RBAC` + to minimize exposure of the keytab files. 
See [reference]( + https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md)) + ``` + $ ssh root@kube-n1.mycompany.com mkdir /hdfs-credentials + $ scp root@kube-n1.hdfs.keytab kube-n1.mycompany.com:/hdfs-credentials/hdfs.keytab + $ ssh root@kube-n1.mycompany.com chmod 0600 /hdfs-credentials/hdfs.keytab + ``` + + 3. Launch this namenode helm chart, `hdfs-namenode-k8s`. ``` $ helm install -n my-hdfs-namenode hdfs-namenode-k8s ``` - 3. Confirm the daemon is launched. + If enabling Kerberos, specify necessary options. For instance, + ``` + $ helm install -n my-hdfs-namenode \ + --set kerberosEnabled=true,kerberosRealm=MYCOMPANY.COM hdfs-namenode-k8s + ``` + The two variables above are required. For other variables, see values.yaml. + + 4. Confirm the daemon is launched. ``` $ kubectl get pods | grep hdfs-namenode diff --git a/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml index 702714b..3b40dbe 100644 --- a/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml +++ b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml @@ -51,21 +51,49 @@ spec: - name: hdfs-namenode image: uhopper/hadoop-namenode:2.7.2 env: - - name: CLUSTER_NAME - value: hdfs-k8s - # We now add custom hadoop configuration provided + # The following env vars are listed according to low-to-high precendence order. + # i.e. Whoever comes last will override the earlier value of the same variable. + {{- if .Values.kerberosEnabled }} + - name: CORE_CONF_hadoop_security_authentication + value: kerberos + - name: CORE_CONF_hadoop_security_authorization + value: "true" + - name: CORE_CONF_hadoop_rpc_protection + value: privacy + - name: HDFS_CONF_dfs_block_access_token_enable + value: "true" + - name: HDFS_CONF_dfs_encrypt_data_transfer + value: "true" + - name: HDFS_CONF_dfs_namenode_kerberos_principal + value: hdfs/_HOST@{{ required "A valid kerberosRealm entry required!" 
.Values.kerberosRealm }} + - name: HDFS_CONF_dfs_namenode_kerberos_https_principal + value: http/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }} + - name: HDFS_CONF_dfs_web_authentication_kerberos_principal + value: http/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }} + - name: HDFS_CONF_dfs_namenode_keytab_file + value: /etc/security/hdfs.keytab + {{- end }} {{- range $key, $value := .Values.customHadoopConfig }} - {{- if ne $key "CLUSTER_NAME" }} - name: {{ $key | quote }} value: {{ $value | quote }} {{- end }} - {{- end }} + - name: CLUSTER_NAME + value: hdfs-k8s ports: - containerPort: 8020 name: fs volumeMounts: - name: hdfs-name mountPath: /hadoop/dfs/name + {{- if .Values.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.kerberosConfigFileName }} + readOnly: true + - name: kerberos-keytab + mountPath: /etc/security/hdfs.keytab + readOnly: true + {{- end }} # Pin the pod to a node. You can label your node like below: # $ kubectl label nodes YOUR-NODE hdfs-namenode-selector=hdfs-namenode-0 nodeSelector: @@ -75,3 +103,11 @@ spec: - name: hdfs-name hostPath: path: {{ .Values.nameNodeHostPath }} + {{- if .Values.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ .Values.kerberosConfigMap }} + - name: kerberos-keytab + hostPath: + path: {{ .Values.kerberosKeytabHostPath }} + {{- end }} diff --git a/charts/hdfs-namenode-k8s/values.yaml b/charts/hdfs-namenode-k8s/values.yaml index 3ff5b91..000504a 100644 --- a/charts/hdfs-namenode-k8s/values.yaml +++ b/charts/hdfs-namenode-k8s/values.yaml @@ -29,3 +29,22 @@ nameNodeHostPath: /hdfs-name customHadoopConfig: {} # Set variables through a hash where env variable is the key, e.g. # HDFS_CONF_dfs_datanode_use_datanode_hostname: "false" + +# Whether or not Kerberos support is enabled. +kerberosEnabled: false + +# Required to be non-empty if Kerberos is enabled. 
Specify your Kerberos realm name. +# This should match the realm name in your Kerberos config file. +kerberosRealm: "" + +# Effective only if Kerberos is enabled. Name of the k8s config map containing +# the kerberos config file. +kerberosConfigMap: kerberos-config + +# Effective only if Kerberos is enabled. Name of the kerberos config file inside +# the config map. +kerberosConfigFileName: krb5.conf + +# Effective only if Kerberos is enabled. Path of the kerberos keytab file on +# k8s cluster nodes. +kerberosKeytabHostPath: /hdfs-credentials/hdfs.keytab From 00a8039a23282f7edc47123b481be35f1530a74b Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Fri, 8 Sep 2017 11:38:08 -0700 Subject: [PATCH 2/5] Use a secret for keytabs --- charts/hdfs-datanode-k8s/README.md | 49 +++---------------- .../templates/datanode-daemonset.yaml | 31 +++++++++--- charts/hdfs-datanode-k8s/values.yaml | 10 ++-- charts/hdfs-namenode-k8s/README.md | 44 +++++++++++------ .../templates/namenode-statefulset.yaml | 30 ++++++++++-- charts/hdfs-namenode-k8s/values.yaml | 10 ++-- 6 files changed, 100 insertions(+), 74 deletions(-) diff --git a/charts/hdfs-datanode-k8s/README.md b/charts/hdfs-datanode-k8s/README.md index 50c2d37..c65d6e5 100644 --- a/charts/hdfs-datanode-k8s/README.md +++ b/charts/hdfs-datanode-k8s/README.md @@ -10,44 +10,8 @@ HDFS `datanodes` running inside a kubernetes cluster. See the other chart for ``` 2. (Skip this if you do not plan to enable Kerberos) - Prepare Kerberos setup, following the steps below. - - - Create a config map containg your Kerberos config file, if you have - not done this already as part of the namenode launch. The config - map will be mounted onto the namenode and datanode pods. - - ``` - $ kubectl create configmap kerberos-config --from-file=/etc/krb5.conf - ``` - - - Generate the principal account and password keytab file for your datanode - daemons. This is typically done in your Kerberos KDC host. 
For example, - if one of your datanodes will run on the k8s cluster node - kube-n2.mycompany.com, and your Kerberos realm is MYCOMPANY.COM, then - - ``` - $ kadmin.local -q "addprinc -randkey hdfs/kube-n2.mycompany.com@MYCOMPANY.COM" - $ kadmin.local -q "addprinc -randkey http/kube-n2.mycompany.com@MYCOMPANY.COM" - $ kadmin.local -q "ktadd -norandkey -k kube-n2.hdfs.keytab \ - hdfs/kube-n2.mycompany.com@MYCOMPANY.COM \ - http/kube-n2.mycompany.com@MYCOMPANY.COM" - ``` - Repeat the above for all of your other datanodes, applying different k8s - cluster node names. - - - Copy the keytab files to the k8s cluster nodes. The keytab files will be - mounted onto the datanode pods. (You may want to restrict which - pods can use `hostPath` using k8s `PodSecurityPolicy` and `RBAC` - to minimize exposure of the keytab files. See [reference]( - https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md)) - - ``` - $ ssh root@kube-n2.mycompany.com mkdir /hdfs-credentials - $ scp root@kube-n2.hdfs.keytab kube-n2.mycompany.com:/hdfs-credentials/hdfs.keytab - $ ssh root@kube-n2.mycompany.com chmod 0600 /hdfs-credentials/hdfs.keytab - ``` - Repeat the above for all of your other datanodes, applying different k8s - cluster node names. + Conduct the Kerberos setups described in the namenode README.md, if you + have not done that already. 3. Launch this helm chart, `hdfs-datanode-k8s`. @@ -72,7 +36,10 @@ HDFS `datanodes` running inside a kubernetes cluster. See the other chart for ``` `Datanode` daemons run on every cluster node. They also mount k8s `hostPath` -local disk volumes. +local disk volumes. You may want to restrict access of `hostPath` +using `pod security policy`. +See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md) + `Datanodes` are using `hostNetwork` to register to `namenode` using physical IPs. @@ -82,6 +49,4 @@ Note they run under the `default` namespace. 
###Credits This chart is using public Hadoop docker images hosted by -[uhopper](https://hub.docker.com/u/uhopper/). When Kerberos is enabled, -we also use `jsvc` in a public docker image hosted by -[mschlimb](https://hub.docker.com/r/mschlimb). +[uhopper](https://hub.docker.com/u/uhopper/). diff --git a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml index 9d68054..ef58781 100644 --- a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml +++ b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml @@ -110,8 +110,8 @@ spec: mountPath: /etc/krb5.conf subPath: {{ .Values.kerberosConfigFileName }} readOnly: true - - name: kerberos-keytab - mountPath: /etc/security/hdfs.keytab + - name: kerberos-keytab-copy + mountPath: /etc/security/ readOnly: true {{- if .Values.jsvcEnabled }} - name: jsvc-home @@ -120,10 +120,27 @@ spec: {{- end }} {{- if and .Values.kerberosEnabled .Values.jsvcEnabled }} initContainers: + - name: copy-kerberos-keytab + image: busybox:1.27.1 + command: ['sh', '-c'] + args: + - cp /kerberos-keytabs/$MY_NODE_NAME.keytab /kerberos-keytab-copy/hdfs.keytab + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: kerberos-keytabs + mountPath: /kerberos-keytabs + - name: kerberos-keytab-copy + mountPath: /kerberos-keytab-copy - name: copy-jsvc # Pull by digest because the image doesn't have tags to pin. 
image: mschlimb/jsvc@sha256:bf20eb9a319e9a2f87473d8da7418d21503a97528b932800b6b8417cd31e30ef - command: ['sh', '-c', 'cp /usr/bin/jsvc /jsvc-home/jsvc'] + command: ['sh', '-c'] + args: + - cp /usr/bin/jsvc /jsvc-home/jsvc volumeMounts: - name: jsvc-home mountPath: /jsvc-home @@ -146,9 +163,11 @@ spec: - name: kerberos-config configMap: name: {{ .Values.kerberosConfigMap }} - - name: kerberos-keytab - hostPath: - path: {{ .Values.kerberosKeytabHostPath }} + - name: kerberos-keytabs + secret: + secretName: {{ .Values.kerberosKeytabsSecret }} + - name: kerberos-keytab-copy + emptyDir: {} {{- if .Values.jsvcEnabled }} - name: jsvc-home emptyDir: {} diff --git a/charts/hdfs-datanode-k8s/values.yaml b/charts/hdfs-datanode-k8s/values.yaml index 608b1b3..e07419d 100644 --- a/charts/hdfs-datanode-k8s/values.yaml +++ b/charts/hdfs-datanode-k8s/values.yaml @@ -47,9 +47,13 @@ kerberosConfigMap: kerberos-config # the config map. kerberosConfigFileName: krb5.conf -# Effective only if Kerberos is enabled. Path of the kerberos keytab file on -# k8s cluster nodes. -kerberosKeytabHostPath: /hdfs-credentials/hdfs.keytab +# Effective only if Kerberos is enabled. Name of the k8s secret containing +# the kerberos keytab files of per-host hdfs principals. The secret should +# have multiple data items. Each data item name should be formatted as: +# `HOST-NAME.keytab` +# where HOST-NAME should match the cluster node +# host name that each per-host HDFS principal is associated with. +kerberosKeytabsSecret: hdfs-kerberos-keytabs # Effective only if Kerberos is enabled. Enable protection of datanodes using # the jsvc utility. See the reference doc at diff --git a/charts/hdfs-namenode-k8s/README.md b/charts/hdfs-namenode-k8s/README.md index 98a7a7d..a6db6a9 100644 --- a/charts/hdfs-namenode-k8s/README.md +++ b/charts/hdfs-namenode-k8s/README.md @@ -22,28 +22,40 @@ HDFS `namenode` running inside a kubernetes cluster. 
See the other chart for $ kubectl create configmap kerberos-config --from-file=/etc/krb5.conf ``` - - Generate the principal account and password keytab file for the namenode - daemon. This is typically done in your Kerberos KDC host. For example, - if the namenode will run on the k8s cluster node kube-n1.mycompany.com, - and your Kerberos realm is MYCOMPANY.COM, then + - Generate per-host principal accounts and password keytab files for the namenode + and datanode daemons. This is typically done in your Kerberos KDC host. For example, + suppose the namenode will run on the k8s cluster node kube-n1.mycompany.com, + and your datanodes will run on kube-n1.mycompany.com and kube-n2.mycompany.com. + And your Kerberos realm is MYCOMPANY.COM, then ``` $ kadmin.local -q "addprinc -randkey hdfs/kube-n1.mycompany.com@MYCOMPANY.COM" $ kadmin.local -q "addprinc -randkey http/kube-n1.mycompany.com@MYCOMPANY.COM" - $ kadmin.local -q "ktadd -norandkey -k kube-n1.hdfs.keytab \ + $ mkdir hdfs-keytabs + $ kadmin.local -q "ktadd -norandkey \ + -k hdfs-keytabs/kube-n1.mycompany.com.keytab \ hdfs/kube-n1.mycompany.com@MYCOMPANY.COM \ http/kube-n1.mycompany.com@MYCOMPANY.COM" + + $ kadmin.local -q "addprinc -randkey hdfs/kube-n2.mycompany.com@MYCOMPANY.COM" + $ kadmin.local -q "addprinc -randkey http/kube-n2.mycompany.com@MYCOMPANY.COM" + $ kadmin.local -q "ktadd -norandkey \ + -k hdfs-keytabs/kube-n2.mycompany.com.keytab \ + hdfs/kube-n2.mycompany.com@MYCOMPANY.COM \ + http/kube-n2.mycompany.com@MYCOMPANY.COM" + $ kadmin.local -q "ktadd -norandkey \ + -k hdfs-keytabs/kube-n2.mycompany.com.keytab \ + hdfs/kube-n2.mycompany.com@MYCOMPANY.COM \ + http/kube-n2.mycompany.com@MYCOMPANY.COM" ``` - - Copy the keytab file to the k8s cluster node. This will be mounted - onto the namenode pod as `hostPath`. (You may want to restrict which - pods can use `hostPath` using k8s `PodSecurityPolicy` and `RBAC` - to minimize exposure of the keytab files. 
See [reference]( - https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md)) + - Create a k8s secret containing all the keytab files. This will be mounted + onto the namenode and datanode pods. (You may want to restrict access to + this secret using k8s RBAC, to minimize exposure of the keytab files. ``` - $ ssh root@kube-n1.mycompany.com mkdir /hdfs-credentials - $ scp root@kube-n1.hdfs.keytab kube-n1.mycompany.com:/hdfs-credentials/hdfs.keytab - $ ssh root@kube-n1.mycompany.com chmod 0600 /hdfs-credentials/hdfs.keytab + $ kubectl create secret generic hdfs-kerberos-keytabs \ + --from-file=kube-n1.mycompany.com.keytab \ + --from-file=kube-n2.mycompany.com.keytab ``` 3. Launch this namenode helm chart, `hdfs-namenode-k8s`. @@ -69,10 +81,12 @@ HDFS `namenode` running inside a kubernetes cluster. See the other chart for There will be only one `namenode` instance. i.e. High Availability (HA) is not supported at the moment. The `namenode` instance is supposed to be pinned to a cluster host using a node label, as shown in the usage above. `Namenode` -mount a local disk directory using k8s `hostPath` volume. +mount a local disk directory using k8s `hostPath` volume. You may want to +restrict access of `hostPath` using `pod security policy`. +See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md)) `namenode` is using `hostNetwork` so it can see physical IPs of datanodes -without an overlay network such as weave-net mask them. +without an overlay network such as weave-net masking them. 
###Credits diff --git a/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml index 3b40dbe..2406cd2 100644 --- a/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml +++ b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml @@ -90,10 +90,28 @@ spec: mountPath: /etc/krb5.conf subPath: {{ .Values.kerberosConfigFileName }} readOnly: true - - name: kerberos-keytab - mountPath: /etc/security/hdfs.keytab + - name: kerberos-keytab-copy + mountPath: /etc/security/ readOnly: true {{- end }} + {{- if .Values.kerberosEnabled }} + initContainers: + - name: copy-kerberos-keytab + image: busybox:1.27.1 + command: ['sh', '-c'] + args: + - cp /kerberos-keytabs/$MY_NODE_NAME.keytab /kerberos-keytab-copy/hdfs.keytab + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: kerberos-keytabs + mountPath: /kerberos-keytabs + - name: kerberos-keytab-copy + mountPath: /kerberos-keytab-copy + {{- end }} # Pin the pod to a node. You can label your node like below: # $ kubectl label nodes YOUR-NODE hdfs-namenode-selector=hdfs-namenode-0 nodeSelector: @@ -107,7 +125,9 @@ spec: - name: kerberos-config configMap: name: {{ .Values.kerberosConfigMap }} - - name: kerberos-keytab - hostPath: - path: {{ .Values.kerberosKeytabHostPath }} + - name: kerberos-keytabs + secret: + secretName: {{ .Values.kerberosKeytabsSecret }} + - name: kerberos-keytab-copy + emptyDir: {} {{- end }} diff --git a/charts/hdfs-namenode-k8s/values.yaml b/charts/hdfs-namenode-k8s/values.yaml index 000504a..a94cfc0 100644 --- a/charts/hdfs-namenode-k8s/values.yaml +++ b/charts/hdfs-namenode-k8s/values.yaml @@ -45,6 +45,10 @@ kerberosConfigMap: kerberos-config # the config map. kerberosConfigFileName: krb5.conf -# Effective only if Kerberos is enabled. Path of the kerberos keytab file on -# k8s cluster nodes. 
-kerberosKeytabHostPath: /hdfs-credentials/hdfs.keytab +# Effective only if Kerberos is enabled. Name of the k8s secret containing +# the kerberos keytab files of per-host HDFS principals. The secret should +# have multiple data items. Each data item name should be formatted as: +# `HOST-NAME.keytab` +# where HOST-NAME should match the cluster node +# host name that each per-host hdfs principal is associated with. +kerberosKeytabsSecret: hdfs-kerberos-keytabs From 57d7341d95347285d33de52b63621d496e685ece Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Mon, 18 Sep 2017 09:58:42 -0700 Subject: [PATCH 3/5] Address review comments --- charts/hdfs-datanode-k8s/README.md | 5 +++-- charts/hdfs-namenode-k8s/README.md | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/charts/hdfs-datanode-k8s/README.md b/charts/hdfs-datanode-k8s/README.md index c65d6e5..37f3fe5 100644 --- a/charts/hdfs-datanode-k8s/README.md +++ b/charts/hdfs-datanode-k8s/README.md @@ -10,8 +10,9 @@ HDFS `datanodes` running inside a kubernetes cluster. See the other chart for ``` 2. (Skip this if you do not plan to enable Kerberos) - Conduct the Kerberos setups described in the namenode README.md, if you - have not done that already. + Conduct the Kerberos setups described in the namenode + [README.md](../hdfs-namenode-k8s/README.md), if you have not done that + already. 3. Launch this helm chart, `hdfs-datanode-k8s`. diff --git a/charts/hdfs-namenode-k8s/README.md b/charts/hdfs-namenode-k8s/README.md index a6db6a9..456bd41 100644 --- a/charts/hdfs-namenode-k8s/README.md +++ b/charts/hdfs-namenode-k8s/README.md @@ -83,7 +83,7 @@ supported at the moment. The `namenode` instance is supposed to be pinned to a cluster host using a node label, as shown in the usage above. `Namenode` mount a local disk directory using k8s `hostPath` volume. You may want to restrict access of `hostPath` using `pod security policy`. 
-See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md)) +See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md) `namenode` is using `hostNetwork` so it can see physical IPs of datanodes without an overlay network such as weave-net masking them. From 56ba977ed66590de864eebedfbe72094f61da567 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Mon, 25 Sep 2017 15:30:05 -0700 Subject: [PATCH 4/5] Address review comments --- charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml | 2 +- charts/hdfs-namenode-k8s/README.md | 6 ++++-- .../hdfs-namenode-k8s/templates/namenode-statefulset.yaml | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml index ef58781..cff40f2 100644 --- a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml +++ b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml @@ -32,7 +32,7 @@ spec: - name: datanode image: uhopper/hadoop-datanode:2.7.2 env: - # The following env vars are listed according to low-to-high precendence order. + # The following env vars are listed according to low-to-high precedence order. # i.e. Whoever comes last will override the earlier value of the same variable. {{- if .Values.kerberosEnabled }} - name: CORE_CONF_hadoop_security_authentication diff --git a/charts/hdfs-namenode-k8s/README.md b/charts/hdfs-namenode-k8s/README.md index a6db6a9..fa586bd 100644 --- a/charts/hdfs-namenode-k8s/README.md +++ b/charts/hdfs-namenode-k8s/README.md @@ -15,7 +15,7 @@ HDFS `namenode` running inside a kubernetes cluster. See the other chart for 2. (Skip this if you do not plan to enable Kerberos) Prepare Kerberos setup, following the steps below. - - Create a config map containg your Kerberos config file. This will be + - Create a config map containing your Kerberos config file. 
This will be mounted onto the namenode and datanode pods. ``` @@ -51,7 +51,9 @@ HDFS `namenode` running inside a kubernetes cluster. See the other chart for - Create a k8s secret containing all the keytab files. This will be mounted onto the namenode and datanode pods. (You may want to restrict access to - this secret using k8s RBAC, to minimize exposure of the keytab files. + this secret using k8s + [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/), + to minimize exposure of the keytab files.) ``` $ kubectl create secret generic hdfs-kerberos-keytabs \ --from-file=kube-n1.mycompany.com.keytab \ diff --git a/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml index 2406cd2..666d27b 100644 --- a/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml +++ b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml @@ -51,7 +51,7 @@ spec: - name: hdfs-namenode image: uhopper/hadoop-namenode:2.7.2 env: - # The following env vars are listed according to low-to-high precendence order. + # The following env vars are listed according to low-to-high precedence order. # i.e. Whoever comes last will override the earlier value of the same variable. {{- if .Values.kerberosEnabled }} - name: CORE_CONF_hadoop_security_authentication From 9156eb5dad13b332b246eb4ec184746fdc571647 Mon Sep 17 00:00:00 2001 From: Eric Charles Date: Wed, 22 Nov 2017 10:04:05 +0100 Subject: [PATCH 5/5] build a jar with deps to have all needed classes when deploying in a hadoop cluster --- topology/pod-cidr/pom.xml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/topology/pod-cidr/pom.xml b/topology/pod-cidr/pom.xml index 19ff6a2..c4eedc6 100644 --- a/topology/pod-cidr/pom.xml +++ b/topology/pod-cidr/pom.xml @@ -35,6 +35,22 @@ 1.7 + + maven-assembly-plugin + + + package + + single + + + + + + jar-with-dependencies + + +