From a2b9e1508b1112cae95c498cb2929cc19f2b334c Mon Sep 17 00:00:00 2001 From: Szymon Bar Date: Fri, 14 Feb 2020 13:34:10 +0100 Subject: [PATCH 1/4] template-processors/base: check if dir exists Check if CLONED_TEMPLATE_GIT_DIR exists after cloning templates repo. Log an explicit error message if directory in remote repo does not exist because before when user had deleted files he might've thought that the empty directory was still there while it was not tracked anymore. If user creted an empty directory with .gitkeep (or similiar) file explicitly, serve such case too by printing a message in pod logs (do not fail in such a case, user knows what he is doing). Also exit from resourceManager script if manifest directory contains no yaml, yml, or json files. Reference: https://git.wiki.kernel.org/index.php/Git_FAQ#Can_I_add_empty_directories.3F Fixes #276 --- template-processors/base/bin/gitClone.sh | 7 +++++++ template-processors/base/bin/processTemplates.sh | 3 +-- template-processors/base/bin/resourceManager.sh | 12 +++++++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/template-processors/base/bin/gitClone.sh b/template-processors/base/bin/gitClone.sh index 85f384c0..93497df1 100755 --- a/template-processors/base/bin/gitClone.sh +++ b/template-processors/base/bin/gitClone.sh @@ -70,5 +70,12 @@ function pullFromParametersRepo() { echo Cloning Repositories pullFromTemplatesRepo +# In git, if directory contains no files, it isn't tracked: +# https://git.wiki.kernel.org/index.php/Git_FAQ#Can_I_add_empty_directories.3F +if ! [[ -d "$CLONED_TEMPLATE_GIT_DIR" ]]; then + echo "ERROR - directory ${CLONED_TEMPLATE_GIT_DIR#/git/templates/} does not exist in the remote repository. +If you want an empty directory to be tracked by git, add a .gitkeep file inside" >&2 + exit 1 +fi pullFromParametersRepo mkdir -p "$MANIFEST_DIR" diff --git a/template-processors/base/bin/processTemplates.sh b/template-processors/base/bin/processTemplates.sh index 16423c8c..86e50bb9 100755 --- a/template-processors/base/bin/processTemplates.sh +++ b/template-processors/base/bin/processTemplates.sh @@ -18,5 +18,4 @@ set -euxo pipefail echo Processing Templates -# shellcheck disable=SC2086 -cp -R "$CLONED_TEMPLATE_GIT_DIR/"* "$MANIFEST_DIR"/ +cp -R "${CLONED_TEMPLATE_GIT_DIR}/." "${MANIFEST_DIR}/" diff --git a/template-processors/base/bin/resourceManager.sh b/template-processors/base/bin/resourceManager.sh index e4d9228f..b0bd981d 100755 --- a/template-processors/base/bin/resourceManager.sh +++ b/template-processors/base/bin/resourceManager.sh @@ -44,7 +44,7 @@ function addLabels() { local timestamp="$2" local tmpdir="$(mktemp -d)" # shellcheck disable=SC2044 - for file in $(find "$MANIFEST_DIR" -iregex '.*\.\(ya?ml\|json\)'); do + for file in $(find "$MANIFEST_DIR" -regextype posix-extended -iregex '.*\.(ya?ml|json)'); do cat "$file" | yq -y -s "map(select(.!=null)|setpath([\"metadata\",\"labels\",\"$TAG_OWNER\"]; \"$owner\"))|.[]" | yq -y -s "map(select(.!=null)|setpath([\"metadata\",\"labels\",\"$TAG_APPLIED\"]; \"$timestamp\"))|.[]" \ @@ -80,6 +80,16 @@ function deleteByOldLabels() { function createUpdateResources() { local owner="$1" local timestamp="$(date +%s)" + # Check if directory contains only hidden files like .gitkeep, or .gitignore. + # This would mean that user purposefully wanted to track an empty directory in git. + # https://git.wiki.kernel.org/index.php/Git_FAQ#Can_I_add_empty_directories.3F + if [[ -z $(ls "${MANIFEST_DIR}") ]]; then + echo "Manifest directory empty, skipping" + return + elif [[ -z $(find "$MANIFEST_DIR" -regextype posix-extended -iregex '.*\.(ya?ml|json)') ]]; then + echo "ERROR - no files with .yaml, .yml, or .json extension in manifest directory" + exit 1 + fi case "$CREATE_MODE" in Apply) addLabels "$owner" "$timestamp" From 6cd133f3e3ee1964965c28b649ea8105ef896e59 Mon Sep 17 00:00:00 2001 From: Szymon Bar Date: Tue, 18 Feb 2020 10:36:54 +0100 Subject: [PATCH 2/4] test/e2e: add nonexistent and empty dirs test Add test to check if eunomia acts properly when user specifies a nonexistent TemplateSource ContextDir, or ContextDir with a hidden file (e.g. ".gitkeep"). --- test/e2e/issue276_test.go | 263 +++++++++++++++++++++ test/e2e/testdata/empty-directory/.gitkeep | 0 2 files changed, 263 insertions(+) create mode 100644 test/e2e/issue276_test.go create mode 100644 test/e2e/testdata/empty-directory/.gitkeep diff --git a/test/e2e/issue276_test.go b/test/e2e/issue276_test.go new file mode 100644 index 00000000..4a7268f0 --- /dev/null +++ b/test/e2e/issue276_test.go @@ -0,0 +1,263 @@ +// +build e2e + +/* +Copyright 2020 Kohl's Department Stores, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "fmt" + "os" + "strings" + "testing" + "time" + + framework "github.com/operator-framework/operator-sdk/pkg/test" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + + "github.com/KohlsTechnology/eunomia/pkg/apis" + gitopsv1alpha1 "github.com/KohlsTechnology/eunomia/pkg/apis/eunomia/v1alpha1" +) + +// TestIssue276NoTemplatesDir verifies that job's pod fails in such a +// way that it prints a custom error message to output when user passes +// nonexistent TemplateSource ContextDir. Then, the test also verifies +// that the Custom Recource is successfully deleted +func TestIssue276NoTemplatesDir(t *testing.T) { + if testing.Short() { + // FIXME: as of writing this test, "backoffLimit" in job.yaml is set to 4, + // which means eunomia will wait to launch deletion Job until 5 Pod retries + // fail, eventually triggering the origninal Job's failure; the back-off + // time between the runs is unfortunately exponential and non-configurable, + // which makes this test awfully long. Try to at least make it possible to + // run in parallel with other tests. + t.Skip("This test currently takes minutes to run, because of exponential backoff in kubernetes") + } + + ctx := framework.NewTestCtx(t) + defer ctx.Cleanup() + + namespace, err := ctx.GetNamespace() + if err != nil { + t.Fatalf("could not get namespace: %v", err) + } + if err = SetupRbacInNamespace(namespace); err != nil { + t.Error(err) + } + + defer DumpJobsLogsOnError(t, framework.Global, namespace) + err = framework.AddToFrameworkScheme(apis.AddToScheme, &gitopsv1alpha1.GitOpsConfigList{}) + if err != nil { + t.Fatal(err) + } + + eunomiaURI, found := os.LookupEnv("EUNOMIA_URI") + if !found { + eunomiaURI = "https://github.com/kohlstechnology/eunomia" + } + eunomiaRef, found := os.LookupEnv("EUNOMIA_REF") + if !found { + eunomiaRef = "master" + } + + // Step 1: create a CR with a nonexistent TemplateSource ContextDir + + const noDir = "test/e2e/testdata/no-directory" + gitops := &gitopsv1alpha1.GitOpsConfig{ + TypeMeta: metav1.TypeMeta{ + Kind: "GitOpsConfig", + APIVersion: "eunomia.kohls.io/v1alpha1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "gitops-issue276", + Namespace: namespace, + Finalizers: []string{ + "gitopsconfig.eunomia.kohls.io/finalizer", + }, + }, + Spec: gitopsv1alpha1.GitOpsConfigSpec{ + TemplateSource: gitopsv1alpha1.GitConfig{ + URI: eunomiaURI, + Ref: eunomiaRef, + ContextDir: noDir, + }, + ParameterSource: gitopsv1alpha1.GitConfig{ + URI: eunomiaURI, + Ref: eunomiaRef, + ContextDir: "test/e2e/testdata/empty-yaml", + }, + Triggers: []gitopsv1alpha1.GitOpsTrigger{ + {Type: "Change"}, + }, + TemplateProcessorImage: "quay.io/kohlstechnology/eunomia-base:dev", + ResourceHandlingMode: "Apply", + ResourceDeletionMode: "Delete", + ServiceAccountRef: "eunomia-operator", + }, + } + gitops.Annotations = map[string]string{"gitopsconfig.eunomia.kohls.io/initialized": "true"} + + err = framework.Global.Client.Create(context.TODO(), gitops, &framework.CleanupOptions{TestContext: ctx, Timeout: timeout, RetryInterval: retryInterval}) + if err != nil { + t.Fatal(err) + } + + // Step 2: Wait until Job's pod fails and check if it printed a clear error message + + const name = "gitopsconfig-gitops-issue276-" + err = wait.Poll(retryInterval, timeout, func() (done bool, err error) { + pod, err := GetPod(namespace, name, "quay.io/kohlstechnology/eunomia-base:dev", framework.Global.KubeClient) + switch { + case apierrors.IsNotFound(err): + t.Logf("Waiting for availability of %s pod", name) + return false, nil + case err != nil: + return false, err + case pod != nil && pod.Status.Phase == "Failed": + logs, err := GetPodLogs(pod, framework.Global.KubeClient) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(logs, fmt.Sprintf("ERROR - directory %s does not exist in the remote repository", noDir)) { + t.Fatalf("Pod %s failed in an unexpected way; logs:\n%s", pod.Name, logs) + } + return true, nil + case pod != nil: + t.Logf("Waiting for error in pod %s; status: %s", pod.Name, debugJSON(pod.Status)) + return false, nil + default: + t.Logf("Waiting for error in pod %s", pod.Name) + return false, nil + } + }) + if err != nil { + t.Error(err) + } + + // Step 3: Delete GitOpsConfig and make sure that the deletion succeeded + + t.Logf("Deleting CR") + err = framework.Global.Client.Delete(context.TODO(), gitops) + if err != nil { + t.Fatal(err) + } + + // Three minutes timeout. + err = WaitForPodAbsence(t, framework.Global, namespace, name, "quay.io/kohlstechnology/eunomia-base:dev", retryInterval, 3*time.Minute) + if err != nil { + t.Error(err) + } +} + +// TestIssue276EmptyTemplatesDir verifies that job succeeds when user +// passes a TemplateSource directory containing no resource files, but +// only a single ".gitkeep" file +func TestIssue276EmptyTemplatesDir(t *testing.T) { + ctx := framework.NewTestCtx(t) + defer ctx.Cleanup() + + namespace, err := ctx.GetNamespace() + if err != nil { + t.Fatalf("could not get namespace: %v", err) + } + if err = SetupRbacInNamespace(namespace); err != nil { + t.Error(err) + } + + defer DumpJobsLogsOnError(t, framework.Global, namespace) + err = framework.AddToFrameworkScheme(apis.AddToScheme, &gitopsv1alpha1.GitOpsConfigList{}) + if err != nil { + t.Fatal(err) + } + + eunomiaURI, found := os.LookupEnv("EUNOMIA_URI") + if !found { + eunomiaURI = "https://github.com/kohlstechnology/eunomia" + } + eunomiaRef, found := os.LookupEnv("EUNOMIA_REF") + if !found { + eunomiaRef = "master" + } + + // Step 1: create a CR with a TemplateSource ContextDir containing only a ".gitkeep" file + + gitops := &gitopsv1alpha1.GitOpsConfig{ + TypeMeta: metav1.TypeMeta{ + Kind: "GitOpsConfig", + APIVersion: "eunomia.kohls.io/v1alpha1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "gitops-issue276", + Namespace: namespace, + Finalizers: []string{ + "gitopsconfig.eunomia.kohls.io/finalizer", + }, + }, + Spec: gitopsv1alpha1.GitOpsConfigSpec{ + TemplateSource: gitopsv1alpha1.GitConfig{ + URI: eunomiaURI, + Ref: eunomiaRef, + ContextDir: "test/e2e/testdata/empty-directory", + }, + ParameterSource: gitopsv1alpha1.GitConfig{ + URI: eunomiaURI, + Ref: eunomiaRef, + ContextDir: "test/e2e/testdata/empty-yaml", + }, + Triggers: []gitopsv1alpha1.GitOpsTrigger{ + {Type: "Change"}, + }, + TemplateProcessorImage: "quay.io/kohlstechnology/eunomia-base:dev", + ResourceHandlingMode: "Apply", + ResourceDeletionMode: "Delete", + ServiceAccountRef: "eunomia-operator", + }, + } + gitops.Annotations = map[string]string{"gitopsconfig.eunomia.kohls.io/initialized": "true"} + + err = framework.Global.Client.Create(context.TODO(), gitops, &framework.CleanupOptions{TestContext: ctx, Timeout: timeout, RetryInterval: retryInterval}) + if err != nil { + t.Fatal(err) + } + + // Step 2: Wait until Job's pod succeeds + + err = wait.Poll(retryInterval, timeout, func() (done bool, err error) { + const name = "gitopsconfig-gitops-issue276-" + pod, err := GetPod(namespace, name, "quay.io/kohlstechnology/eunomia-base:dev", framework.Global.KubeClient) + switch { + case apierrors.IsNotFound(err): + t.Logf("Waiting for availability of %s pod", name) + return false, nil + case err != nil: + return false, err + case pod != nil && pod.Status.Phase == "Succeeded": + return true, nil + case pod != nil: + t.Logf("Waiting for pod %s to succeed; status: %s", pod.Name, debugJSON(pod.Status)) + return false, nil + default: + t.Logf("Waiting for pod %s", name) + return false, nil + } + }) + if err != nil { + t.Error(err) + } +} diff --git a/test/e2e/testdata/empty-directory/.gitkeep b/test/e2e/testdata/empty-directory/.gitkeep new file mode 100644 index 00000000..e69de29b From 213d80eac819246c199dca8b10a611c8d6b17627 Mon Sep 17 00:00:00 2001 From: Szymon Bar Date: Tue, 18 Feb 2020 11:40:56 +0100 Subject: [PATCH 3/4] gitopsconfig: make sure bad img blocked job Double-check if the existing job is blocked because of bad image. Add checking if there is only one active pod of a job, which occurs when nonexistent or wrong image blocks the job. Thanks to it only such case will fall under the if code block. Before, if there was a "normal" fail of a job (i.e. code inside container exited 1), and there were a few failed pods, this would fall under the if block, and jobContainerStatus function would return an error because the job had a few pods. --- pkg/controller/gitopsconfig/controller.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/controller/gitopsconfig/controller.go b/pkg/controller/gitopsconfig/controller.go index d14d954b..46af8ebb 100644 --- a/pkg/controller/gitopsconfig/controller.go +++ b/pkg/controller/gitopsconfig/controller.go @@ -422,7 +422,8 @@ func (r *Reconciler) manageDeletion(instance *gitopsv1alpha1.GitOpsConfig) (reco // assume the GitOpsConfig never managed to successfully deploy, so we can // just delete the job, remove the finalizer, and be done (#216). It may be // either action=create or action=delete job. - if len(jobs) == 1 { + // If a job is blocked because of bad image, it only has one active pod + if len(jobs) == 1 && jobs[0].Status.Succeeded == 0 && jobs[0].Status.Failed == 0 && jobs[0].Status.Active == 1 { status, err := jobContainerStatus(context.TODO(), r.client, &jobs[0]) if err != nil { log.Error(err, "GitOpsConfig finalizer unable to get job pod's status", "instance", instance.Name) From 1b7e27b1f606a3171ca76a07b903d830ac6a89e6 Mon Sep 17 00:00:00 2001 From: Szymon Bar Date: Tue, 18 Feb 2020 11:53:41 +0100 Subject: [PATCH 4/4] test/e2e/util: add GetPodLogs function Add GetPodLogs function to test utilities for the developer to be able to dump logs of a given pod. --- test/e2e/util.go | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/test/e2e/util.go b/test/e2e/util.go index bd72db92..bab3edbb 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -23,7 +23,7 @@ import ( goctx "context" "encoding/json" "fmt" - "io" + "io/ioutil" "os/exec" "path/filepath" "runtime" @@ -61,6 +61,21 @@ func GetPod(namespace, namePrefix, containsImage string, kubeclient kubernetes.I return nil, nil } +// GetPodLogs retrieves logs of a given pod +func GetPodLogs(pod *v1.Pod, kubeclient kubernetes.Interface) (string, error) { + req := kubeclient.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &v1.PodLogOptions{Timestamps: true}) + logs, err := req.Stream() + if err != nil { + return "", xerrors.Errorf("could not get logs for pod %q: %w", pod.Name, err) + } + defer logs.Close() + b, err := ioutil.ReadAll(logs) + if err != nil { + return "", xerrors.Errorf("could not get logs for pod %q: %w", pod.Name, err) + } + return string(b), nil +} + // WaitForPod retrieves a specific pod with a known name and namespace and waits for it to be running and available func WaitForPod(t *testing.T, f *framework.Framework, namespace, name string, retryInterval, timeout time.Duration) error { err := wait.Poll(retryInterval, timeout, func() (done bool, err error) { @@ -151,8 +166,7 @@ func DumpJobsLogsOnError(t *testing.T, f *framework.Framework, namespace string) if !t.Failed() { return } - pods := f.KubeClient.CoreV1().Pods(namespace) - podsList, err := pods.List(metav1.ListOptions{}) + podsList, err := f.KubeClient.CoreV1().Pods(namespace).List(metav1.ListOptions{}) if err != nil { t.Logf("failed to list pods in namespace %s: %s", namespace, err) return @@ -169,20 +183,12 @@ func DumpJobsLogsOnError(t *testing.T, f *framework.Framework, namespace string) continue } // Retrieve pod's logs - req := pods.GetLogs(p.Name, &v1.PodLogOptions{Timestamps: true}) - logs, err := req.Stream() - if err != nil { - t.Logf("failed to retrieve logs for pod %s: %s", p.Name, err) - continue - } - buf := &bytes.Buffer{} - _, err = io.Copy(buf, logs) - logs.Close() + logs, err := GetPodLogs(&p, f.KubeClient) if err != nil { t.Logf("failed to retrieve logs for pod %s: %s", p.Name, err) continue } - t.Logf("================ POD LOGS FOR %s ================\n%s\n\n", p.Name, buf.String()) + t.Logf("================ POD LOGS FOR %s ================\n%s\n\n", p.Name, logs) } }