Skip to content

Commit 8b21497

Browse files
fix: race condition for pre-existing StackConfigPolicy (#8928)
* fix: race condition for pre-existing StackConfigPolicy * fix: move namespace checks first in DoesPolicyMatchObject * fix: log and ignore err of DoesPolicyMatchObject * fix: reword lcnChecker to licenseChecker * fix: reword code comment about file-setting handling * doc: add comment about continue on err from stackconfigpolicy.DoesPolicyMatchObject * fix: check if file-settings secret already exists in maybeReconcileEmptyFileSettingsSecret * fix: reword file-setting secret missing requeue reason * fix: move operator namespace a constant to a const in Test_maybeReconcileEmptyFileSettingsSecret * fix: add unit-test to validate that we don't requeue if file-settings secret exists * fix: remove duplicate keys from error logging of stackconfigpolicy.DoesPolicyMatchObject * fix: perform first the file-setting secret existence check in maybeReconcileEmptyFileSettingsSecret --------- Co-authored-by: Michael Morello <[email protected]>
1 parent f2dbc38 commit 8b21497

File tree

5 files changed

+460
-11
lines changed

5 files changed

+460
-11
lines changed

pkg/controller/elasticsearch/driver/driver.go

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
commonv1 "github.com/elastic/cloud-on-k8s/v3/pkg/apis/common/v1"
2424
esv1 "github.com/elastic/cloud-on-k8s/v3/pkg/apis/elasticsearch/v1"
25+
policyv1alpha1 "github.com/elastic/cloud-on-k8s/v3/pkg/apis/stackconfigpolicy/v1alpha1"
2526
"github.com/elastic/cloud-on-k8s/v3/pkg/controller/association"
2627
"github.com/elastic/cloud-on-k8s/v3/pkg/controller/common"
2728
commondriver "github.com/elastic/cloud-on-k8s/v3/pkg/controller/common/driver"
@@ -52,6 +53,7 @@ import (
5253
"github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/settings"
5354
"github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/stackmon"
5455
"github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/user"
56+
"github.com/elastic/cloud-on-k8s/v3/pkg/controller/stackconfigpolicy"
5557
"github.com/elastic/cloud-on-k8s/v3/pkg/dev"
5658
"github.com/elastic/cloud-on-k8s/v3/pkg/utils/k8s"
5759
ulog "github.com/elastic/cloud-on-k8s/v3/pkg/utils/log"
@@ -351,11 +353,17 @@ func (d *defaultDriver) Reconcile(ctx context.Context) *reconciler.Results {
351353
return results.WithError(err)
352354
}
353355

354-
// reconcile an empty File based settings Secret if it doesn't exist
355356
if d.Version.GTE(filesettings.FileBasedSettingsMinPreVersion) {
356-
err = filesettings.ReconcileEmptyFileSettingsSecret(ctx, d.Client, d.ES, true)
357+
requeue, err := maybeReconcileEmptyFileSettingsSecret(ctx, d.Client, d.LicenseChecker, &d.ES, d.OperatorParameters.OperatorNamespace)
357358
if err != nil {
358359
return results.WithError(err)
360+
} else if requeue {
361+
results.WithReconciliationState(
362+
defaultRequeue.WithReason(
363+
fmt.Sprintf("This cluster is targeted by at least one StackConfigPolicy, expecting Secret %s to be created by StackConfigPolicy controller",
364+
esv1.FileSettingsSecretName(d.ES.Name)),
365+
),
366+
)
359367
}
360368
}
361369

@@ -417,6 +425,64 @@ func (d *defaultDriver) Reconcile(ctx context.Context) *reconciler.Results {
417425
return results.WithResults(d.reconcileNodeSpecs(ctx, esReachable, esClient, d.ReconcileState, *resourcesState, keystoreResources, meta))
418426
}
419427

428+
// maybeReconcileEmptyFileSettingsSecret reconciles an empty file-settings secret for this ES cluster
429+
// based on license status and StackConfigPolicy targeting. When enterprise features are disabled always
430+
// creates an empty file-settings secret. When enterprise features are enabled and at least one StackConfigPolicy
431+
// targets this cluster returns true to requeue and doesn't create the empty file-settings secret. If no
432+
// StackConfigPolicy targets this cluster it creates an empty file-settings secret. Note: This logic here prevents
433+
// the race condition described in https://github.com/elastic/cloud-on-k8s/issues/8912.
434+
func maybeReconcileEmptyFileSettingsSecret(ctx context.Context, c k8s.Client, licenseChecker commonlicense.Checker, es *esv1.Elasticsearch, operatorNamespace string) (bool, error) {
435+
// Check if file-settings secret already exists
436+
var currentSecret corev1.Secret
437+
if err := c.Get(ctx, types.NamespacedName{Namespace: es.Namespace, Name: esv1.FileSettingsSecretName(es.Name)}, &currentSecret); err == nil {
438+
// Secret does exist
439+
return false, nil
440+
} else if !k8serrors.IsNotFound(err) {
441+
return false, err
442+
}
443+
444+
log := ulog.FromContext(ctx)
445+
enabled, err := licenseChecker.EnterpriseFeaturesEnabled(ctx)
446+
if err != nil {
447+
return false, err
448+
}
449+
if !enabled {
450+
// If the license is not enabled, we reconcile the empty file-settings secret
451+
return false, filesettings.ReconcileEmptyFileSettingsSecret(ctx, c, *es, true)
452+
}
453+
454+
// Get all StackConfigPolicies in the cluster
455+
var policyList policyv1alpha1.StackConfigPolicyList
456+
if err := c.List(ctx, &policyList); err != nil {
457+
return false, err
458+
}
459+
460+
// Check each policy to see if it targets this ES cluster
461+
for _, policy := range policyList.Items {
462+
// Check if this policy's namespace and label selector match this ES cluster
463+
matches, err := stackconfigpolicy.DoesPolicyMatchObject(&policy, es, operatorNamespace)
464+
if err != nil {
465+
// Do not return an err here as this potentially can block ES reconciliation if any SCP in the cluster
466+
// has an invalid label selector, even if it doesn't target the current elasticsearch cluster.
467+
log.Error(err, "Failed to check if StackConfigPolicy matches object", "scp_name", policy.Name, "scp_namespace", policy.Namespace)
468+
continue
469+
} else if !matches {
470+
continue
471+
}
472+
473+
// Found a policy that targets this ES cluster but the file-settings secret does not exist.
474+
// Let the SCP controller manage it, however, return requeue true to handle the following edge case:
475+
// 1. SCP exists and targets ES cluster at creation time
476+
// 2. ES controller "defers" to SCP controller (doesn't create secret)
477+
// 3. SCP is deleted before it reconciles and creates the file-settings secret
478+
// 4. Result: ES cluster left without any file-settings secret
479+
return true, nil
480+
}
481+
482+
// No policies target this cluster, so ES controller should create the empty secret
483+
return false, filesettings.ReconcileEmptyFileSettingsSecret(ctx, c, *es, true)
484+
}
485+
420486
// apiKeyStoreSecretSource returns the Secret that holds the remote API keys, and which should be used as a secure settings source.
421487
func apiKeyStoreSecretSource(ctx context.Context, es *esv1.Elasticsearch, c k8s.Client) ([]commonv1.NamespacedSecretSource, error) {
422488
// Check if Secret exists

0 commit comments

Comments
 (0)