From 8e72748be29488f0bb8b8f77e6c9a034f5bc1d4e Mon Sep 17 00:00:00 2001
From: parth-gr
Date: Tue, 25 Jun 2024 20:41:56 +0530
Subject: [PATCH 1/6] doc: add upgrade steps for external cluster

Currently, if users want to utilize the new flags during an upgrade,
there is no documented way to do so.
Add the steps so users can make use of them.

closes: https://github.com/rook/rook/issues/14368

Signed-off-by: parth-gr
---
 .../external-cluster/external-cluster.md      | 73 ++++++++++++++-----
 1 file changed, 54 insertions(+), 19 deletions(-)

diff --git a/Documentation/CRDs/Cluster/external-cluster/external-cluster.md b/Documentation/CRDs/Cluster/external-cluster/external-cluster.md
index f570991750d9..adc7405f2545 100644
--- a/Documentation/CRDs/Cluster/external-cluster/external-cluster.md
+++ b/Documentation/CRDs/Cluster/external-cluster/external-cluster.md
@@ -121,25 +121,6 @@ The storageclass is used to create a volume in the pool matching the topology wh
 
 For more details, see the [Topology-Based Provisioning](topology-for-external-mode.md)
 
-### Upgrade Example
-
-1. If consumer cluster doesn't have restricted caps, this will upgrade all the default csi-users (non-restricted):
-
-    ```console
-    python3 create-external-cluster-resources.py --upgrade
-    ```
-
-2. If the consumer cluster has restricted caps:
-Restricted users created using `--restricted-auth-permission` flag need to pass mandatory flags: '`--rbd-data-pool-name`(if it is a rbd user), `--k8s-cluster-name` and `--run-as-user`' flags while upgrading, in case of cephfs users if you have passed `--cephfs-filesystem-name` flag while creating csi-users then while upgrading it will be mandatory too. In this example the user would be `client.csi-rbd-node-rookstorage-replicapool` (following the pattern `csi-user-clusterName-poolName`)
-
-    ```console
-    python3 create-external-cluster-resources.py --upgrade --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --run-as-user client.csi-rbd-node-rookstorage-replicapool
-    ```
-
-!!! note
-    An existing non-restricted user cannot be converted to a restricted user by upgrading.
-    The upgrade flag should only be used to append new permissions to users. It shouldn't be used for changing a csi user already applied permissions. For example, you shouldn't change the pool(s) a user has access to.
-
 ### Admin privileges
 
 If in case the cluster needs the admin keyring to configure, update the admin key `rook-ceph-mon` secret with client.admin keyring
@@ -305,3 +286,57 @@ you can export the settings from this cluster with the following steps.
 
 !!! important
     For other clusters to connect to storage in this cluster, Rook must be configured with a networking configuration that is accessible from other clusters. Most commonly this is done by enabling host networking in the CephCluster CR so the Ceph daemons will be addressable by their host IPs.
+
+## Upgrades
+
+The upgrade steps differ depending on whether the consumer cluster uses restricted or non-restricted caps:
+
+1. If the consumer cluster doesn't have restricted caps, the following command will upgrade all the default CSI users (non-restricted):
+
+    ```console
+    python3 create-external-cluster-resources.py --upgrade
+    ```
+
+2. If the consumer cluster has restricted caps:
+
+    Users created with the `--restricted-auth-permission` flag must pass the mandatory flags `--rbd-data-pool-name` (if it is an RBD user), `--k8s-cluster-name`, and `--run-as-user` while upgrading. For CephFS users, if the `--cephfs-filesystem-name` flag was passed when the CSI users were created, it is also mandatory while upgrading. In this example the user would be `client.csi-rbd-node-rookstorage-replicapool` (following the pattern `csi-user-clusterName-poolName`).
+
+    ```console
+    python3 create-external-cluster-resources.py --upgrade --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --run-as-user client.csi-rbd-node-rookstorage-replicapool
+    ```
+
+    !!! note
+        1) An existing non-restricted user cannot be converted to a restricted user by upgrading.
+        2) The upgrade flag should only be used to append new permissions to users. It shouldn't be used to change permissions a CSI user already has; for example, be careful not to change the pool(s) that a user has access to.
+
+### Upgrade cluster to utilize a new feature
+
+Some Rook upgrades may require re-running the import steps, or may introduce new external cluster features that can be most easily enabled by re-running the import steps.
+
+To re-run the import steps with new options, the python script should be re-run using the same configuration options that were used for past invocations, plus the configurations that are being added or modified.
+
+Starting with Rook v1.15, the script stores the configuration in the `external-cluster-user-command` ConfigMap for easy future reference.
+
+* `args`: The exact arguments that were used when the script was run. Arguments are resolved using the priority: command-line args > `config.ini` file values > default values.
+
+#### Example `external-cluster-user-command` ConfigMap:
+
+1. Get the last-applied config, if it is available:
+
+    ```console
+    $ kubectl get configmap --namespace rook-ceph external-cluster-user-command --output jsonpath='{.data.args}'
+    ```
+
+2. Copy the output to `config.ini`
+
+3. Make any desired modifications and additions to `config.ini`
+
+4. Run the python script again using the [config file](#config-file)
+
+5. [Copy the bash output](#2-copy-the-bash-output)
+
+6. Run the steps under [import-the-source-data](#import-the-source-data)
+
+!!! warning
+    If the last-applied config is unavailable, run the current version of the script again using the previously-applied config and CLI flags.
+    Failing to reuse the same configuration options when re-invoking the python script can result in unexpected changes.

From 4cf349836f05ae25b66c52b1578b1fa75391205a Mon Sep 17 00:00:00 2001
From: Travis Nielsen
Date: Thu, 25 Jul 2024 12:24:43 -0600
Subject: [PATCH 2/6] tests: check for correct base rook version during
 upgrade

The daily upgrade tests were checking for a specific version of the
rook image instead of the local_build tag that is set as the rook base
image since 14486.

Signed-off-by: Travis Nielsen --- tests/integration/ceph_upgrade_test.go | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/integration/ceph_upgrade_test.go b/tests/integration/ceph_upgrade_test.go index 3500211c4ca0..94b4fcb794a5 100644 --- a/tests/integration/ceph_upgrade_test.go +++ b/tests/integration/ceph_upgrade_test.go @@ -105,11 +105,12 @@ func (s *UpgradeSuite) TestUpgradeHelm() { } func (s *UpgradeSuite) testUpgrade(useHelm bool, initialCephVersion v1.CephVersionSpec) { - s.baseSetup(useHelm, installer.Version1_14, initialCephVersion) + baseRookImage := installer.Version1_14 + s.baseSetup(useHelm, baseRookImage, initialCephVersion) objectUserID := "upgraded-user" preFilename := "pre-upgrade-file" - numOSDs, rbdFilesToRead, cephfsFilesToRead := s.deployClusterforUpgrade(objectUserID, preFilename) + numOSDs, rbdFilesToRead, cephfsFilesToRead := s.deployClusterforUpgrade(baseRookImage, objectUserID, preFilename) clusterInfo := client.AdminTestClusterInfo(s.namespace) requireBlockImagesRemoved := false @@ -183,12 +184,13 @@ func (s *UpgradeSuite) testUpgrade(useHelm bool, initialCephVersion v1.CephVersi } func (s *UpgradeSuite) TestUpgradeCephToQuincyDevel() { - s.baseSetup(false, installer.LocalBuildTag, installer.QuincyVersion) + baseRookImage := installer.LocalBuildTag + s.baseSetup(false, baseRookImage, installer.QuincyVersion) objectUserID := "upgraded-user" preFilename := "pre-upgrade-file" s.settings.CephVersion = installer.QuincyVersion - numOSDs, rbdFilesToRead, cephfsFilesToRead := s.deployClusterforUpgrade(objectUserID, preFilename) + numOSDs, rbdFilesToRead, cephfsFilesToRead := s.deployClusterforUpgrade(baseRookImage, objectUserID, preFilename) clusterInfo := client.AdminTestClusterInfo(s.namespace) requireBlockImagesRemoved := false defer func() { @@ -216,12 +218,13 @@ func (s *UpgradeSuite) TestUpgradeCephToQuincyDevel() { } func (s *UpgradeSuite) TestUpgradeCephToReefDevel() { - s.baseSetup(false, installer.LocalBuildTag, installer.ReefVersion) + baseRookImage := installer.LocalBuildTag + s.baseSetup(false, baseRookImage, installer.ReefVersion) objectUserID := "upgraded-user" preFilename := "pre-upgrade-file" s.settings.CephVersion = installer.ReefVersion - numOSDs, rbdFilesToRead, cephfsFilesToRead := s.deployClusterforUpgrade(objectUserID, preFilename) + numOSDs, rbdFilesToRead, cephfsFilesToRead := s.deployClusterforUpgrade(baseRookImage, objectUserID, preFilename) clusterInfo := client.AdminTestClusterInfo(s.namespace) requireBlockImagesRemoved := false defer func() { @@ -249,12 +252,13 @@ func (s *UpgradeSuite) TestUpgradeCephToReefDevel() { } func (s *UpgradeSuite) TestUpgradeCephToSquidDevel() { - s.baseSetup(false, installer.LocalBuildTag, installer.SquidVersion) + baseRookImage := installer.LocalBuildTag + s.baseSetup(false, baseRookImage, installer.SquidVersion) objectUserID := "upgraded-user" preFilename := "pre-upgrade-file" s.settings.CephVersion = installer.SquidVersion - numOSDs, rbdFilesToRead, cephfsFilesToRead := s.deployClusterforUpgrade(objectUserID, preFilename) + numOSDs, rbdFilesToRead, cephfsFilesToRead := s.deployClusterforUpgrade(baseRookImage, objectUserID, preFilename) clusterInfo := client.AdminTestClusterInfo(s.namespace) requireBlockImagesRemoved := false defer func() { @@ -281,7 +285,7 @@ func (s *UpgradeSuite) TestUpgradeCephToSquidDevel() { checkCephObjectUser(&s.Suite, s.helper, s.k8sh, s.namespace, installer.ObjectStoreName, objectUserID, true, false) } -func (s *UpgradeSuite) 
deployClusterforUpgrade(objectUserID, preFilename string) (int, []string, []string) { +func (s *UpgradeSuite) deployClusterforUpgrade(baseRookImage, objectUserID, preFilename string) (int, []string, []string) { // // Create block, object, and file storage before the upgrade // The helm chart already created these though. @@ -330,7 +334,7 @@ func (s *UpgradeSuite) deployClusterforUpgrade(objectUserID, preFilename string) require.True(s.T(), created) // verify that we're actually running the right pre-upgrade image - s.verifyOperatorImage(installer.Version1_14) + s.verifyOperatorImage(baseRookImage) assert.NoError(s.T(), s.k8sh.WriteToPod("", rbdPodName, preFilename, simpleTestMessage)) assert.NoError(s.T(), s.k8sh.ReadFromPod("", rbdPodName, preFilename, simpleTestMessage)) From 0952e36ba1633a264ab523a75a00e78830e261d9 Mon Sep 17 00:00:00 2001 From: nicofnt <16099943+nicofnt@users.noreply.github.com> Date: Thu, 25 Jul 2024 22:20:28 +0200 Subject: [PATCH 3/6] docs: add new topic in troubleshooting guide for the LimitNOFILE issue Create new topic in the troubleshooting guide to explain how to fix a new rook-ceph deployment that could remain in an unhealthy state or fails to config because of the LimitNOFILE value specified in the containerd.service configuration. Signed-off-by: Nicola Fantacuzzi --- .../Troubleshooting/ceph-common-issues.md | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/Documentation/Troubleshooting/ceph-common-issues.md b/Documentation/Troubleshooting/ceph-common-issues.md index b7735e773375..8cca5195b43f 100644 --- a/Documentation/Troubleshooting/ceph-common-issues.md +++ b/Documentation/Troubleshooting/ceph-common-issues.md @@ -67,6 +67,9 @@ title: Ceph Common Issues - [Symptoms](#symptoms-11) - [Investigation](#investigation-7) - [Solution](#solution-12) +- [The cluster is in an unhealthy state or fails to configure when LimitNOFILE=infinity in containerd](#the-cluster-is-in-an-unhealthy-state-or-fails-to-configure-when-limitnofileinfinity-in-containerd) + - [Symptoms](#symptoms-12) + - [Solution](#solution-13) Many of these problem cases are hard to summarize down to a short phrase that adequately describes the problem. Each problem will start with a bulleted list of symptoms. Keep in mind that all symptoms may not apply depending on the configuration of Rook. If the majority of the symptoms are seen there is a fair chance you are experiencing that problem. @@ -774,3 +777,36 @@ data: {} ``` If the ConfigMap exists, remove any keys that you wish to configure through the environment. + +## The cluster is in an unhealthy state or fails to configure when LimitNOFILE=infinity in containerd + +### Symptoms + +When trying to create a new deployment, Ceph mons keep crashing and the cluster fails to configure or remains in an unhealthy state. The nodes' CPUs are stuck at 100%. + +```console +NAME DATADIRHOSTPATH MONCOUNT AGE PHASE MESSAGE HEALTH EXTERNAL FSID +rook-ceph /var/lib/rook 3 4m6s Ready Failed to configure ceph cluster HEALTH_ERR +``` + +### Solution + +Before systemd v240, systemd would leave `fs.nr_open` as-is because it had no mechanism to set a safe upper limit for it. The kernel hard-coded value for the default number of max open files is **1048576**. Starting from systemd v240, when `LimitNOFILE=infinity` is specified in the containerd.service configuration, this value will typically be set to **~1073741816** (INT_MAX for x86_64 divided by two). + +To fix this, set LimitNOFILE in the systemd service configuration to **1048576**. 
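+
+To see the limit the containers currently inherit (a quick check; the `rook-ceph-mon-a` deployment name assumes the default mon naming used later in this section), print the open-file limit from inside a mon container:
+
+```console
+$ kubectl -n rook-ceph exec deploy/rook-ceph-mon-a -- sh -c 'ulimit -n'
+```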
+ +Create an override.conf file with the new LimitNOFILE value: + +```console +$ vim /etc/systemd/system/containerd.service.d/override.conf +[Service] +LimitNOFILE=1048576 +``` + +Reload systemd manager configuration, restart containerd and restart all monitors deployments: + +```console +$ systemctl daemon-reload +$ systemctl restart containerd +$ kubectl rollout restart deployment rook-ceph-mon-a rook-ceph-mon-b rook-ceph-mon-c -n rook-ceph +``` From ed74d22f1f5f1dd3f46ca7f60de2410f0efceea9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:38:51 +0000 Subject: [PATCH 4/6] build(deps): bump the github-dependencies group with 3 updates Bumps the github-dependencies group with 3 updates: [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go), [github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring](https://github.com/prometheus-operator/prometheus-operator) and [github.com/prometheus-operator/prometheus-operator/pkg/client](https://github.com/prometheus-operator/prometheus-operator). Updates `github.com/aws/aws-sdk-go` from 1.54.20 to 1.55.3 - [Release notes](https://github.com/aws/aws-sdk-go/releases) - [Commits](https://github.com/aws/aws-sdk-go/compare/v1.54.20...v1.55.3) Updates `github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring` from 0.75.1 to 0.75.2 - [Release notes](https://github.com/prometheus-operator/prometheus-operator/releases) - [Changelog](https://github.com/prometheus-operator/prometheus-operator/blob/main/CHANGELOG.md) - [Commits](https://github.com/prometheus-operator/prometheus-operator/compare/v0.75.1...v0.75.2) Updates `github.com/prometheus-operator/prometheus-operator/pkg/client` from 0.75.1 to 0.75.2 - [Release notes](https://github.com/prometheus-operator/prometheus-operator/releases) - [Changelog](https://github.com/prometheus-operator/prometheus-operator/blob/main/CHANGELOG.md) - [Commits](https://github.com/prometheus-operator/prometheus-operator/compare/v0.75.1...v0.75.2) --- updated-dependencies: - dependency-name: github.com/aws/aws-sdk-go dependency-type: direct:production update-type: version-update:semver-minor dependency-group: github-dependencies - dependency-name: github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github-dependencies - dependency-name: github.com/prometheus-operator/prometheus-operator/pkg/client dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github-dependencies ... 
Signed-off-by: dependabot[bot] --- go.mod | 6 +++--- go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index d03bf16e5004..b6b1c9aa1121 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ replace ( require ( github.com/IBM/keyprotect-go-client v0.14.3 - github.com/aws/aws-sdk-go v1.54.20 + github.com/aws/aws-sdk-go v1.55.3 github.com/banzaicloud/k8s-objectmatcher v1.8.0 github.com/ceph/go-ceph v0.28.0 github.com/coreos/pkg v0.0.0-20230601102743-20bbbf26f4d8 @@ -30,8 +30,8 @@ require ( github.com/kube-object-storage/lib-bucket-provisioner v0.0.0-20221122204822-d1a8c34382f1 github.com/libopenstorage/secrets v0.0.0-20240416031220-a17cf7f72c6c github.com/pkg/errors v0.9.1 - github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.1 - github.com/prometheus-operator/prometheus-operator/pkg/client v0.75.1 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2 + github.com/prometheus-operator/prometheus-operator/pkg/client v0.75.2 github.com/rook/rook/pkg/apis v0.0.0-20231204200402-5287527732f7 github.com/spf13/cobra v1.8.1 github.com/spf13/pflag v1.0.5 diff --git a/go.sum b/go.sum index 9ff972827c03..0716004f7e2c 100644 --- a/go.sum +++ b/go.sum @@ -144,8 +144,8 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkY github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= github.com/aws/aws-sdk-go v1.44.164/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= -github.com/aws/aws-sdk-go v1.54.20 h1:FZ2UcXya7bUkvkpf7TaPmiL7EubK0go1nlXGLRwEsoo= -github.com/aws/aws-sdk-go v1.54.20/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= +github.com/aws/aws-sdk-go v1.55.3 h1:0B5hOX+mIx7I5XPOrjrHlKSDQV/+ypFZpIHOx5LOk3E= +github.com/aws/aws-sdk-go v1.55.3/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= github.com/banzaicloud/k8s-objectmatcher v1.8.0 h1:Nugn25elKtPMTA2br+JgHNeSQ04sc05MDPmpJnd1N2A= github.com/banzaicloud/k8s-objectmatcher v1.8.0/go.mod h1:p2LSNAjlECf07fbhDyebTkPUIYnU05G+WfGgkTmgeMg= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= @@ -770,11 +770,11 @@ github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4 github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.44.1/go.mod h1:3WYi4xqXxGGXWDdQIITnLNmuDzO5n6wYva9spVhR4fg= github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.46.0/go.mod h1:3WYi4xqXxGGXWDdQIITnLNmuDzO5n6wYva9spVhR4fg= -github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.1 h1:+iiljhJV6niK7MuifJs/n3NeLxikd85nrQfn53sLJkU= -github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.1/go.mod h1:XYrdZw5dW12Cjkt4ndbeNZZTBp4UCHtW0ccR9+sTtPU= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2 h1:6UsAv+jAevuGO2yZFU/BukV4o9NKnFMOuoouSA4G0ns= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2/go.mod h1:XYrdZw5dW12Cjkt4ndbeNZZTBp4UCHtW0ccR9+sTtPU= github.com/prometheus-operator/prometheus-operator/pkg/client v0.46.0/go.mod h1:k4BrWlVQQsvBiTcDnKEMgyh/euRxyxgrHdur/ZX/sdA= -github.com/prometheus-operator/prometheus-operator/pkg/client v0.75.1 
h1:s7GlsRYGLWP+L1eQKy6RmLatX+k3v9NQwutUix4l5uM= -github.com/prometheus-operator/prometheus-operator/pkg/client v0.75.1/go.mod h1:qca3qWGdknRpHvPyThepe5a6QYAh38IQ2ml93E6V3NY= +github.com/prometheus-operator/prometheus-operator/pkg/client v0.75.2 h1:71GOmhZFA2/17maXqCcuJEzpJDyqPty8SpEOGZWyVec= +github.com/prometheus-operator/prometheus-operator/pkg/client v0.75.2/go.mod h1:Sv6XsfGGkR9gKnhP92F5dNXEpsSePn0W+7JwYP0NVkc= github.com/prometheus/client_golang v0.9.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= From ee5e710912b2b78ef7bb1fa03a55beaf94184268 Mon Sep 17 00:00:00 2001 From: sp98 Date: Tue, 23 Jul 2024 21:31:19 +0530 Subject: [PATCH 5/6] core: set min-compat-client to reef for upmap-read If upmap-read balancer mode is required, then set min-compat-client to reef Signed-off-by: sp98 --- .../charts/rook-ceph/templates/resources.yaml | 1 + deploy/examples/crds.yaml | 1 + pkg/apis/ceph.rook.io/v1/types.go | 2 +- pkg/daemon/ceph/client/mgr.go | 36 +++++++++++++++---- pkg/daemon/ceph/client/mgr_test.go | 35 ++++++++++++++++++ 5 files changed, 68 insertions(+), 7 deletions(-) diff --git a/deploy/charts/rook-ceph/templates/resources.yaml b/deploy/charts/rook-ceph/templates/resources.yaml index a9ea8ce8d59f..561a39976f11 100644 --- a/deploy/charts/rook-ceph/templates/resources.yaml +++ b/deploy/charts/rook-ceph/templates/resources.yaml @@ -1601,6 +1601,7 @@ spec: - "" - crush-compat - upmap + - read - upmap-read type: string type: object diff --git a/deploy/examples/crds.yaml b/deploy/examples/crds.yaml index 62e5496c1909..db0f8875322a 100644 --- a/deploy/examples/crds.yaml +++ b/deploy/examples/crds.yaml @@ -1599,6 +1599,7 @@ spec: - "" - crush-compat - upmap + - read - upmap-read type: string type: object diff --git a/pkg/apis/ceph.rook.io/v1/types.go b/pkg/apis/ceph.rook.io/v1/types.go index 8a376257334e..745da47cf592 100755 --- a/pkg/apis/ceph.rook.io/v1/types.go +++ b/pkg/apis/ceph.rook.io/v1/types.go @@ -679,7 +679,7 @@ type Module struct { type ModuleSettings struct { // BalancerMode sets the `balancer` module with different modes like `upmap`, `crush-compact` etc - // +kubebuilder:validation:Enum="";crush-compat;upmap;upmap-read + // +kubebuilder:validation:Enum="";crush-compat;upmap;read;upmap-read BalancerMode string `json:"balancerMode,omitempty"` } diff --git a/pkg/daemon/ceph/client/mgr.go b/pkg/daemon/ceph/client/mgr.go index d1dca7a19187..06ec3d8ad86d 100755 --- a/pkg/daemon/ceph/client/mgr.go +++ b/pkg/daemon/ceph/client/mgr.go @@ -22,12 +22,18 @@ import ( "github.com/pkg/errors" "github.com/rook/rook/pkg/clusterd" + cephver "github.com/rook/rook/pkg/operator/ceph/version" ) var ( moduleEnableWaitTime = 5 * time.Second ) +const ( + readBalancerMode = "read" + upmapReadBalancerMode = "upmap-read" +) + func CephMgrMap(context *clusterd.Context, clusterInfo *ClusterInfo) (*MgrMap, error) { args := []string{"mgr", "dump"} buf, err := NewCephCommand(context, clusterInfo, args).Run() @@ -132,12 +138,12 @@ func setBalancerMode(context *clusterd.Context, clusterInfo *ClusterInfo, mode s return nil } -// setMinCompatClientLuminous set the minimum compatibility for clients to Luminous -func setMinCompatClientLuminous(context *clusterd.Context, clusterInfo *ClusterInfo) error { - args := []string{"osd", "set-require-min-compat-client", "luminous", "--yes-i-really-mean-it"} +// 
setMinCompatClient set the minimum compatibility for clients +func setMinCompatClient(context *clusterd.Context, clusterInfo *ClusterInfo, version string) error { + args := []string{"osd", "set-require-min-compat-client", version, "--yes-i-really-mean-it"} _, err := NewCephCommand(context, clusterInfo, args).Run() if err != nil { - return errors.Wrap(err, "failed to set set-require-min-compat-client to luminous") + return errors.Wrapf(err, "failed to set set-require-min-compat-client to %q", version) } return nil @@ -165,8 +171,12 @@ func mgrSetBalancerMode(context *clusterd.Context, clusterInfo *ClusterInfo, bal // ConfigureBalancerModule configures the balancer module func ConfigureBalancerModule(context *clusterd.Context, clusterInfo *ClusterInfo, balancerModuleMode string) error { - // Set min compat client to luminous before enabling the balancer mode "upmap" - err := setMinCompatClientLuminous(context, clusterInfo) + minCompatClientVersion, err := desiredMinCompatClientVersion(clusterInfo, balancerModuleMode) + if err != nil { + return errors.Wrap(err, "failed to get minimum compatibility client version") + } + + err = setMinCompatClient(context, clusterInfo, minCompatClientVersion) if err != nil { return errors.Wrap(err, "failed to set minimum compatibility client") } @@ -179,3 +189,17 @@ func ConfigureBalancerModule(context *clusterd.Context, clusterInfo *ClusterInfo return nil } + +func desiredMinCompatClientVersion(clusterInfo *ClusterInfo, balancerModuleMode string) (string, error) { + // Set min compat client to luminous before enabling the balancer mode "upmap" + minCompatClientVersion := "luminous" + if balancerModuleMode == readBalancerMode || balancerModuleMode == upmapReadBalancerMode { + if !clusterInfo.CephVersion.IsAtLeast(cephver.CephVersion{Major: 19}) { + return "", errors.New("minimum ceph v19 (Squid) is required for upmap-read or read balancer modes") + } + // Set min compat client to reef before enabling the balancer mode "upmap-read" or "read" + minCompatClientVersion = "reef" + } + + return minCompatClientVersion, nil +} diff --git a/pkg/daemon/ceph/client/mgr_test.go b/pkg/daemon/ceph/client/mgr_test.go index 3df997dca823..762cd58749b4 100644 --- a/pkg/daemon/ceph/client/mgr_test.go +++ b/pkg/daemon/ceph/client/mgr_test.go @@ -21,6 +21,7 @@ import ( "github.com/pkg/errors" "github.com/rook/rook/pkg/clusterd" + cephver "github.com/rook/rook/pkg/operator/ceph/version" exectest "github.com/rook/rook/pkg/util/exec/test" "github.com/stretchr/testify/assert" ) @@ -135,3 +136,37 @@ func TestSetBalancerMode(t *testing.T) { err := setBalancerMode(&clusterd.Context{Executor: executor}, AdminTestClusterInfo("mycluster"), "upmap") assert.NoError(t, err) } + +func TestGetMinCompatClientVersion(t *testing.T) { + clusterInfo := AdminTestClusterInfo("mycluster") + t.Run("upmap-read balancer mode with ceph v19", func(t *testing.T) { + clusterInfo.CephVersion = cephver.CephVersion{Major: 19} + result, err := desiredMinCompatClientVersion(clusterInfo, upmapReadBalancerMode) + assert.NoError(t, err) + assert.Equal(t, "reef", result) + }) + + t.Run("read balancer mode with ceph v19", func(t *testing.T) { + clusterInfo.CephVersion = cephver.CephVersion{Major: 19} + result, err := desiredMinCompatClientVersion(clusterInfo, readBalancerMode) + assert.NoError(t, err) + assert.Equal(t, "reef", result) + }) + t.Run("upmap-read balancer mode with ceph below v19 should fail", func(t *testing.T) { + clusterInfo.CephVersion = cephver.CephVersion{Major: 18} + _, err := 
desiredMinCompatClientVersion(clusterInfo, upmapReadBalancerMode) + assert.Error(t, err) + }) + t.Run("read balancer mode with ceph below v19 should fail", func(t *testing.T) { + clusterInfo.CephVersion = cephver.CephVersion{Major: 18} + _, err := desiredMinCompatClientVersion(clusterInfo, readBalancerMode) + assert.Error(t, err) + }) + + t.Run("upmap balancer set min compat client to luminous", func(t *testing.T) { + clusterInfo.CephVersion = cephver.CephVersion{Major: 19} + result, err := desiredMinCompatClientVersion(clusterInfo, "upmap") + assert.NoError(t, err) + assert.Equal(t, "luminous", result) + }) +} From a36554d03b1c09cb0e1db314130c57c22ea86d02 Mon Sep 17 00:00:00 2001 From: Ceph Jenkins Date: Tue, 30 Jul 2024 04:02:36 -0400 Subject: [PATCH 6/6] csv: add additional csv changes that other commits bring add generated csv changes Signed-off-by: Ceph Jenkins --- build/csv/ceph/ceph.rook.io_cephclusters.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/build/csv/ceph/ceph.rook.io_cephclusters.yaml b/build/csv/ceph/ceph.rook.io_cephclusters.yaml index e6bd6b6fc665..1c76ccee2c36 100644 --- a/build/csv/ceph/ceph.rook.io_cephclusters.yaml +++ b/build/csv/ceph/ceph.rook.io_cephclusters.yaml @@ -449,6 +449,7 @@ spec: - "" - crush-compat - upmap + - read - upmap-read type: string type: object
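
For reference, a minimal sketch of how the new balancer modes introduced above could be requested in a CephCluster CR. It assumes the mgr module entry is named `balancer` (per the `BalancerMode` comment in types.go); the `read` and `upmap-read` modes require Ceph v19 (Squid), as enforced by `desiredMinCompatClientVersion`:

```yaml
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  mgr:
    modules:
      - name: balancer
        enabled: true
        settings:
          balancerMode: upmap-read # or "read"; both need Ceph v19 (Squid)
```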