From 27e710e5170328e94cb57a5c514e1b54695a32d6 Mon Sep 17 00:00:00 2001 From: David Grove Date: Tue, 28 Jan 2025 16:53:09 -0500 Subject: [PATCH] RHOAI 2.10 is EOL; remove setup instructions (#125) --- SETUP.md | 4 - setup.RHOAI-v2.10/CLUSTER-SETUP.md | 102 ------- setup.RHOAI-v2.10/TEAM-SETUP.md | 91 ------ setup.RHOAI-v2.10/UNINSTALL.md | 23 -- .../coscheduler-priority-patch.yaml | 3 - setup.RHOAI-v2.10/default-flavor.yaml | 4 - setup.RHOAI-v2.10/mlbatch-dsc.yaml | 32 -- setup.RHOAI-v2.10/mlbatch-dsci.yaml | 14 - setup.RHOAI-v2.10/mlbatch-edit-role.yaml | 151 ---------- setup.RHOAI-v2.10/mlbatch-priorities.yaml | 26 -- setup.RHOAI-v2.10/mlbatch-subscription.yaml | 285 ------------------ setup.RHOAI-v2.13/CLUSTER-SETUP.md | 2 +- setup.RHOAI-v2.15/CLUSTER-SETUP.md | 2 +- setup.RHOAI-v2.16/CLUSTER-SETUP.md | 2 +- setup.tmpl/CLUSTER-SETUP.md.tmpl | 4 +- setup.tmpl/Makefile | 2 - setup.tmpl/RHOAI-v2.10.yaml | 7 - 17 files changed, 5 insertions(+), 749 deletions(-) delete mode 100644 setup.RHOAI-v2.10/CLUSTER-SETUP.md delete mode 100644 setup.RHOAI-v2.10/TEAM-SETUP.md delete mode 100644 setup.RHOAI-v2.10/UNINSTALL.md delete mode 100644 setup.RHOAI-v2.10/coscheduler-priority-patch.yaml delete mode 100644 setup.RHOAI-v2.10/default-flavor.yaml delete mode 100644 setup.RHOAI-v2.10/mlbatch-dsc.yaml delete mode 100644 setup.RHOAI-v2.10/mlbatch-dsci.yaml delete mode 100644 setup.RHOAI-v2.10/mlbatch-edit-role.yaml delete mode 100644 setup.RHOAI-v2.10/mlbatch-priorities.yaml delete mode 100644 setup.RHOAI-v2.10/mlbatch-subscription.yaml delete mode 100644 setup.tmpl/RHOAI-v2.10.yaml diff --git a/SETUP.md b/SETUP.md index 88b419d..29e5c6b 100644 --- a/SETUP.md +++ b/SETUP.md @@ -42,10 +42,6 @@ Instructions are provided for the following Red Hat OpenShift AI ***stable*** re + [UPGRADING from RHOAI 2.10](./setup.RHOAI-v2.13/UPGRADE-STABLE.md) + [UPGRADING from RHOAI 2.12](./setup.RHOAI-v2.13/UPGRADE-FAST.md) + [RHOAI 2.13 Uninstall](./setup.RHOAI-v2.13/UNINSTALL.md) -+ Red Hat OpenShift AI 2.10 - + [RHOAI 2.10 Cluster Setup](./setup.RHOAI-v2.10/CLUSTER-SETUP.md) - + [RHOAI 2.10 Team Setup](./setup.RHOAI-v2.10/TEAM-SETUP.md) - + [RHOAI 2.10 Uninstall](./setup.RHOAI-v2.10/UNINSTALL.md) Instructions are provided for the following Red Hat OpenShift AI ***fast*** releases: + Red Hat OpenShift AI 2.15 diff --git a/setup.RHOAI-v2.10/CLUSTER-SETUP.md b/setup.RHOAI-v2.10/CLUSTER-SETUP.md deleted file mode 100644 index d928ec7..0000000 --- a/setup.RHOAI-v2.10/CLUSTER-SETUP.md +++ /dev/null @@ -1,102 +0,0 @@ -# Cluster Setup - -The cluster setup installs Red Hat OpenShift AI and Coscheduler, configures Kueue, -cluster roles, and priority classes. - -## Priorities - -Create `default-priority`, `high-priority`, and `low-priority` priority classes: -```sh -oc apply -f setup.RHOAI-v2.10/mlbatch-priorities.yaml -``` - -## Coscheduler - -Install Coscheduler v0.28.9 as a secondary scheduler and configure packing: -```sh -helm install scheduler-plugins --namespace scheduler-plugins --create-namespace \ - scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ \ - --set-json pluginConfig='[{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/gpu","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]' -``` -Patch Coscheduler pod priorities: -```sh -oc patch deployment -n scheduler-plugins --type=json --patch-file setup.RHOAI-v2.10/coscheduler-priority-patch.yaml scheduler-plugins-controller -oc patch deployment -n scheduler-plugins --type=json --patch-file setup.RHOAI-v2.10/coscheduler-priority-patch.yaml scheduler-plugins-scheduler -``` - -## Red Hat OpenShift AI - -Create the Red Hat OpenShift AI subscription: -```sh -oc apply -f setup.RHOAI-v2.10/mlbatch-subscription.yaml -```` -Identify install plan: -```sh -oc get ip -n redhat-ods-operator -``` -``` -NAMESPACE NAME CSV APPROVAL APPROVED -redhat-ods-operator install-kmh8w rhods-operator.2.10.0 Manual false -``` -Approve install plan replacing the generated plan name below with the actual -value: -```sh -oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-kmh8w -``` -Create DSC Initialization: -```sh -oc apply -f setup.RHOAI-v2.10/mlbatch-dsci.yaml -``` -Create Data Science Cluster: -```sh -oc apply -f setup.RHOAI-v2.10/mlbatch-dsc.yaml -``` -The provided DSCI and DSC are intended to install a minimal set of Red Hat OpenShift -AI managed components: `codeflare`, `kueue`, `ray`, and `trainingoperator`. The -remaining components such as `dashboard` can be optionally enabled. - -The configuration of the managed components differs from the default Red Hat OpenShift -AI configuration as follows: -- Kubeflow Training Operator: - - `gang-scheduler-name` is set to `scheduler-plugins-scheduler`, -- Kueue: - - `manageJobsWithoutQueueName` is enabled, - - `batch/job` integration is disabled, - - `waitForPodsReady` is disabled, - - `LendingLimit` feature gate is enabled, - - `enableClusterQueueResources` metrics is enabled, -- Codeflare operator: - - the AppWrapper controller is enabled and configured as follows: - - `userRBACAdmissionCheck` is disabled, - - `schedulerName` is set to `scheduler-plugins-scheduler`, - - `queueName` is set to `default-queue`, -- pod priorities, resource requests and limits have been adjusted. - -To work around https://issues.redhat.com/browse/RHOAIENG-7887 (a race condition -in Red Hat OpenShift AI installation), do a rolling restart of the Kueue manager. -```sh -oc rollout restart deployment/kueue-controller-manager -n redhat-ods-applications -``` - -After doing the restart, verify that you see the following lines in the -kueue-controller-manager's log: -```sh -{"level":"info","ts":"2024-06-25T20:17:25.689638786Z","logger":"controller-runtime.builder","caller":"builder/webhook.go:189","msg":"Registering a validating webhook","GVK":"kubeflow.org/v1, Kind=PyTorchJob","path":"/validate-kubeflow-org-v1-pytorchjob"} -{"level":"info","ts":"2024-06-25T20:17:25.689698615Z","logger":"controller-runtime.webhook","caller":"webhook/server.go:183","msg":"Registering webhook","path":"/validate-kubeflow-org-v1-pytorchjob"} -{"level":"info","ts":"2024-06-25T20:17:25.689743757Z","logger":"setup","caller":"jobframework/setup.go:81","msg":"Set up controller and webhook for job framework","jobFrameworkName":"kubeflow.org/pytorchjob"} - -``` - -## Kueue Configuration - -Create Kueue's default flavor: -```sh -oc apply -f setup.RHOAI-v2.10/default-flavor.yaml -``` - -## Cluster Role - -Create `mlbatch-edit` role: -```sh -oc apply -f setup.RHOAI-v2.10/mlbatch-edit-role.yaml -``` diff --git a/setup.RHOAI-v2.10/TEAM-SETUP.md b/setup.RHOAI-v2.10/TEAM-SETUP.md deleted file mode 100644 index 85c9429..0000000 --- a/setup.RHOAI-v2.10/TEAM-SETUP.md +++ /dev/null @@ -1,91 +0,0 @@ -# Team Setup - -A *team* in MLBatch is a group of users that share a resource quota. - -Before setting up your teams and quotas, please read [QUOTA_MAINTENANCE.md](../QUOTA_MAINTENANCE.md) -for a discussion of our recommended best practices. - - -Setting up a new team requires the cluster admin to create a project, -a user group, a quota, a queue, and the required role bindings as described below. - -Create project: -```sh -oc new-project team1 -``` -Create user group: -```sh -oc adm groups new team1-edit-group -``` -Add users to group for example: -```sh -oc adm groups add-users team1-edit-group user1 -``` -Bind cluster role to group in namespace: -```sh -oc adm policy add-role-to-group mlbatch-edit team1-edit-group --role-namespace="" --namespace team1 -``` - -Specify the intended quota for the namespace by creating a `ClusterQueue`: -```sh -oc apply -f- << EOF -apiVersion: kueue.x-k8s.io/v1beta1 -kind: ClusterQueue -metadata: - name: team1-cluster-queue -spec: - namespaceSelector: {} - cohort: default-cohort - preemption: - withinClusterQueue: LowerOrNewerEqualPriority - reclaimWithinCohort: Any - borrowWithinCohort: - policy: Never - resourceGroups: - - coveredResources: ["cpu", "memory", "nvidia.com/gpu", "nvidia.com/roce_gdr", "pods"] - flavors: - - name: default-flavor - resources: - - name: "cpu" - nominalQuota: 8000m - # borrowingLimit: 0 - # lendingLimit: 0 - - name: "memory" - nominalQuota: 128Gi - # borrowingLimit: 0 - # lendingLimit: 0 - - name: "nvidia.com/gpu" - nominalQuota: 16 - # borrowingLimit: 0 - # lendingLimit: 0 - - name: "nvidia.com/roce_gdr" - nominalQuota: 4 - # borrowingLimit: 0 - # lendingLimit: 0 - - name: "pods" - nominalQuota: 100 - # borrowingLimit: 0 - # lendingLimit: 0 -EOF -``` -Edit the above quantities to adjust the quota to the desired values. Pod counts -are optional and can be omitted from the list of covered resources. - -Uncomment all `borrowingLimit` lines to prevent this namespace from borrowing -quota from other namespaces. Uncomment all `lendingLimit` lines to prevent other -namespaces from borrowing quota from this namespace. - -Create a `LocalQueue` to bind the `ClusterQueue` to the namespace: -```sh -oc apply -n team1 -f- << EOF -apiVersion: kueue.x-k8s.io/v1beta1 -kind: LocalQueue -metadata: - name: default-queue -spec: - clusterQueue: team1-cluster-queue -EOF -``` -We recommend naming the local queue `default-queue` as `AppWrappers` will -default to this queue name. - diff --git a/setup.RHOAI-v2.10/UNINSTALL.md b/setup.RHOAI-v2.10/UNINSTALL.md deleted file mode 100644 index 776045d..0000000 --- a/setup.RHOAI-v2.10/UNINSTALL.md +++ /dev/null @@ -1,23 +0,0 @@ -# Uninstall - -***First, remove all team projects and corresponding cluster queues.*** - -Then to uninstall the MLBatch controllers and reclaim the corresponding -namespaces, run: -```sh -# OpenShift AI uninstall -oc delete dsc mlbatch-dsc -oc delete dsci mlbatch-dsci -oc delete subscription -n redhat-ods-operator rhods-operator -oc delete csv -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator -oc delete crd featuretrackers.features.opendatahub.io \ - dscinitializations.dscinitialization.opendatahub.io \ - datascienceclusters.datasciencecluster.opendatahub.io -oc delete operators rhods-operator.redhat-ods-operator -oc delete operatorgroup -n redhat-ods-operator rhods-operator -oc delete namespace redhat-ods-applications redhat-ods-monitoring redhat-ods-operator - -# Coscheduler uninstall -helm uninstall -n scheduler-plugins scheduler-plugins -oc delete namespace scheduler-plugins -``` diff --git a/setup.RHOAI-v2.10/coscheduler-priority-patch.yaml b/setup.RHOAI-v2.10/coscheduler-priority-patch.yaml deleted file mode 100644 index 278802f..0000000 --- a/setup.RHOAI-v2.10/coscheduler-priority-patch.yaml +++ /dev/null @@ -1,3 +0,0 @@ -- op: add - path: /spec/template/spec/priorityClassName - value: system-node-critical diff --git a/setup.RHOAI-v2.10/default-flavor.yaml b/setup.RHOAI-v2.10/default-flavor.yaml deleted file mode 100644 index 6cbccf3..0000000 --- a/setup.RHOAI-v2.10/default-flavor.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: kueue.x-k8s.io/v1beta1 -kind: ResourceFlavor -metadata: - name: default-flavor diff --git a/setup.RHOAI-v2.10/mlbatch-dsc.yaml b/setup.RHOAI-v2.10/mlbatch-dsc.yaml deleted file mode 100644 index 66336bc..0000000 --- a/setup.RHOAI-v2.10/mlbatch-dsc.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: datasciencecluster.opendatahub.io/v1 -kind: DataScienceCluster -metadata: - name: mlbatch-dsc -spec: - components: - codeflare: - managementState: Managed - dashboard: - managementState: Removed - datasciencepipelines: - managementState: Removed - kserve: - managementState: Removed - serving: - ingressGateway: - certificate: - type: SelfSigned - managementState: Removed - name: knative-serving - kueue: - managementState: Managed - modelmeshserving: - managementState: Removed - ray: - managementState: Managed - trainingoperator: - managementState: Managed - trustyai: - managementState: Removed - workbenches: - managementState: Removed diff --git a/setup.RHOAI-v2.10/mlbatch-dsci.yaml b/setup.RHOAI-v2.10/mlbatch-dsci.yaml deleted file mode 100644 index 77785c3..0000000 --- a/setup.RHOAI-v2.10/mlbatch-dsci.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: dscinitialization.opendatahub.io/v1 -kind: DSCInitialization -metadata: - name: mlbatch-dsci -spec: - applicationsNamespace: redhat-ods-applications - monitoring: - managementState: Managed - namespace: redhat-ods-monitoring - serviceMesh: - managementState: Removed - trustedCABundle: - customCABundle: "" - managementState: Managed diff --git a/setup.RHOAI-v2.10/mlbatch-edit-role.yaml b/setup.RHOAI-v2.10/mlbatch-edit-role.yaml deleted file mode 100644 index 4bb9e45..0000000 --- a/setup.RHOAI-v2.10/mlbatch-edit-role.yaml +++ /dev/null @@ -1,151 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: mlbatch-edit -rules: -- apiGroups: - - "" - resources: - - pods - verbs: - - delete - - get - - list - - watch -- apiGroups: - - apps - resources: - - deployments - - statefulsets - verbs: - - delete - - get - - list - - watch -- apiGroups: - - "" - resources: - - services - - secrets - - configmaps - - persistentvolumeclaims - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kueue.x-k8s.io - resources: - - "*" - verbs: - - get - - list - - watch -- apiGroups: - - kubeflow.org - resources: - - pytorchjobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - ray.io - resources: - - rayjobs - - rayclusters - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - batch - resources: - - jobs - verbs: - - delete - - get - - list - - watch -- apiGroups: - - workload.codeflare.dev - resources: - - appwrappers - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - scheduling.k8s.io - resources: - - priorityclasses - verbs: - - get - - list - - watch -- apiGroups: - - scheduling.x-k8s.io - resources: - - podgroups - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - events - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - namespaces - - pods/logs - verbs: - - get -- apiGroups: - - "" - resources: - - pods/exec - - pods/portforward - verbs: - - create -- apiGroups: - - route.openshift.io - resources: - - routes - verbs: - - get - - list - - watch - - delete -- apiGroups: - - "" - - project.openshift.io - resources: - - projects - verbs: - - get diff --git a/setup.RHOAI-v2.10/mlbatch-priorities.yaml b/setup.RHOAI-v2.10/mlbatch-priorities.yaml deleted file mode 100644 index 77c8f3b..0000000 --- a/setup.RHOAI-v2.10/mlbatch-priorities.yaml +++ /dev/null @@ -1,26 +0,0 @@ -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: low-priority -value: 1 -preemptionPolicy: PreemptLowerPriority -globalDefault: false -description: "This is the priority class for all lower priority jobs." ---- -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: default-priority -value: 5 -preemptionPolicy: PreemptLowerPriority -globalDefault: true -description: "This is the priority class for all jobs (default priority)." ---- -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: high-priority -value: 10 -preemptionPolicy: PreemptLowerPriority -globalDefault: false -description: "This is the priority class defined for highly important jobs that would evict lower and default priority jobs." diff --git a/setup.RHOAI-v2.10/mlbatch-subscription.yaml b/setup.RHOAI-v2.10/mlbatch-subscription.yaml deleted file mode 100644 index f9cb18e..0000000 --- a/setup.RHOAI-v2.10/mlbatch-subscription.yaml +++ /dev/null @@ -1,285 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: redhat-ods-operator ---- -apiVersion: v1 -kind: Namespace -metadata: - name: redhat-ods-applications ---- -apiVersion: operators.coreos.com/v1 -kind: OperatorGroup -metadata: - name: rhods-operator - namespace: redhat-ods-operator ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: mlbatch-codeflare - namespace: redhat-ods-operator -data: - manager.yaml: | - apiVersion: apps/v1 - kind: Deployment - metadata: - name: manager - namespace: system - spec: - selector: - matchLabels: - app.kubernetes.io/name: codeflare-operator - app.kubernetes.io/part-of: codeflare - replicas: 1 - template: - metadata: - annotations: - kubectl.kubernetes.io/default-container: manager - labels: - app.kubernetes.io/name: codeflare-operator - app.kubernetes.io/part-of: codeflare - spec: - priorityClassName: system-node-critical - securityContext: - runAsNonRoot: true - # TODO(user): For common cases that do not require escalating privileges - # it is recommended to ensure that all your Pods/Containers are restrictive. - # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted - # Please uncomment the following code if your project does NOT have to work on old Kubernetes - # versions < 1.20 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ). - # seccompProfile: - # type: RuntimeDefault - containers: - - command: - - /manager - image: $(codeflare_operator_controller_image) - imagePullPolicy: Always - name: manager - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - "ALL" - env: - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - ports: - - containerPort: 8080 - protocol: TCP - name: metrics - livenessProbe: - httpGet: - path: /healthz - port: 8081 - initialDelaySeconds: 15 - periodSeconds: 20 - readinessProbe: - httpGet: - path: /readyz - port: 8081 - initialDelaySeconds: 5 - periodSeconds: 10 - resources: - limits: - cpu: "1" - memory: 1Gi - requests: - cpu: "1" - memory: 1Gi - serviceAccountName: controller-manager - terminationGracePeriodSeconds: 10 ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: codeflare-operator-config - namespace: redhat-ods-applications -data: - config.yaml: | - appwrapper: - Config: - manageJobsWithoutQueueName: true - userRBACAdmissionCheck: false - schedulerName: scheduler-plugins-scheduler - queueName: default-queue - enabled: true ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: mlbatch-kuberay - namespace: redhat-ods-operator -data: - kuberay-operator-image-patch.yaml: | - apiVersion: apps/v1 - kind: Deployment - metadata: - name: kuberay-operator - spec: - template: - spec: - priorityClassName: system-node-critical - containers: - - name: kuberay-operator - image: $(image) ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: mlbatch-kueue - namespace: redhat-ods-operator -data: - controller_manager_config.yaml: | - apiVersion: config.kueue.x-k8s.io/v1beta1 - kind: Configuration - health: - healthProbeBindAddress: :8081 - metrics: - bindAddress: :8080 - enableClusterQueueResources: true - webhook: - port: 9443 - leaderElection: - leaderElect: true - resourceName: c1f6bfd2.kueue.x-k8s.io - controller: - groupKindConcurrency: - Job.batch: 5 - Pod: 5 - Workload.kueue.x-k8s.io: 5 - LocalQueue.kueue.x-k8s.io: 1 - ClusterQueue.kueue.x-k8s.io: 1 - ResourceFlavor.kueue.x-k8s.io: 1 - clientConnection: - qps: 50 - burst: 100 - #pprofBindAddress: :8082 - waitForPodsReady: - enable: false - blockAdmission: false - manageJobsWithoutQueueName: true - #internalCertManagement: - # enable: false - # webhookServiceName: "" - # webhookSecretName: "" - integrations: - frameworks: - # - "batch/job" - - "kubeflow.org/mpijob" - - "ray.io/rayjob" - - "ray.io/raycluster" - - "jobset.x-k8s.io/jobset" - - "kubeflow.org/mxjob" - - "kubeflow.org/paddlejob" - - "kubeflow.org/pytorchjob" - - "kubeflow.org/tfjob" - - "kubeflow.org/xgboostjob" - # - "pod" - # podOptions: - # namespaceSelector: - # matchExpressions: - # - key: kubernetes.io/metadata.name - # operator: NotIn - # values: [ kube-system, kueue-system ] - manager_config_patch.yaml: | - apiVersion: apps/v1 - kind: Deployment - metadata: - name: controller-manager - namespace: system - spec: - template: - spec: - priorityClassName: system-node-critical - containers: - - name: manager - image: $(image) - args: - - "--config=/controller_manager_config.yaml" - - "--zap-log-level=2" - - "--feature-gates=LendingLimit=true" - volumeMounts: - - name: manager-config - mountPath: /controller_manager_config.yaml - subPath: controller_manager_config.yaml - volumes: - - name: manager-config - configMap: - name: manager-config ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: mlbatch-training-operator - namespace: redhat-ods-operator -data: - manager_config_patch.yaml: | - apiVersion: apps/v1 - kind: Deployment - metadata: - name: training-operator - spec: - template: - spec: - priorityClassName: system-node-critical - containers: - - name: training-operator - image: $(image) - args: - - "--zap-log-level=2" - - "--gang-scheduler-name=scheduler-plugins-scheduler" - resources: - requests: - cpu: 100m - memory: 100Mi - limits: - cpu: 500m - memory: 1000Mi ---- -apiVersion: operators.coreos.com/v1alpha1 -kind: Subscription -metadata: - name: rhods-operator - namespace: redhat-ods-operator -spec: - channel: stable - installPlanApproval: Manual - name: rhods-operator - source: redhat-operators - sourceNamespace: openshift-marketplace - startingCSV: rhods-operator.2.10.0 - config: - env: - - name: "DISABLE_DSC_CONFIG" - volumeMounts: - - name: mlbatch-codeflare - mountPath: /opt/manifests/codeflare/manager/manager.yaml - subPath: manager.yaml - - name: mlbatch-kuberay - mountPath: /opt/manifests/ray/openshift/kuberay-operator-image-patch.yaml - subPath: kuberay-operator-image-patch.yaml - - name: mlbatch-kueue - mountPath: /opt/manifests/kueue/components/manager/controller_manager_config.yaml - subPath: controller_manager_config.yaml - - name: mlbatch-kueue - mountPath: /opt/manifests/kueue/rhoai/manager_config_patch.yaml - subPath: manager_config_patch.yaml - - name: mlbatch-training-operator - mountPath: /opt/manifests/trainingoperator/rhoai/manager_config_patch.yaml - subPath: manager_config_patch.yaml - volumes: - - name: mlbatch-codeflare - configMap: - name: mlbatch-codeflare - - name: mlbatch-kuberay - configMap: - name: mlbatch-kuberay - - name: mlbatch-kueue - configMap: - name: mlbatch-kueue - - name: mlbatch-training-operator - configMap: - name: mlbatch-training-operator diff --git a/setup.RHOAI-v2.13/CLUSTER-SETUP.md b/setup.RHOAI-v2.13/CLUSTER-SETUP.md index af5820a..a178853 100644 --- a/setup.RHOAI-v2.13/CLUSTER-SETUP.md +++ b/setup.RHOAI-v2.13/CLUSTER-SETUP.md @@ -36,7 +36,7 @@ oc get ip -n redhat-ods-operator ``` ``` NAMESPACE NAME CSV APPROVAL APPROVED -redhat-ods-operator install-kmh8w rhods-operator.2.10.0 Manual false +redhat-ods-operator install-kmh8w rhods-operator.2.16.0 Manual false ``` Approve install plan replacing the generated plan name below with the actual value: diff --git a/setup.RHOAI-v2.15/CLUSTER-SETUP.md b/setup.RHOAI-v2.15/CLUSTER-SETUP.md index c52146f..1a4680b 100644 --- a/setup.RHOAI-v2.15/CLUSTER-SETUP.md +++ b/setup.RHOAI-v2.15/CLUSTER-SETUP.md @@ -36,7 +36,7 @@ oc get ip -n redhat-ods-operator ``` ``` NAMESPACE NAME CSV APPROVAL APPROVED -redhat-ods-operator install-kmh8w rhods-operator.2.10.0 Manual false +redhat-ods-operator install-kmh8w rhods-operator.2.16.0 Manual false ``` Approve install plan replacing the generated plan name below with the actual value: diff --git a/setup.RHOAI-v2.16/CLUSTER-SETUP.md b/setup.RHOAI-v2.16/CLUSTER-SETUP.md index 7fb4c1c..cebd9dd 100644 --- a/setup.RHOAI-v2.16/CLUSTER-SETUP.md +++ b/setup.RHOAI-v2.16/CLUSTER-SETUP.md @@ -36,7 +36,7 @@ oc get ip -n redhat-ods-operator ``` ``` NAMESPACE NAME CSV APPROVAL APPROVED -redhat-ods-operator install-kmh8w rhods-operator.2.10.0 Manual false +redhat-ods-operator install-kmh8w rhods-operator.2.16.0 Manual false ``` Approve install plan replacing the generated plan name below with the actual value: diff --git a/setup.tmpl/CLUSTER-SETUP.md.tmpl b/setup.tmpl/CLUSTER-SETUP.md.tmpl index 6a4b9ed..0e8e4c3 100644 --- a/setup.tmpl/CLUSTER-SETUP.md.tmpl +++ b/setup.tmpl/CLUSTER-SETUP.md.tmpl @@ -49,7 +49,7 @@ Identify install plan: ``` ``` NAMESPACE NAME CSV APPROVAL APPROVED -redhat-ods-operator install-kmh8w rhods-operator.2.10.0 Manual false +redhat-ods-operator install-kmh8w rhods-operator.2.16.0 Manual false ``` Approve install plan replacing the generated plan name below with the actual value: @@ -91,7 +91,7 @@ AI configuration as follows: {{- end }} - pod priorities, resource requests and limits have been adjusted. -{{ if or (eq .VERSION "RHOAI-v2.10") (eq .VERSION "RHOAI-v2.13") -}} +{{ if (eq .VERSION "RHOAI-v2.13") -}} To work around https://issues.redhat.com/browse/RHOAIENG-7887 (a race condition in Red Hat OpenShift AI installation), do a rolling restart of the Kueue manager. ```sh diff --git a/setup.tmpl/Makefile b/setup.tmpl/Makefile index bc86801..1629ad8 100644 --- a/setup.tmpl/Makefile +++ b/setup.tmpl/Makefile @@ -21,8 +21,6 @@ help: ## Display this help. ##@ Generate Documentation docs: gotmpl - ../tools/gotmpl/gotmpl -input ./CLUSTER-SETUP.md.tmpl -output ../setup.RHOAI-v2.10/CLUSTER-SETUP.md -values RHOAI-v2.10.yaml - ../tools/gotmpl/gotmpl -input ./TEAM-SETUP.md.tmpl -output ../setup.RHOAI-v2.10/TEAM-SETUP.md -values RHOAI-v2.10.yaml ../tools/gotmpl/gotmpl -input ./CLUSTER-SETUP.md.tmpl -output ../setup.RHOAI-v2.13/CLUSTER-SETUP.md -values RHOAI-v2.13.yaml ../tools/gotmpl/gotmpl -input ./TEAM-SETUP.md.tmpl -output ../setup.RHOAI-v2.13/TEAM-SETUP.md -values RHOAI-v2.13.yaml ../tools/gotmpl/gotmpl -input ./CLUSTER-SETUP.md.tmpl -output ../setup.RHOAI-v2.15/CLUSTER-SETUP.md -values RHOAI-v2.15.yaml diff --git a/setup.tmpl/RHOAI-v2.10.yaml b/setup.tmpl/RHOAI-v2.10.yaml deleted file mode 100644 index 141eadd..0000000 --- a/setup.tmpl/RHOAI-v2.10.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Values for RHOAI 2.10 - -OPENSHIFT: true -VERSION: RHOAI-v2.10 -KUBECTL: oc -SLACKCQ: false -FAIRSHARE: false \ No newline at end of file