-
Notifications
You must be signed in to change notification settings - Fork 510
/
Copy pathraycluster_webhook.go
159 lines (133 loc) · 6.01 KB
/
raycluster_webhook.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
package v1
import (
"fmt"
"regexp"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/validation/field"
ctrl "sigs.k8s.io/controller-runtime"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/webhook"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)
// Package-level state shared by the RayCluster validating webhook.
var (
	// rayclusterlog is the logger used by the RayCluster webhook handlers.
	rayclusterlog = logf.Log.WithName("raycluster-resource")
	// nameRegex is the pattern validateName applies to the RayCluster name:
	// a lowercase letter first, then lowercase alphanumerics or '-', ending
	// with an alphanumeric. MustCompile is used instead of discarding the
	// error from Compile, so a broken pattern fails at package initialization
	// rather than leaving a nil *regexp.Regexp to be dereferenced later.
	nameRegex = regexp.MustCompile("^[a-z]([-a-z0-9]*[a-z0-9])?$")
)
// SetupWebhookWithManager builds a webhook for the RayCluster type on the
// given manager and returns any error reported while completing the builder.
func (r *RayCluster) SetupWebhookWithManager(mgr ctrl.Manager) error {
	webhookBuilder := ctrl.NewWebhookManagedBy(mgr).For(r)
	return webhookBuilder.Complete()
}
// TODO(user): change verbs to "verbs=create;update;delete" if you want to enable deletion validation.
//+kubebuilder:webhook:path=/validate-ray-io-v1-raycluster,mutating=false,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayclusters,verbs=create;update,versions=v1,name=vraycluster.kb.io,admissionReviewVersions=v1
var _ webhook.Validator = (*RayCluster)(nil) // compile-time check that *RayCluster implements webhook.Validator
// ValidateCreate implements webhook.Validator. It logs the request and runs
// the shared RayCluster validation; no admission warnings are ever produced.
func (r *RayCluster) ValidateCreate() (admission.Warnings, error) {
	rayclusterlog.Info("validate create", "name", r.Name)
	if err := r.validateRayCluster(); err != nil {
		return nil, err
	}
	return nil, nil
}
// ValidateUpdate implements webhook.Validator. The previous object is ignored:
// the updated RayCluster goes through the same validation as on create, and no
// admission warnings are produced.
func (r *RayCluster) ValidateUpdate(_ runtime.Object) (admission.Warnings, error) {
	rayclusterlog.Info("validate update", "name", r.Name)
	var warnings admission.Warnings
	return warnings, r.validateRayCluster()
}
// ValidateDelete implements webhook.Validator. It only logs the request:
// deletion is never rejected and no admission warnings are produced.
func (r *RayCluster) ValidateDelete() (admission.Warnings, error) {
	rayclusterlog.Info("validate delete", "name", r.Name)
	var (
		warnings admission.Warnings
		err      error
	)
	return warnings, err
}
// validateRayCluster runs the name, worker-group and spec checks in that order
// and collects every failure. It returns nil when all checks pass, otherwise a
// single Invalid API error for the ray.io/RayCluster kind carrying the
// collected field errors.
func (r *RayCluster) validateRayCluster() error {
	checks := []func() *field.Error{
		r.validateName,
		r.validateWorkerGroups,
		r.ValidateRayClusterSpec,
	}

	var failures field.ErrorList
	for _, check := range checks {
		if fieldErr := check(); fieldErr != nil {
			failures = append(failures, fieldErr)
		}
	}

	if len(failures) > 0 {
		kind := schema.GroupKind{Group: "ray.io", Kind: "RayCluster"}
		return apierrors.NewInvalid(kind, r.Name, failures)
	}
	return nil
}
// validateName checks the RayCluster name against nameRegex. It returns nil
// for a matching name and an Invalid field error on metadata.name otherwise.
func (r *RayCluster) validateName() *field.Error {
	if nameRegex.MatchString(r.Name) {
		return nil
	}

	const detail = "name must consist of lower case alphanumeric characters or '-', start with an alphabetic character, and end with an alphanumeric character (e.g. 'my-name', or 'abc-123', regex used for validation is '[a-z]([-a-z0-9]*[a-z0-9])?')"
	namePath := field.NewPath("metadata").Child("name")
	return field.Invalid(namePath, r.Name, detail)
}
// validateWorkerGroups ensures every worker group has a distinct GroupName.
// It returns an Invalid field error pointing at the first worker group whose
// name repeats an earlier one, or nil when all names are unique.
func (r *RayCluster) validateWorkerGroups() *field.Error {
	groups := r.Spec.WorkerGroupSpecs
	seen := make(map[string]struct{}, len(groups))
	for idx := range groups {
		name := groups[idx].GroupName
		if _, duplicate := seen[name]; duplicate {
			groupPath := field.NewPath("spec").Child("workerGroupSpecs").Index(idx)
			return field.Invalid(groupPath, groups[idx], "worker group names must be unique")
		}
		seen[name] = struct{}{}
	}
	return nil
}
// ValidateRayClusterSpec checks the RayCluster spec and returns the first
// violation found as an Invalid field error, or nil when every check passes.
//
// The checks run in this order:
//  1. the head group template has at least one container;
//  2. every worker group template has at least one container;
//  3. the RayFTEnabledAnnotationKey annotation and GcsFaultToleranceOptions
//     are not both set;
//  4. when IsGCSFaultToleranceEnabled reports false, the head container at
//     RayContainerIndex does not define the RAY_REDIS_ADDRESS env var;
//  5. when GcsFaultToleranceOptions is set, neither the `redis-password`
//     rayStartParam nor the REDIS_PASSWORD env var is set on the head group.
func (r *RayCluster) ValidateRayClusterSpec() *field.Error {
	specPath := field.NewPath("spec")
	headGroupPath := specPath.Child("headGroupSpec")
	headContainersPath := headGroupPath.Child("template").Child("spec").Child("containers")

	headContainers := r.Spec.HeadGroupSpec.Template.Spec.Containers
	if len(headContainers) == 0 {
		return field.Invalid(headContainersPath, headContainers, "headGroupSpec should have at least one container")
	}

	for idx := range r.Spec.WorkerGroupSpecs {
		group := r.Spec.WorkerGroupSpecs[idx]
		if len(group.Template.Spec.Containers) > 0 {
			continue
		}
		return field.Invalid(
			specPath.Child("workerGroupSpecs").Index(idx),
			group,
			"workerGroupSpec should have at least one container",
		)
	}

	ftOptions := r.Spec.GcsFaultToleranceOptions
	if ftAnnotation := r.Annotations[RayFTEnabledAnnotationKey]; ftAnnotation != "" && ftOptions != nil {
		annotationPath := field.NewPath("metadata").Child("annotations").Child(RayFTEnabledAnnotationKey)
		detail := fmt.Sprintf("%s annotation and GcsFaultToleranceOptions are both set. "+
			"Please use only GcsFaultToleranceOptions to configure GCS fault tolerance", RayFTEnabledAnnotationKey)
		return field.Invalid(annotationPath, ftAnnotation, detail)
	}

	// The head container is only indexed past this point, after the
	// non-empty check above has passed.
	if !IsGCSFaultToleranceEnabled(*r) && EnvVarExists(RAY_REDIS_ADDRESS, headContainers[RayContainerIndex].Env) {
		detail := fmt.Sprintf("%s is set which implicitly enables GCS fault tolerance, "+
			"but GcsFaultToleranceOptions is not set. Please set GcsFaultToleranceOptions "+
			"to enable GCS fault tolerance", RAY_REDIS_ADDRESS)
		return field.Invalid(
			headContainersPath.Index(RayContainerIndex).Child("env"),
			headContainers[RayContainerIndex].Env,
			detail,
		)
	}

	if ftOptions != nil {
		startParams := r.Spec.HeadGroupSpec.RayStartParams
		if startParams["redis-password"] != "" {
			return field.Invalid(
				headGroupPath.Child("rayStartParams"),
				startParams,
				"cannot set `redis-password` in rayStartParams when GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisPassword instead",
			)
		}

		if headEnv := headContainers[RayContainerIndex].Env; EnvVarExists(REDIS_PASSWORD, headEnv) {
			return field.Invalid(
				headContainersPath.Index(RayContainerIndex).Child("env"),
				headEnv,
				"cannot set `REDIS_PASSWORD` env var in head Pod when GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisPassword instead",
			)
		}
	}

	// TODO (kevin85421): If GcsFaultToleranceOptions is set, users should use `GcsFaultToleranceOptions.RedisAddress` instead of `RAY_REDIS_ADDRESS`.
	// TODO (kevin85421): If GcsFaultToleranceOptions is set, users should use `GcsFaultToleranceOptions.ExternalStorageNamespace` instead of
	// the annotation `ray.io/external-storage-namespace`.
	return nil
}