diff --git a/PROJECT b/PROJECT index 285a36e..0b5288e 100644 --- a/PROJECT +++ b/PROJECT @@ -2,19 +2,37 @@ # This file is used to track the info used to scaffold your project # and allow the plugins properly work. # More info: https://book.kubebuilder.io/reference/project-config.html -domain: inftyai.io +domain: llmaz.io layout: - go.kubebuilder.io/v4 +multigroup: true projectName: llmaz -repo: inftyai.io/llmaz +repo: inftyai.com/llmaz resources: - api: crdVersion: v1 namespaced: true controller: true - domain: inftyai.io - group: llmaz - kind: Inference - path: inftyai.io/llmaz/api/v1alpha1 + domain: llmaz.io + group: inference + kind: Service + path: inftyai.com/llmaz/api/inference/v1alpha1 + version: v1alpha1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: llmaz.io + group: inference + kind: Playground + path: inftyai.com/llmaz/api/inference/v1alpha1 + version: v1alpha1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: llmaz.io + kind: Model + path: inftyai.com/llmaz/api/v1alpha1 version: v1alpha1 version: "3" diff --git a/README.md b/README.md index 65d4dbf..43aa1c8 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ # llmaz -llmaz, pronounced as /lima:z/, is a building block for users to serve their LLMs on Kubernetes in a breeze. +☸️ Effortlessly operating LLMs on Kubernetes, e.g. Serving. + +## Roadmap + +- Serverless support +- CLI tool +- Gateway support diff --git a/api/inference/v1alpha1/groupversion_info.go b/api/inference/v1alpha1/groupversion_info.go new file mode 100644 index 0000000..6de3201 --- /dev/null +++ b/api/inference/v1alpha1/groupversion_info.go @@ -0,0 +1,36 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1alpha1 contains API Schema definitions for the inference v1alpha1 API group +// +kubebuilder:object:generate=true +// +groupName=inference.llmaz.io +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/api/inference/v1alpha1/playground_types.go b/api/inference/v1alpha1/playground_types.go new file mode 100644 index 0000000..7aa3c26 --- /dev/null +++ b/api/inference/v1alpha1/playground_types.go @@ -0,0 +1,71 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + api "inftyai.com/llmaz/api/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// PlaygroundSpec defines the desired state of Playground +type PlaygroundSpec struct { + // Replicas represents the replica number of inference workloads. + // +kubebuilder:default=1 + // +optional + Replicas *int32 `json:"replicas,omitempty"` + // ModelsClaim represents the references to multiple models. + ModelsClaim api.ModelsClaim `json:"modelsClaim"` + // BackendConfig represents the inference backend configuration + // under the hood, e.g. vLLM, which is the default backend. + // +optional + BackendConfig *BackendConfig `json:"backendConfig,omitempty"` + // ElasticConfig defines the configuration for elastic usage, + // e.g. the max/min replicas. Default to 0 ~ Inf+. + // +optional + ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"` +} + +// PlaygroundStatus defines the observed state of Playground +type PlaygroundStatus struct { + // Conditions represents the Inference condition. + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status + +// Playground is the Schema for the playgrounds API +type Playground struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec PlaygroundSpec `json:"spec,omitempty"` + Status PlaygroundStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// PlaygroundList contains a list of Playground +type PlaygroundList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []Playground `json:"items"` +} + +func init() { + SchemeBuilder.Register(&Playground{}, &PlaygroundList{}) +} diff --git a/api/inference/v1alpha1/service_types.go b/api/inference/v1alpha1/service_types.go new file mode 100644 index 0000000..834e36c --- /dev/null +++ b/api/inference/v1alpha1/service_types.go @@ -0,0 +1,71 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + lws "sigs.k8s.io/lws/api/leaderworkerset/v1" + + api "inftyai.com/llmaz/api/v1alpha1" +) + +// ServiceSpec defines the desired state of Service. +// Service controller will maintain multi-flavor of workloads with +// different accelerators for cost or performance considerations. +type ServiceSpec struct { + // ModelsClaim represents the references to multiple models. + // Note: properties (nodeSelectors, resources, e.g.) of the model flavors + // will be applied to the workload if not exist. + ModelsClaim api.ModelsClaim `json:"modelsClaim"` + // WorkloadTemplate defines the underlying workload layout and configuration. + WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"` + // ElasticConfig defines the configuration for elastic usage, + // e.g. the max/min replicas. Default to 0 ~ Inf+. + // +optional + ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"` +} + +// ServiceStatus defines the observed state of Service +type ServiceStatus struct { + // Conditions represents the Inference condition. + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status + +// Service is the Schema for the services API +type Service struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec ServiceSpec `json:"spec,omitempty"` + Status ServiceStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// ServiceList contains a list of Service +type ServiceList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []Service `json:"items"` +} + +func init() { + SchemeBuilder.Register(&Service{}, &ServiceList{}) +} diff --git a/api/inference/v1alpha1/types.go b/api/inference/v1alpha1/types.go new file mode 100644 index 0000000..a2725e7 --- /dev/null +++ b/api/inference/v1alpha1/types.go @@ -0,0 +1,50 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import corev1 "k8s.io/api/core/v1" + +type BackendName string + +type BackendConfig struct { + // Name represents the inference backend under the hood, e.g. vLLM. + // +kubebuilder:validation:Enum={vllm} + // +kubebuilder:default=vllm + // +optional + Name *BackendName `json:"name"` + // Version represents the backend version if you want a different one + // from the default version. + // +optional + Version *string `json:"version,omitempty"` + // Args represents the arguments passed to the backend. + // +optional + Args []string `json:"args,omitempty"` + // Envs represents the environments set to the container. + // +optional + Envs []corev1.EnvVar `json:"envs,omitempty"` +} + +type ElasticConfig struct { + // MinReplicas indicates the minimum number of inference workloads based on the traffic. + // Default to nil means we can scale down the instances to 0. + // +optional + MinReplicas *int32 `json:"minReplicas,omitempty"` + // MaxReplicas indicates the maximum number of inference workloads based on the traffic. + // Default to nil means there's no limit for the instance number. + // +optional + MaxReplicas *int32 `json:"maxReplicas,omitempty"` +} diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 0000000..a5adfd5 --- /dev/null +++ b/api/inference/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,304 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *BackendConfig) DeepCopyInto(out *BackendConfig) { + *out = *in + if in.Name != nil { + in, out := &in.Name, &out.Name + *out = new(BackendName) + **out = **in + } + if in.Version != nil { + in, out := &in.Version, &out.Version + *out = new(string) + **out = **in + } + if in.Args != nil { + in, out := &in.Args, &out.Args + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Envs != nil { + in, out := &in.Envs, &out.Envs + *out = make([]corev1.EnvVar, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackendConfig. +func (in *BackendConfig) DeepCopy() *BackendConfig { + if in == nil { + return nil + } + out := new(BackendConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ElasticConfig) DeepCopyInto(out *ElasticConfig) { + *out = *in + if in.MinReplicas != nil { + in, out := &in.MinReplicas, &out.MinReplicas + *out = new(int32) + **out = **in + } + if in.MaxReplicas != nil { + in, out := &in.MaxReplicas, &out.MaxReplicas + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticConfig. +func (in *ElasticConfig) DeepCopy() *ElasticConfig { + if in == nil { + return nil + } + out := new(ElasticConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Playground) DeepCopyInto(out *Playground) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Playground. +func (in *Playground) DeepCopy() *Playground { + if in == nil { + return nil + } + out := new(Playground) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Playground) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PlaygroundList) DeepCopyInto(out *PlaygroundList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Playground, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundList. +func (in *PlaygroundList) DeepCopy() *PlaygroundList { + if in == nil { + return nil + } + out := new(PlaygroundList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *PlaygroundList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PlaygroundSpec) DeepCopyInto(out *PlaygroundSpec) { + *out = *in + if in.Replicas != nil { + in, out := &in.Replicas, &out.Replicas + *out = new(int32) + **out = **in + } + in.ModelsClaim.DeepCopyInto(&out.ModelsClaim) + if in.BackendConfig != nil { + in, out := &in.BackendConfig, &out.BackendConfig + *out = new(BackendConfig) + (*in).DeepCopyInto(*out) + } + if in.ElasticConfig != nil { + in, out := &in.ElasticConfig, &out.ElasticConfig + *out = new(ElasticConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundSpec. +func (in *PlaygroundSpec) DeepCopy() *PlaygroundSpec { + if in == nil { + return nil + } + out := new(PlaygroundSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PlaygroundStatus) DeepCopyInto(out *PlaygroundStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundStatus. +func (in *PlaygroundStatus) DeepCopy() *PlaygroundStatus { + if in == nil { + return nil + } + out := new(PlaygroundStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Service) DeepCopyInto(out *Service) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Service. +func (in *Service) DeepCopy() *Service { + if in == nil { + return nil + } + out := new(Service) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Service) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceList) DeepCopyInto(out *ServiceList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Service, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceList. +func (in *ServiceList) DeepCopy() *ServiceList { + if in == nil { + return nil + } + out := new(ServiceList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ServiceList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceSpec) DeepCopyInto(out *ServiceSpec) { + *out = *in + in.ModelsClaim.DeepCopyInto(&out.ModelsClaim) + in.WorkloadTemplate.DeepCopyInto(&out.WorkloadTemplate) + if in.ElasticConfig != nil { + in, out := &in.ElasticConfig, &out.ElasticConfig + *out = new(ElasticConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceSpec. +func (in *ServiceSpec) DeepCopy() *ServiceSpec { + if in == nil { + return nil + } + out := new(ServiceSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceStatus) DeepCopyInto(out *ServiceStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceStatus. +func (in *ServiceStatus) DeepCopy() *ServiceStatus { + if in == nil { + return nil + } + out := new(ServiceStatus) + in.DeepCopyInto(out) + return out +} diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go index b9bf182..683c06e 100644 --- a/api/v1alpha1/groupversion_info.go +++ b/api/v1alpha1/groupversion_info.go @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,9 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package v1alpha1 contains API Schema definitions for the llmaz v1alpha1 API group +// Package v1alpha1 contains API Schema definitions for the v1alpha1 API group // +kubebuilder:object:generate=true -// +groupName=llmaz.inftyai.io +// +groupName=llmaz.io package v1alpha1 import ( @@ -26,7 +26,7 @@ import ( var ( // GroupVersion is group version used to register these objects - GroupVersion = schema.GroupVersion{Group: "llmaz.inftyai.io", Version: "v1alpha1"} + GroupVersion = schema.GroupVersion{Group: "llmaz.io", Version: "v1alpha1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} diff --git a/api/v1alpha1/inference_types.go b/api/v1alpha1/inference_types.go deleted file mode 100644 index f229c87..0000000 --- a/api/v1alpha1/inference_types.go +++ /dev/null @@ -1,88 +0,0 @@ -/* -Copyright 2023. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package v1alpha1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - lws "sigs.k8s.io/lws/api/leaderworkerset/v1" -) - -// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! -// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. - -// InferenceSpec defines the desired state of Inference -type InferenceSpec struct { - // ModelNameOrPath represents the model name or the local path. - ModelNameOrPath string `json:"modelNameOrPath,omitempty"` - // Backend indicates the inference backend under the hood, e.g. vLLM. - // Default to use huggingface library. - // - // +optional - // +kubebuilder:validation:Enum={vllm,huggingface} - // +kubebuilder:default=huggingface - Backend *string `json:"backend,omitempty"` - // WorkloadTemplate defines the underlying workload layout and configuration, - // e.g. the leader/worker templates and replicas. - WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"` - // ElasticConfig defines the configuration for elastic usage, - // e.g. the max/min replicas. - // Default to 0 ~ Inf+. - // +optional - ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"` -} - -type ElasticConfig struct { - // MinReplicas indicates the minimum number of Inference instances based on the traffic. - // Default to nil means we can scale down the instances to 0. - // +optional - MinReplicas *int32 `json:"minReplicas,omitempty"` - // MaxReplicas indicates the maximum number of Inference instances based on the traffic. - // Default to nil means there's no limit for the instance number. - // +optional - MaxReplicas *int32 `json:"maxReplicas,omitempty"` -} - -// InferenceStatus defines the observed state of Inference -type InferenceStatus struct { - // Conditions represents the Inference condition. - Conditions []metav1.Condition `json:"conditions,omitempty"` -} - -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status - -// Inference is the Schema for the inferences API -type Inference struct { - metav1.TypeMeta `json:",inline"` - metav1.ObjectMeta `json:"metadata,omitempty"` - - Spec InferenceSpec `json:"spec,omitempty"` - Status InferenceStatus `json:"status,omitempty"` -} - -//+kubebuilder:object:root=true - -// InferenceList contains a list of Inference -type InferenceList struct { - metav1.TypeMeta `json:",inline"` - metav1.ListMeta `json:"metadata,omitempty"` - Items []Inference `json:"items"` -} - -func init() { - SchemeBuilder.Register(&Inference{}, &InferenceList{}) -} diff --git a/api/v1alpha1/model_types.go b/api/v1alpha1/model_types.go new file mode 100644 index 0000000..056cc0b --- /dev/null +++ b/api/v1alpha1/model_types.go @@ -0,0 +1,128 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// DataSource represents where to load the model. +// Only one data source will be used. +type DataSource struct { + // URL represents the URL link than contains the data sources. + // +optional + URL *string `json:"url,omitempty"` + // The mounted volume that contains the data. + // +optional + Volume *v1.VolumeSource `json:"volumeSource,omitempty"` + // Image represents the the image address that contains the source data. + // +optional + Image *string `json:"image,omitempty"` + // ImagePullSecrets represents a list of secret names in the same namespace used for pulling the image. + // +optional + ImagePullSecrets []string `json:"imagePullSecrets,omitempty"` +} + +type FlavorName string + +// Flavor defines the accelerator requirements for a model and the necessary parameters +// in autoscaling. Right now, it will be used in two places: +// - Pod scheduling with node selectors specified. +// - Cluster autoscaling with essential parameters provided. +type Flavor struct { + // Name represents the flavor name, which will be used in model claim. + Name FlavorName `json:"name"` + // Requests defines the required resources to serve the model, like nvidia.com/gpu: 8. + // Note: cpu and memory usage of the model backend (vllm e.g.) can also be declared here, + // or a default value will be used based on the community recommendations. + Requests v1.ResourceList `json:"requests,omitempty"` + // NodeSelector defines the labels to filter specified nodes, like + // cloud-provider.com/accelerator: nvidia-a100. + // NodeSelector will be auto injected to the Pods as scheduling primitives. + // +optional + NodeSelector []v1.NodeSelector `json:"nodeSelector,omitempty"` + // Params stores other useful parameters and will be consumed by the autoscaling components + // like cluster-autoscaler, Karpenter. + // E.g. when scaling up nodes with 8x Nvidia A00, the parameter can be injected with + // instance-type: p4d.24xlarge for AWS. + // +optional + Params map[string]string `json:"params,omitempty"` +} + +type ModelName string + +// ModelsClaim represents the references to multiple models, +// as well as the configured flavors. +type ModelsClaim struct { + // ModelNames represents a list of models, there maybe multiple models here + // to support state-of-the-art technologies like speculative decoding. + // +kubebuilder:validation:MinItems=1 + ModelNames []ModelName `json:"modelNames,omitempty"` + // InferenceFlavors represents a list of flavors with fungibility supported + // to serve the model. + // - If not set and multiple models claimed, apply with the 0-index model by default. + // - If set, the flavor names will refer to the 0-index model. + // This is just for simplicity, if needed, will refactor this part in the future. + // +optional + InferenceFlavors []FlavorName `json:"inferenceFlavors,omitempty"` +} + +// ModelSpec defines the desired state of Model +type ModelSpec struct { + // FamilyName represents the model type, like llama2, which will be auto injected + // to the labels with the key of `llmaz.io/model-family-name`. + FamilyName ModelName `json:"familyName"` + // DataSource represents where the model stores, there're several ways like + // loading from huggingface, host path, s3 and so on. + DataSource DataSource `json:"dataSource"` + // InferenceFlavors represents the accelerator requirements to serve the model. + // Flavors are fungible following the priority of slice order. + // +optional + InferenceFlavors []Flavor `json:"inferenceFlavors,omitempty"` +} + +// ModelStatus defines the observed state of Model +type ModelStatus struct { + // Conditions represents the Inference condition. + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status + +// Model is the Schema for the models API +type Model struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec ModelSpec `json:"spec,omitempty"` + Status ModelStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// ModelList contains a list of Model +type ModelList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []Model `json:"items"` +} + +func init() { + SchemeBuilder.Register(&Model{}, &ModelList{}) +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 1741e3b..1ad2cfe 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1,7 +1,7 @@ //go:build !ignore_autogenerated /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,37 +21,84 @@ limitations under the License. package v1alpha1 import ( - "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ElasticConfig) DeepCopyInto(out *ElasticConfig) { +func (in *DataSource) DeepCopyInto(out *DataSource) { *out = *in - if in.MinReplicas != nil { - in, out := &in.MinReplicas, &out.MinReplicas - *out = new(int32) + if in.URL != nil { + in, out := &in.URL, &out.URL + *out = new(string) **out = **in } - if in.MaxReplicas != nil { - in, out := &in.MaxReplicas, &out.MaxReplicas - *out = new(int32) + if in.Volume != nil { + in, out := &in.Volume, &out.Volume + *out = new(v1.VolumeSource) + (*in).DeepCopyInto(*out) + } + if in.Image != nil { + in, out := &in.Image, &out.Image + *out = new(string) **out = **in } + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataSource. +func (in *DataSource) DeepCopy() *DataSource { + if in == nil { + return nil + } + out := new(DataSource) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Flavor) DeepCopyInto(out *Flavor) { + *out = *in + if in.Requests != nil { + in, out := &in.Requests, &out.Requests + *out = make(v1.ResourceList, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make([]v1.NodeSelector, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Params != nil { + in, out := &in.Params, &out.Params + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticConfig. -func (in *ElasticConfig) DeepCopy() *ElasticConfig { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Flavor. +func (in *Flavor) DeepCopy() *Flavor { if in == nil { return nil } - out := new(ElasticConfig) + out := new(Flavor) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Inference) DeepCopyInto(out *Inference) { +func (in *Model) DeepCopyInto(out *Model) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) @@ -59,18 +106,18 @@ func (in *Inference) DeepCopyInto(out *Inference) { in.Status.DeepCopyInto(&out.Status) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Inference. -func (in *Inference) DeepCopy() *Inference { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Model. +func (in *Model) DeepCopy() *Model { if in == nil { return nil } - out := new(Inference) + out := new(Model) in.DeepCopyInto(out) return out } // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *Inference) DeepCopyObject() runtime.Object { +func (in *Model) DeepCopyObject() runtime.Object { if c := in.DeepCopy(); c != nil { return c } @@ -78,31 +125,31 @@ func (in *Inference) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *InferenceList) DeepCopyInto(out *InferenceList) { +func (in *ModelList) DeepCopyInto(out *ModelList) { *out = *in out.TypeMeta = in.TypeMeta in.ListMeta.DeepCopyInto(&out.ListMeta) if in.Items != nil { in, out := &in.Items, &out.Items - *out = make([]Inference, len(*in)) + *out = make([]Model, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceList. -func (in *InferenceList) DeepCopy() *InferenceList { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelList. +func (in *ModelList) DeepCopy() *ModelList { if in == nil { return nil } - out := new(InferenceList) + out := new(ModelList) in.DeepCopyInto(out) return out } // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *InferenceList) DeepCopyObject() runtime.Object { +func (in *ModelList) DeepCopyObject() runtime.Object { if c := in.DeepCopy(); c != nil { return c } @@ -110,49 +157,71 @@ func (in *InferenceList) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *InferenceSpec) DeepCopyInto(out *InferenceSpec) { +func (in *ModelSpec) DeepCopyInto(out *ModelSpec) { *out = *in - if in.Backend != nil { - in, out := &in.Backend, &out.Backend - *out = new(string) - **out = **in - } - in.WorkloadTemplate.DeepCopyInto(&out.WorkloadTemplate) - if in.ElasticConfig != nil { - in, out := &in.ElasticConfig, &out.ElasticConfig - *out = new(ElasticConfig) - (*in).DeepCopyInto(*out) + in.DataSource.DeepCopyInto(&out.DataSource) + if in.InferenceFlavors != nil { + in, out := &in.InferenceFlavors, &out.InferenceFlavors + *out = make([]Flavor, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceSpec. -func (in *InferenceSpec) DeepCopy() *InferenceSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec. +func (in *ModelSpec) DeepCopy() *ModelSpec { if in == nil { return nil } - out := new(InferenceSpec) + out := new(ModelSpec) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *InferenceStatus) DeepCopyInto(out *InferenceStatus) { +func (in *ModelStatus) DeepCopyInto(out *ModelStatus) { *out = *in if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions - *out = make([]v1.Condition, len(*in)) + *out = make([]metav1.Condition, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceStatus. -func (in *InferenceStatus) DeepCopy() *InferenceStatus { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelStatus. +func (in *ModelStatus) DeepCopy() *ModelStatus { + if in == nil { + return nil + } + out := new(ModelStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ModelsClaim) DeepCopyInto(out *ModelsClaim) { + *out = *in + if in.ModelNames != nil { + in, out := &in.ModelNames, &out.ModelNames + *out = make([]ModelName, len(*in)) + copy(*out, *in) + } + if in.InferenceFlavors != nil { + in, out := &in.InferenceFlavors, &out.InferenceFlavors + *out = make([]FlavorName, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelsClaim. +func (in *ModelsClaim) DeepCopy() *ModelsClaim { if in == nil { return nil } - out := new(InferenceStatus) + out := new(ModelsClaim) in.DeepCopyInto(out) return out } diff --git a/cmd/main.go b/cmd/main.go index 78bd50e..0a5307b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,8 +32,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - llmaz "inftyai.io/llmaz/api/v1alpha1" - "inftyai.io/llmaz/internal/controller" + inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1" + llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1" + "inftyai.com/llmaz/internal/controller" + inferencecontroller "inftyai.com/llmaz/internal/controller/inference" //+kubebuilder:scaffold:imports ) @@ -45,7 +47,8 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(llmaz.AddToScheme(scheme)) + utilruntime.Must(inferencev1alpha1.AddToScheme(scheme)) + utilruntime.Must(llmaziov1alpha1.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme } @@ -71,7 +74,7 @@ func main() { Metrics: metricsserver.Options{BindAddress: metricsAddr}, HealthProbeBindAddress: probeAddr, LeaderElection: enableLeaderElection, - LeaderElectionID: "05d9997c.inftyai.io", + LeaderElectionID: "fbb36db9.llmaz.io", // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily // when the Manager ends. This requires the binary to immediately end when the // Manager is stopped, otherwise, this setting is unsafe. Setting this significantly @@ -89,11 +92,25 @@ func main() { os.Exit(1) } - if err = (&controller.InferenceReconciler{ + if err = (&inferencecontroller.ServiceReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "Inference") + setupLog.Error(err, "unable to create controller", "controller", "Service") + os.Exit(1) + } + if err = (&inferencecontroller.PlaygroundReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Playground") + os.Exit(1) + } + if err = (&controller.ModelReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Model") os.Exit(1) } //+kubebuilder:scaffold:builder diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml new file mode 100644 index 0000000..7346084 --- /dev/null +++ b/config/crd/bases/inference.llmaz.io_playgrounds.yaml @@ -0,0 +1,305 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: playgrounds.inference.llmaz.io +spec: + group: inference.llmaz.io + names: + kind: Playground + listKind: PlaygroundList + plural: playgrounds + singular: playground + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Playground is the Schema for the playgrounds API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: PlaygroundSpec defines the desired state of Playground + properties: + backendConfig: + description: |- + BackendConfig represents the inference backend configuration + under the hood, e.g. vLLM, which is the default backend. + properties: + args: + description: Args represents the arguments passed to the backend. + items: + type: string + type: array + envs: + description: Envs represents the environments set to the container. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must be a + C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + optional: + description: Specify whether the ConfigMap or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the + specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of the + exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + optional: + description: Specify whether the Secret or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + name: + default: vllm + description: Name represents the inference backend under the hood, + e.g. vLLM. + enum: + - vllm + type: string + version: + description: |- + Version represents the backend version if you want a different one + from the default version. + type: string + type: object + elasticConfig: + description: |- + ElasticConfig defines the configuration for elastic usage, + e.g. the max/min replicas. Default to 0 ~ Inf+. + properties: + maxReplicas: + description: |- + MaxReplicas indicates the maximum number of inference workloads based on the traffic. + Default to nil means there's no limit for the instance number. + format: int32 + type: integer + minReplicas: + description: |- + MinReplicas indicates the minimum number of inference workloads based on the traffic. + Default to nil means we can scale down the instances to 0. + format: int32 + type: integer + type: object + modelsClaim: + description: ModelsClaim represents the references to multiple models. + properties: + inferenceFlavors: + description: |- + InferenceFlavors represents a list of flavors with fungibility supported + to serve the model. + - If not set and multiple models claimed, apply with the 0-index model by default. + - If set, the flavor names will refer to the 0-index model. + This is just for simplicity, if needed, will refactor this part in the future. + items: + type: string + type: array + modelNames: + description: |- + ModelNames represents a list of models, there maybe multiple models here + to support state-of-the-art technologies like speculative decoding. + items: + type: string + minItems: 1 + type: array + type: object + replicas: + default: 1 + description: Replicas represents the replica number of inference workloads. + format: int32 + type: integer + required: + - modelsClaim + type: object + status: + description: PlaygroundStatus defines the observed state of Playground + properties: + conditions: + description: Conditions represents the Inference condition. + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/llmaz.inftyai.io_inferences.yaml b/config/crd/bases/inference.llmaz.io_services.yaml similarity index 99% rename from config/crd/bases/llmaz.inftyai.io_inferences.yaml rename to config/crd/bases/inference.llmaz.io_services.yaml index eafaa3f..d64c65b 100644 --- a/config/crd/bases/llmaz.inftyai.io_inferences.yaml +++ b/config/crd/bases/inference.llmaz.io_services.yaml @@ -4,20 +4,20 @@ kind: CustomResourceDefinition metadata: annotations: controller-gen.kubebuilder.io/version: v0.14.0 - name: inferences.llmaz.inftyai.io + name: services.inference.llmaz.io spec: - group: llmaz.inftyai.io + group: inference.llmaz.io names: - kind: Inference - listKind: InferenceList - plural: inferences - singular: inference + kind: Service + listKind: ServiceList + plural: services + singular: service scope: Namespaced versions: - name: v1alpha1 schema: openAPIV3Schema: - description: Inference is the Schema for the inferences API + description: Service is the Schema for the services API properties: apiVersion: description: |- @@ -37,44 +37,57 @@ spec: metadata: type: object spec: - description: InferenceSpec defines the desired state of Inference + description: |- + ServiceSpec defines the desired state of Service. + Service controller will maintain multi-flavor of workloads with + different accelerators for cost or performance considerations. properties: - backend: - default: huggingface - description: |- - Backend indicates the inference backend under the hood, e.g. vLLM. - Default to use huggingface library. - enum: - - vllm - - huggingface - type: string elasticConfig: description: |- ElasticConfig defines the configuration for elastic usage, - e.g. the max/min replicas. - Default to 0 ~ Inf+. + e.g. the max/min replicas. Default to 0 ~ Inf+. properties: maxReplicas: description: |- - MaxReplicas indicates the maximum number of Inference instances based on the traffic. + MaxReplicas indicates the maximum number of inference workloads based on the traffic. Default to nil means there's no limit for the instance number. format: int32 type: integer minReplicas: description: |- - MinReplicas indicates the minimum number of Inference instances based on the traffic. + MinReplicas indicates the minimum number of inference workloads based on the traffic. Default to nil means we can scale down the instances to 0. format: int32 type: integer type: object - modelNameOrPath: - description: ModelNameOrPath represents the model name or the local - path. - type: string - workloadTemplate: + modelsClaim: description: |- - WorkloadTemplate defines the underlying workload layout and configuration, - e.g. the leader/worker templates and replicas. + ModelsClaim represents the references to multiple models. + Note: properties (nodeSelectors, resources, e.g.) of the model flavors + will be applied to the workload if not exist. + properties: + inferenceFlavors: + description: |- + InferenceFlavors represents a list of flavors with fungibility supported + to serve the model. + - If not set and multiple models claimed, apply with the 0-index model by default. + - If set, the flavor names will refer to the 0-index model. + This is just for simplicity, if needed, will refactor this part in the future. + items: + type: string + type: array + modelNames: + description: |- + ModelNames represents a list of models, there maybe multiple models here + to support state-of-the-art technologies like speculative decoding. + items: + type: string + minItems: 1 + type: array + type: object + workloadTemplate: + description: WorkloadTemplate defines the underlying workload layout + and configuration. properties: leaderWorkerTemplate: description: LeaderWorkerTemplate defines the template for leader/worker @@ -7746,16 +7759,38 @@ spec: type: object type: object restartPolicy: + default: Default description: RestartPolicy defines the restart policy when pod failures happen. + enum: + - Default + - RecreateGroupOnPodRestart type: string size: + default: 1 description: |- Number of pods to create. It is the total number of pods in each group. The minimum is 1 which represent the leader. When set to 1, the leader pod is created for each group as well as a 0-replica StatefulSet for the workers. + Default to 1. format: int32 type: integer + subGroupPolicy: + description: |- + SubGroupPolicy describes the policy that will be applied when creating subgroups + in each replica. + properties: + subGroupSize: + description: |- + The number of pods per subgroup. This value is immutable, + and must not be greater than LeaderWorkerSet.Spec.Size. + Size must be divisible by subGroupSize in which case the + subgroups will be of equal size. Or size - 1 is divisible + by subGroupSize, in which case the leader is considered as + the extra pod, and will be part of the first subgroup. + format: int32 + type: integer + type: object workerTemplate: description: WorkerTemplate defines the pod template for worker pods. @@ -15422,10 +15457,10 @@ spec: type: object type: object required: - - size - workerTemplate type: object replicas: + default: 1 description: |- Number of leader-workers groups. A scale subresource is available to enable HPA. The selector for HPA will be that of the leader pod, and so practically HPA will be looking up the @@ -15433,16 +15468,81 @@ spec: the rest of the group and expose them as a summary custom metric representing the whole group. On scale down, the leader pod as well as the workers statefulset will be deleted. + Default to 1. format: int32 type: integer + rolloutStrategy: + description: |- + RolloutStrategy defines the strategy that will be applied to update replicas + when a revision is made to the leaderWorkerTemplate. + properties: + rollingUpdateConfiguration: + description: RollingUpdateConfiguration defines the parameters + to be used when type is RollingUpdateStrategyType. + properties: + maxSurge: + anyOf: + - type: integer + - type: string + default: 0 + description: |- + The maximum number of replicas that can be scheduled above the original number of + replicas. + Value can be an absolute number (ex: 5) or a percentage of total replicas at + the start of the update (ex: 10%). + Absolute number is calculated from percentage by rounding up. + By default, a value of 0 is used. + Example: when this is set to 30%, the new replicas can be scaled up by 30% + immediately when the rolling update starts. Once old replicas have been deleted, + new replicas can be scaled up further, ensuring that total number of replicas running + at any time during the update is at most 130% of original replicas. + When rolling update completes, replicas will fall back to the original replicas. + x-kubernetes-int-or-string: true + maxUnavailable: + anyOf: + - type: integer + - type: string + default: 1 + description: |- + The maximum number of replicas that can be unavailable during the update. + Value can be an absolute number (ex: 5) or a percentage of total replicas at the start of update (ex: 10%). + Absolute number is calculated from percentage by rounding down. + This can not be 0 if MaxSurge is 0. + By default, a fixed value of 1 is used. + Example: when this is set to 30%, the old replicas can be scaled down by 30% + immediately when the rolling update starts. Once new replicas are ready, old replicas + can be scaled down further, followed by scaling up the new replicas, ensuring + that at least 70% of original number of replicas are available at all times + during the update. + x-kubernetes-int-or-string: true + type: object + type: + default: RollingUpdate + description: Type defines the rollout strategy, it can only + be “RollingUpdate” for now. + enum: + - RollingUpdate + type: string + required: + - type + type: object + startupPolicy: + default: LeaderCreated + description: StartupPolicy determines the startup policy for the + worker statefulset. + enum: + - LeaderCreated + - LeaderReady + type: string required: - leaderWorkerTemplate type: object required: + - modelsClaim - workloadTemplate type: object status: - description: InferenceStatus defines the observed state of Inference + description: ServiceStatus defines the observed state of Service properties: conditions: description: Conditions represents the Inference condition. diff --git a/config/crd/bases/llmaz.io_modelproviders.yaml b/config/crd/bases/llmaz.io_modelproviders.yaml new file mode 100644 index 0000000..c7f057e --- /dev/null +++ b/config/crd/bases/llmaz.io_modelproviders.yaml @@ -0,0 +1,293 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: modelproviders.llmaz.io +spec: + group: llmaz.io + names: + kind: ModelProvider + listKind: ModelProviderList + plural: modelproviders + singular: modelprovider + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: ModelProvider is the Schema for the modelProvider API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ModelProviderSpec defines the desired state of ModelProvider + properties: + dataSource: + description: DataSource represents where to fetch the models, huggingface, + hostpath, s3, etc.. + properties: + image: + description: |- + The name of the image that contains the source data. The assumption is that the source data locates in the + `data` directory in the image. + type: string + imagePullSecrets: + description: ImagePullSecrets is a list of secret names in the + same namespace used for pulling the data image. + items: + type: string + type: array + name: + description: |- + The name of the dataset. The same name will be used as a container name. + It must be a valid DNS subdomain value, + type: string + urls: + description: URLs specifies the links to the public data sources. + E.g., files in a public github repository. + items: + type: string + type: array + volumeSource: + description: The mounted volume that contains the data. + x-kubernetes-preserve-unknown-fields: true + type: object + familyName: + description: |- + FamilyName represents the model type, like llama2, which will be auto injected + to the labels with the key of `llmaz.io/model-family-name`. + type: string + inferenceFlavors: + description: |- + InferenceFlavors represents the hardware requirements to serve the model. + Flavors are fungible following the priority of slice order. + items: + description: |- + Flavor represents the hardware requirements for one model. + Generally, it will be used in two places: + - Pod scheduling with node selectors specified. + - Cluster autoscaling with essential parameters provided. + Flavor is useful because the hardware requirements for models + are usually clear, like llama2-70B needs 8x Nvidia A100. + properties: + name: + description: Name represents the flavor name. + type: string + nodeSelector: + description: |- + NodeSelector refers to the nodes with specified accelerators equipped to + serve the model, like cloud-provider.com/accelerator: nvidia-a100, + NodeSelector will be auto injected to the Pods as scheduling primitives. + items: + description: |- + A node selector represents the union of the results of one or more label queries + over a set of nodes; that is, it represents the OR of the selectors represented + by the node selector terms. + properties: + nodeSelectorTerms: + description: Required. A list of node selector terms. + The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + x-kubernetes-map-type: atomic + type: array + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: array + params: + additionalProperties: + type: string + description: |- + Params stores other useful parameters and will be consumed by the autoscaling components + like cluster-autoscaler, Karpenter. + E.g. when scaling up nodes with 8x Nvidia A00, the parameter can be injected with + instance-type: p4d.24xlarge for AWS. + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests represents the required resources to serve the model, like nvidia.com/gpu: 8. + Note: cpu and memory usage of the model backend (vllm e.g.) can also be declared here, + or a default value will be used based on the community recommendations. + type: object + required: + - name + - requests + type: object + type: array + required: + - dataSource + - familyName + type: object + status: + description: ModelProviderStatus defines the observed state of ModelProvider + properties: + conditions: + description: Conditions represents the Inference condition. + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/llmaz.io_models.yaml b/config/crd/bases/llmaz.io_models.yaml new file mode 100644 index 0000000..9416f0f --- /dev/null +++ b/config/crd/bases/llmaz.io_models.yaml @@ -0,0 +1,1954 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: models.llmaz.io +spec: + group: llmaz.io + names: + kind: Model + listKind: ModelList + plural: models + singular: model + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Model is the Schema for the models API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ModelSpec defines the desired state of Model + properties: + dataSource: + description: |- + DataSource represents where the model stores, there're several ways like + loading from huggingface, host path, s3 and so on. + properties: + image: + description: Image represents the the image address that contains + the source data. + type: string + imagePullSecrets: + description: ImagePullSecrets represents a list of secret names + in the same namespace used for pulling the image. + items: + type: string + type: array + url: + description: URL represents the URL link than contains the data + sources. + type: string + volumeSource: + description: The mounted volume that contains the data. + properties: + awsElasticBlockStore: + description: |- + awsElasticBlockStore represents an AWS Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + properties: + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + partition: + description: |- + partition is the partition in the volume that you want to mount. + If omitted, the default is to mount by volume name. + Examples: For volume /dev/sda1, you specify the partition as "1". + Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). + format: int32 + type: integer + readOnly: + description: |- + readOnly value true will force the readOnly setting in VolumeMounts. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: boolean + volumeID: + description: |- + volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: string + required: + - volumeID + type: object + azureDisk: + description: azureDisk represents an Azure Data Disk mount + on the host and bind mount to the pod. + properties: + cachingMode: + description: 'cachingMode is the Host Caching mode: None, + Read Only, Read Write.' + type: string + diskName: + description: diskName is the Name of the data disk in + the blob storage + type: string + diskURI: + description: diskURI is the URI of data disk in the blob + storage + type: string + fsType: + description: |- + fsType is Filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + kind: + description: 'kind expected values are Shared: multiple + blob disks per storage account Dedicated: single blob + disk per storage account Managed: azure managed data + disk (only in managed availability set). defaults to + shared' + type: string + readOnly: + description: |- + readOnly Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + required: + - diskName + - diskURI + type: object + azureFile: + description: azureFile represents an Azure File Service mount + on the host and bind mount to the pod. + properties: + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretName: + description: secretName is the name of secret that contains + Azure Storage Account Name and Key + type: string + shareName: + description: shareName is the azure share Name + type: string + required: + - secretName + - shareName + type: object + cephfs: + description: cephFS represents a Ceph FS mount on the host + that shares a pod's lifetime + properties: + monitors: + description: |- + monitors is Required: Monitors is a collection of Ceph monitors + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + items: + type: string + type: array + path: + description: 'path is Optional: Used as the mounted root, + rather than the full Ceph tree, default is /' + type: string + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: boolean + secretFile: + description: |- + secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: string + secretRef: + description: |- + secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + type: object + x-kubernetes-map-type: atomic + user: + description: |- + user is optional: User is the rados user name, default is admin + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: string + required: + - monitors + type: object + cinder: + description: |- + cinder represents a cinder volume attached and mounted on kubelets host machine. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: boolean + secretRef: + description: |- + secretRef is optional: points to a secret object containing parameters used to connect + to OpenStack. + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + type: object + x-kubernetes-map-type: atomic + volumeID: + description: |- + volumeID used to identify the volume in cinder. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: string + required: + - volumeID + type: object + configMap: + description: configMap represents a configMap that should + populate this volume + properties: + defaultMode: + description: |- + defaultMode is optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + optional: + description: optional specify whether the ConfigMap or + its keys must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + csi: + description: csi (Container Storage Interface) represents + ephemeral storage that is handled by certain external CSI + drivers (Beta feature). + properties: + driver: + description: |- + driver is the name of the CSI driver that handles this volume. + Consult with your admin for the correct name as registered in the cluster. + type: string + fsType: + description: |- + fsType to mount. Ex. "ext4", "xfs", "ntfs". + If not provided, the empty value is passed to the associated CSI driver + which will determine the default filesystem to apply. + type: string + nodePublishSecretRef: + description: |- + nodePublishSecretRef is a reference to the secret object containing + sensitive information to pass to the CSI driver to complete the CSI + NodePublishVolume and NodeUnpublishVolume calls. + This field is optional, and may be empty if no secret is required. If the + secret object contains more than one secret, all secret references are passed. + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + type: object + x-kubernetes-map-type: atomic + readOnly: + description: |- + readOnly specifies a read-only configuration for the volume. + Defaults to false (read/write). + type: boolean + volumeAttributes: + additionalProperties: + type: string + description: |- + volumeAttributes stores driver-specific properties that are passed to the CSI + driver. Consult your driver's documentation for supported values. + type: object + required: + - driver + type: object + downwardAPI: + description: downwardAPI represents downward API about the + pod that should populate this volume + properties: + defaultMode: + description: |- + Optional: mode bits to use on created files by default. Must be a + Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: Items is a list of downward API volume file + items: + description: DownwardAPIVolumeFile represents information + to create the file containing the pod field + properties: + fieldRef: + description: 'Required: Selects a field of the pod: + only annotations, labels, name and namespace are + supported.' + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in + the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path is the relative path + name of the file to be created. Must not be absolute + or contain the ''..'' path. Must be utf-8 encoded. + The first item of the relative path must not start + with ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of + the exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + type: object + emptyDir: + description: |- + emptyDir represents a temporary directory that shares a pod's lifetime. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + properties: + medium: + description: |- + medium represents what type of storage medium should back this directory. + The default is "" which means to use the node's default medium. + Must be an empty string (default) or Memory. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + type: string + sizeLimit: + anyOf: + - type: integer + - type: string + description: |- + sizeLimit is the total amount of local storage required for this EmptyDir volume. + The size limit is also applicable for memory medium. + The maximum usage on memory medium EmptyDir would be the minimum value between + the SizeLimit specified here and the sum of memory limits of all containers in a pod. + The default is nil which means that the limit is undefined. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + ephemeral: + description: |- + ephemeral represents a volume that is handled by a cluster storage driver. + The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, + and deleted when the pod is removed. + + + Use this if: + a) the volume is only needed while the pod runs, + b) features of normal volumes like restoring from snapshot or capacity + tracking are needed, + c) the storage driver is specified through a storage class, and + d) the storage driver supports dynamic volume provisioning through + a PersistentVolumeClaim (see EphemeralVolumeSource for more + information on the connection between this volume type + and PersistentVolumeClaim). + + + Use PersistentVolumeClaim or one of the vendor-specific + APIs for volumes that persist for longer than the lifecycle + of an individual pod. + + + Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to + be used that way - see the documentation of the driver for + more information. + + + A pod can use both types of ephemeral volumes and + persistent volumes at the same time. + properties: + volumeClaimTemplate: + description: |- + Will be used to create a stand-alone PVC to provision the volume. + The pod in which this EphemeralVolumeSource is embedded will be the + owner of the PVC, i.e. the PVC will be deleted together with the + pod. The name of the PVC will be `-` where + `` is the name from the `PodSpec.Volumes` array + entry. Pod validation will reject the pod if the concatenated name + is not valid for a PVC (for example, too long). + + + An existing PVC with that name that is not owned by the pod + will *not* be used for the pod to avoid using an unrelated + volume by mistake. Starting the pod is then blocked until + the unrelated PVC is removed. If such a pre-created PVC is + meant to be used by the pod, the PVC has to updated with an + owner reference to the pod once the pod exists. Normally + this should not be necessary, but it may be useful when + manually reconstructing a broken cluster. + + + This field is read-only and no changes will be made by Kubernetes + to the PVC after it has been created. + + + Required, must not be nil. + properties: + metadata: + description: |- + May contain labels and annotations that will be copied into the PVC + when creating it. No other fields are allowed and will be rejected during + validation. + type: object + spec: + description: |- + The specification for the PersistentVolumeClaim. The entire content is + copied unchanged into the PVC that gets created from this + template. The same fields as in a PersistentVolumeClaim + are also valid here. + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over volumes + to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list of + label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that + the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#volumeattributesclass + (Alpha) Using this field requires the VolumeAttributesClass feature gate to be enabled. + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference + to the PersistentVolume backing this claim. + type: string + type: object + required: + - spec + type: object + type: object + fc: + description: fc represents a Fibre Channel resource that is + attached to a kubelet's host machine and then exposed to + the pod. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + lun: + description: 'lun is Optional: FC target lun number' + format: int32 + type: integer + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + targetWWNs: + description: 'targetWWNs is Optional: FC target worldwide + names (WWNs)' + items: + type: string + type: array + wwids: + description: |- + wwids Optional: FC volume world wide identifiers (wwids) + Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. + items: + type: string + type: array + type: object + flexVolume: + description: |- + flexVolume represents a generic volume resource that is + provisioned/attached using an exec based plugin. + properties: + driver: + description: driver is the name of the driver to use for + this volume. + type: string + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. + type: string + options: + additionalProperties: + type: string + description: 'options is Optional: this field holds extra + command options if any.' + type: object + readOnly: + description: |- + readOnly is Optional: defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef is Optional: secretRef is reference to the secret object containing + sensitive information to pass to the plugin scripts. This may be + empty if no secret object is specified. If the secret object + contains more than one secret, all secrets are passed to the plugin + scripts. + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + type: object + x-kubernetes-map-type: atomic + required: + - driver + type: object + flocker: + description: flocker represents a Flocker volume attached + to a kubelet's host machine. This depends on the Flocker + control service being running + properties: + datasetName: + description: |- + datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker + should be considered as deprecated + type: string + datasetUUID: + description: datasetUUID is the UUID of the dataset. This + is unique identifier of a Flocker dataset + type: string + type: object + gcePersistentDisk: + description: |- + gcePersistentDisk represents a GCE Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + properties: + fsType: + description: |- + fsType is filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + partition: + description: |- + partition is the partition in the volume that you want to mount. + If omitted, the default is to mount by volume name. + Examples: For volume /dev/sda1, you specify the partition as "1". + Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + format: int32 + type: integer + pdName: + description: |- + pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: boolean + required: + - pdName + type: object + gitRepo: + description: |- + gitRepo represents a git repository at a particular revision. + DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an + EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir + into the Pod's container. + properties: + directory: + description: |- + directory is the target directory name. + Must not contain or start with '..'. If '.' is supplied, the volume directory will be the + git repository. Otherwise, if specified, the volume will contain the git repository in + the subdirectory with the given name. + type: string + repository: + description: repository is the URL + type: string + revision: + description: revision is the commit hash for the specified + revision. + type: string + required: + - repository + type: object + glusterfs: + description: |- + glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/glusterfs/README.md + properties: + endpoints: + description: |- + endpoints is the endpoint name that details Glusterfs topology. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + path: + description: |- + path is the Glusterfs volume path. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + readOnly: + description: |- + readOnly here will force the Glusterfs volume to be mounted with read-only permissions. + Defaults to false. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: boolean + required: + - endpoints + - path + type: object + hostPath: + description: |- + hostPath represents a pre-existing file or directory on the host + machine that is directly exposed to the container. This is generally + used for system agents or other privileged things that are allowed + to see the host machine. Most containers will NOT need this. + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + --- + TODO(jonesdl) We need to restrict who can use host directory mounts and who can/can not + mount host directories as read/write. + properties: + path: + description: |- + path of the directory on the host. + If the path is a symlink, it will follow the link to the real path. + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + type: + description: |- + type for HostPath Volume + Defaults to "" + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + required: + - path + type: object + iscsi: + description: |- + iscsi represents an ISCSI Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://examples.k8s.io/volumes/iscsi/README.md + properties: + chapAuthDiscovery: + description: chapAuthDiscovery defines whether support + iSCSI Discovery CHAP authentication + type: boolean + chapAuthSession: + description: chapAuthSession defines whether support iSCSI + Session CHAP authentication + type: boolean + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + initiatorName: + description: |- + initiatorName is the custom iSCSI Initiator Name. + If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface + : will be created for the connection. + type: string + iqn: + description: iqn is the target iSCSI Qualified Name. + type: string + iscsiInterface: + description: |- + iscsiInterface is the interface Name that uses an iSCSI transport. + Defaults to 'default' (tcp). + type: string + lun: + description: lun represents iSCSI Target Lun number. + format: int32 + type: integer + portals: + description: |- + portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port + is other than default (typically TCP ports 860 and 3260). + items: + type: string + type: array + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + type: boolean + secretRef: + description: secretRef is the CHAP Secret for iSCSI target + and initiator authentication + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + type: object + x-kubernetes-map-type: atomic + targetPortal: + description: |- + targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port + is other than default (typically TCP ports 860 and 3260). + type: string + required: + - iqn + - lun + - targetPortal + type: object + nfs: + description: |- + nfs represents an NFS mount on the host that shares a pod's lifetime + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + properties: + path: + description: |- + path that is exported by the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + readOnly: + description: |- + readOnly here will force the NFS export to be mounted with read-only permissions. + Defaults to false. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: boolean + server: + description: |- + server is the hostname or IP address of the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + required: + - path + - server + type: object + persistentVolumeClaim: + description: |- + persistentVolumeClaimVolumeSource represents a reference to a + PersistentVolumeClaim in the same namespace. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims + properties: + claimName: + description: |- + claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims + type: string + readOnly: + description: |- + readOnly Will force the ReadOnly setting in VolumeMounts. + Default false. + type: boolean + required: + - claimName + type: object + photonPersistentDisk: + description: photonPersistentDisk represents a PhotonController + persistent disk attached and mounted on kubelets host machine + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + pdID: + description: pdID is the ID that identifies Photon Controller + persistent disk + type: string + required: + - pdID + type: object + portworxVolume: + description: portworxVolume represents a portworx volume attached + and mounted on kubelets host machine + properties: + fsType: + description: |- + fSType represents the filesystem type to mount + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + volumeID: + description: volumeID uniquely identifies a Portworx volume + type: string + required: + - volumeID + type: object + projected: + description: projected items for all in one resources secrets, + configmaps, and downward API + properties: + defaultMode: + description: |- + defaultMode are the mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + sources: + description: sources is the list of volume projections + items: + description: Projection that may be projected along + with other supported volume types + properties: + clusterTrustBundle: + description: |- + ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field + of ClusterTrustBundle objects in an auto-updating file. + + + Alpha, gated by the ClusterTrustBundleProjection feature gate. + + + ClusterTrustBundle objects can either be selected by name, or by the + combination of signer name and a label selector. + + + Kubelet performs aggressive normalization of the PEM contents written + into the pod filesystem. Esoteric PEM features such as inter-block + comments and block headers are stripped. Certificates are deduplicated. + The ordering of certificates within the file is arbitrary, and Kubelet + may change the order over time. + properties: + labelSelector: + description: |- + Select all ClusterTrustBundles that match this label selector. Only has + effect if signerName is set. Mutually-exclusive with name. If unset, + interpreted as "match nothing". If set but empty, interpreted as "match + everything". + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + name: + description: |- + Select a single ClusterTrustBundle by object name. Mutually-exclusive + with signerName and labelSelector. + type: string + optional: + description: |- + If true, don't block pod startup if the referenced ClusterTrustBundle(s) + aren't available. If using name, then the named ClusterTrustBundle is + allowed not to exist. If using signerName, then the combination of + signerName and labelSelector is allowed to match zero + ClusterTrustBundles. + type: boolean + path: + description: Relative path from the volume root + to write the bundle. + type: string + signerName: + description: |- + Select all ClusterTrustBundles that match this signer name. + Mutually-exclusive with name. The contents of all selected + ClusterTrustBundles will be unified and deduplicated. + type: string + required: + - path + type: object + configMap: + description: configMap information about the configMap + data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + optional: + description: optional specify whether the ConfigMap + or its keys must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + downwardAPI: + description: downwardAPI information about the downwardAPI + data to project + properties: + items: + description: Items is a list of DownwardAPIVolume + file + items: + description: DownwardAPIVolumeFile represents + information to create the file containing + the pod field + properties: + fieldRef: + description: 'Required: Selects a field + of the pod: only annotations, labels, + name and namespace are supported.' + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path is the relative + path name of the file to be created. + Must not be absolute or contain the + ''..'' path. Must be utf-8 encoded. + The first item of the relative path + must not start with ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to + select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + type: object + secret: + description: secret information about the secret + data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + optional: + description: optional field specify whether + the Secret or its key must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + serviceAccountToken: + description: serviceAccountToken is information + about the serviceAccountToken data to project + properties: + audience: + description: |- + audience is the intended audience of the token. A recipient of a token + must identify itself with an identifier specified in the audience of the + token, and otherwise should reject the token. The audience defaults to the + identifier of the apiserver. + type: string + expirationSeconds: + description: |- + expirationSeconds is the requested duration of validity of the service + account token. As the token approaches expiration, the kubelet volume + plugin will proactively rotate the service account token. The kubelet will + start trying to rotate the token if the token is older than 80 percent of + its time to live or if the token is older than 24 hours.Defaults to 1 hour + and must be at least 10 minutes. + format: int64 + type: integer + path: + description: |- + path is the path relative to the mount point of the file to project the + token into. + type: string + required: + - path + type: object + type: object + type: array + type: object + quobyte: + description: quobyte represents a Quobyte mount on the host + that shares a pod's lifetime + properties: + group: + description: |- + group to map volume access to + Default is no group + type: string + readOnly: + description: |- + readOnly here will force the Quobyte volume to be mounted with read-only permissions. + Defaults to false. + type: boolean + registry: + description: |- + registry represents a single or multiple Quobyte Registry services + specified as a string as host:port pair (multiple entries are separated with commas) + which acts as the central registry for volumes + type: string + tenant: + description: |- + tenant owning the given Quobyte volume in the Backend + Used with dynamically provisioned Quobyte volumes, value is set by the plugin + type: string + user: + description: |- + user to map volume access to + Defaults to serivceaccount user + type: string + volume: + description: volume is a string that references an already + created Quobyte volume by name. + type: string + required: + - registry + - volume + type: object + rbd: + description: |- + rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/rbd/README.md + properties: + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + image: + description: |- + image is the rados image name. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + keyring: + description: |- + keyring is the path to key ring for RBDUser. + Default is /etc/ceph/keyring. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + monitors: + description: |- + monitors is a collection of Ceph monitors. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + items: + type: string + type: array + pool: + description: |- + pool is the rados pool name. + Default is rbd. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: boolean + secretRef: + description: |- + secretRef is name of the authentication secret for RBDUser. If provided + overrides keyring. + Default is nil. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + type: object + x-kubernetes-map-type: atomic + user: + description: |- + user is the rados user name. + Default is admin. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + required: + - image + - monitors + type: object + scaleIO: + description: scaleIO represents a ScaleIO persistent volume + attached and mounted on Kubernetes nodes. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". + Default is "xfs". + type: string + gateway: + description: gateway is the host address of the ScaleIO + API Gateway. + type: string + protectionDomain: + description: protectionDomain is the name of the ScaleIO + Protection Domain for the configured storage. + type: string + readOnly: + description: |- + readOnly Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef references to the secret for ScaleIO user and other + sensitive information. If this is not provided, Login operation will fail. + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + type: object + x-kubernetes-map-type: atomic + sslEnabled: + description: sslEnabled Flag enable/disable SSL communication + with Gateway, default false + type: boolean + storageMode: + description: |- + storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. + Default is ThinProvisioned. + type: string + storagePool: + description: storagePool is the ScaleIO Storage Pool associated + with the protection domain. + type: string + system: + description: system is the name of the storage system + as configured in ScaleIO. + type: string + volumeName: + description: |- + volumeName is the name of a volume already created in the ScaleIO system + that is associated with this volume source. + type: string + required: + - gateway + - secretRef + - system + type: object + secret: + description: |- + secret represents a secret that should populate this volume. + More info: https://kubernetes.io/docs/concepts/storage/volumes#secret + properties: + defaultMode: + description: |- + defaultMode is Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values + for mode bits. Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items If unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + optional: + description: optional field specify whether the Secret + or its keys must be defined + type: boolean + secretName: + description: |- + secretName is the name of the secret in the pod's namespace to use. + More info: https://kubernetes.io/docs/concepts/storage/volumes#secret + type: string + type: object + storageos: + description: storageOS represents a StorageOS volume attached + and mounted on Kubernetes nodes. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef specifies the secret to use for obtaining the StorageOS API + credentials. If not specified, default values will be attempted. + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + type: object + x-kubernetes-map-type: atomic + volumeName: + description: |- + volumeName is the human-readable name of the StorageOS volume. Volume + names are only unique within a namespace. + type: string + volumeNamespace: + description: |- + volumeNamespace specifies the scope of the volume within StorageOS. If no + namespace is specified then the Pod's namespace will be used. This allows the + Kubernetes name scoping to be mirrored within StorageOS for tighter integration. + Set VolumeName to any name to override the default behaviour. + Set to "default" if you are not using namespaces within StorageOS. + Namespaces that do not pre-exist within StorageOS will be created. + type: string + type: object + vsphereVolume: + description: vsphereVolume represents a vSphere volume attached + and mounted on kubelets host machine + properties: + fsType: + description: |- + fsType is filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + storagePolicyID: + description: storagePolicyID is the storage Policy Based + Management (SPBM) profile ID associated with the StoragePolicyName. + type: string + storagePolicyName: + description: storagePolicyName is the storage Policy Based + Management (SPBM) profile name. + type: string + volumePath: + description: volumePath is the path that identifies vSphere + volume vmdk + type: string + required: + - volumePath + type: object + type: object + type: object + familyName: + description: |- + FamilyName represents the model type, like llama2, which will be auto injected + to the labels with the key of `llmaz.io/model-family-name`. + type: string + inferenceFlavors: + description: |- + InferenceFlavors represents the accelerator requirements to serve the model. + Flavors are fungible following the priority of slice order. + items: + description: |- + Flavor defines the accelerator requirements for a model and the necessary parameters + in autoscaling. Right now, it will be used in two places: + - Pod scheduling with node selectors specified. + - Cluster autoscaling with essential parameters provided. + properties: + name: + description: Name represents the flavor name, which will be + used in model claim. + type: string + nodeSelector: + description: |- + NodeSelector defines the labels to filter specified nodes, like + cloud-provider.com/accelerator: nvidia-a100. + NodeSelector will be auto injected to the Pods as scheduling primitives. + items: + description: |- + A node selector represents the union of the results of one or more label queries + over a set of nodes; that is, it represents the OR of the selectors represented + by the node selector terms. + properties: + nodeSelectorTerms: + description: Required. A list of node selector terms. + The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + x-kubernetes-map-type: atomic + type: array + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: array + params: + additionalProperties: + type: string + description: |- + Params stores other useful parameters and will be consumed by the autoscaling components + like cluster-autoscaler, Karpenter. + E.g. when scaling up nodes with 8x Nvidia A00, the parameter can be injected with + instance-type: p4d.24xlarge for AWS. + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests defines the required resources to serve the model, like nvidia.com/gpu: 8. + Note: cpu and memory usage of the model backend (vllm e.g.) can also be declared here, + or a default value will be used based on the community recommendations. + type: object + required: + - name + type: object + type: array + required: + - dataSource + - familyName + type: object + status: + description: ModelStatus defines the observed state of Model + properties: + conditions: + description: Conditions represents the Inference condition. + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index b425cc1..cc49ef3 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -2,18 +2,24 @@ # since it depends on service name and namespace that are out of this kustomize package. # It should be run by config/default resources: -- bases/llmaz.inftyai.io_inferences.yaml +- bases/inference.llmaz.io_services.yaml +- bases/inference.llmaz.io_playgrounds.yaml +- bases/llmaz.io_models.yaml #+kubebuilder:scaffold:crdkustomizeresource patches: # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. # patches here are for enabling the conversion webhook for each CRD -#- path: patches/webhook_in_inferences.yaml +#- path: patches/webhook_in_inference_services.yaml +#- path: patches/webhook_in_inference_playgrounds.yaml +#- path: patches/webhook_in__models.yaml #+kubebuilder:scaffold:crdkustomizewebhookpatch # [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix. # patches here are for enabling the CA injection for each CRD -#- path: patches/cainjection_in_inferences.yaml +#- path: patches/cainjection_in_inference_services.yaml +#- path: patches/cainjection_in_inference_playgrounds.yaml +#- path: patches/cainjection_in__models.yaml #+kubebuilder:scaffold:crdkustomizecainjectionpatch # [WEBHOOK] To enable webhook, uncomment the following section diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml index 77a8cc3..52e9051 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -7,8 +7,8 @@ metadata: app.kubernetes.io/name: servicemonitor app.kubernetes.io/instance: controller-manager-metrics-monitor app.kubernetes.io/component: metrics - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: controller-manager-metrics-monitor namespace: system diff --git a/config/rbac/_model_editor_role.yaml b/config/rbac/_model_editor_role.yaml new file mode 100644 index 0000000..7c51e1a --- /dev/null +++ b/config/rbac/_model_editor_role.yaml @@ -0,0 +1,31 @@ +# permissions for end users to edit models. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: model-editor-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + name: model-editor-role +rules: +- apiGroups: + - llmaz.io + resources: + - models + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - llmaz.io + resources: + - models/status + verbs: + - get diff --git a/config/rbac/_model_viewer_role.yaml b/config/rbac/_model_viewer_role.yaml new file mode 100644 index 0000000..b8741f4 --- /dev/null +++ b/config/rbac/_model_viewer_role.yaml @@ -0,0 +1,27 @@ +# permissions for end users to view models. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: model-viewer-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + name: model-viewer-role +rules: +- apiGroups: + - llmaz.io + resources: + - models + verbs: + - get + - list + - watch +- apiGroups: + - llmaz.io + resources: + - models/status + verbs: + - get diff --git a/config/rbac/auth_proxy_client_clusterrole.yaml b/config/rbac/auth_proxy_client_clusterrole.yaml index 2fd2c1c..a6b698e 100644 --- a/config/rbac/auth_proxy_client_clusterrole.yaml +++ b/config/rbac/auth_proxy_client_clusterrole.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: clusterrole app.kubernetes.io/instance: metrics-reader app.kubernetes.io/component: kube-rbac-proxy - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: metrics-reader rules: diff --git a/config/rbac/auth_proxy_role.yaml b/config/rbac/auth_proxy_role.yaml index a43ea4c..3eca335 100644 --- a/config/rbac/auth_proxy_role.yaml +++ b/config/rbac/auth_proxy_role.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: clusterrole app.kubernetes.io/instance: proxy-role app.kubernetes.io/component: kube-rbac-proxy - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: proxy-role rules: diff --git a/config/rbac/auth_proxy_role_binding.yaml b/config/rbac/auth_proxy_role_binding.yaml index bc79daf..53c2976 100644 --- a/config/rbac/auth_proxy_role_binding.yaml +++ b/config/rbac/auth_proxy_role_binding.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: clusterrolebinding app.kubernetes.io/instance: proxy-rolebinding app.kubernetes.io/component: kube-rbac-proxy - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: proxy-rolebinding roleRef: diff --git a/config/rbac/auth_proxy_service.yaml b/config/rbac/auth_proxy_service.yaml index 46f6f74..30bd201 100644 --- a/config/rbac/auth_proxy_service.yaml +++ b/config/rbac/auth_proxy_service.yaml @@ -6,8 +6,8 @@ metadata: app.kubernetes.io/name: service app.kubernetes.io/instance: controller-manager-metrics-service app.kubernetes.io/component: kube-rbac-proxy - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: controller-manager-metrics-service namespace: system diff --git a/config/rbac/inference_editor_role.yaml b/config/rbac/inference_editor_role.yaml index 0b43afa..70cbcba 100644 --- a/config/rbac/inference_editor_role.yaml +++ b/config/rbac/inference_editor_role.yaml @@ -1,20 +1,20 @@ -# permissions for end users to edit inferences. +# permissions for end users to edit services. apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: app.kubernetes.io/name: clusterrole - app.kubernetes.io/instance: inference-editor-role + app.kubernetes.io/instance: service-editor-role app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize - name: inference-editor-role + name: service-editor-role rules: - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - inferences + - services verbs: - create - delete @@ -24,8 +24,8 @@ rules: - update - watch - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - inferences/status + - services/status verbs: - get diff --git a/config/rbac/inference_playground_editor_role.yaml b/config/rbac/inference_playground_editor_role.yaml new file mode 100644 index 0000000..2638557 --- /dev/null +++ b/config/rbac/inference_playground_editor_role.yaml @@ -0,0 +1,31 @@ +# permissions for end users to edit playgrounds. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: playground-editor-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + name: playground-editor-role +rules: +- apiGroups: + - inference.llmaz.io + resources: + - playgrounds + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - inference.llmaz.io + resources: + - playgrounds/status + verbs: + - get diff --git a/config/rbac/inference_playground_viewer_role.yaml b/config/rbac/inference_playground_viewer_role.yaml new file mode 100644 index 0000000..6e2237e --- /dev/null +++ b/config/rbac/inference_playground_viewer_role.yaml @@ -0,0 +1,27 @@ +# permissions for end users to view playgrounds. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: playground-viewer-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + name: playground-viewer-role +rules: +- apiGroups: + - inference.llmaz.io + resources: + - playgrounds + verbs: + - get + - list + - watch +- apiGroups: + - inference.llmaz.io + resources: + - playgrounds/status + verbs: + - get diff --git a/config/rbac/inference_service_editor_role.yaml b/config/rbac/inference_service_editor_role.yaml new file mode 100644 index 0000000..70cbcba --- /dev/null +++ b/config/rbac/inference_service_editor_role.yaml @@ -0,0 +1,31 @@ +# permissions for end users to edit services. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: service-editor-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + name: service-editor-role +rules: +- apiGroups: + - inference.llmaz.io + resources: + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - inference.llmaz.io + resources: + - services/status + verbs: + - get diff --git a/config/rbac/inference_viewer_role.yaml b/config/rbac/inference_viewer_role.yaml index a7adab2..580bcaf 100644 --- a/config/rbac/inference_viewer_role.yaml +++ b/config/rbac/inference_viewer_role.yaml @@ -1,27 +1,47 @@ +<<<<<<<< Updated upstream:config/rbac/inference_viewer_role.yaml # permissions for end users to view inferences. +======== +# permissions for end users to view services. +>>>>>>>> Stashed changes:config/rbac/inference_service_viewer_role.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: app.kubernetes.io/name: clusterrole +<<<<<<<< Updated upstream:config/rbac/inference_viewer_role.yaml app.kubernetes.io/instance: inference-viewer-role +======== + app.kubernetes.io/instance: service-viewer-role +>>>>>>>> Stashed changes:config/rbac/inference_service_viewer_role.yaml app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize +<<<<<<<< Updated upstream:config/rbac/inference_viewer_role.yaml name: inference-viewer-role +======== + name: service-viewer-role +>>>>>>>> Stashed changes:config/rbac/inference_service_viewer_role.yaml rules: - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: +<<<<<<<< Updated upstream:config/rbac/inference_viewer_role.yaml - inferences +======== + - services +>>>>>>>> Stashed changes:config/rbac/inference_service_viewer_role.yaml verbs: - get - list - watch - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: +<<<<<<<< Updated upstream:config/rbac/inference_viewer_role.yaml - inferences/status +======== + - services/status +>>>>>>>> Stashed changes:config/rbac/inference_service_viewer_role.yaml verbs: - get diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml index 1d231ab..1b4f59e 100644 --- a/config/rbac/leader_election_role.yaml +++ b/config/rbac/leader_election_role.yaml @@ -6,8 +6,8 @@ metadata: app.kubernetes.io/name: role app.kubernetes.io/instance: leader-election-role app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: leader-election-role rules: diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml index 40c0ba5..9e982d1 100644 --- a/config/rbac/leader_election_role_binding.yaml +++ b/config/rbac/leader_election_role_binding.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: rolebinding app.kubernetes.io/instance: leader-election-rolebinding app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: leader-election-rolebinding roleRef: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 9437041..da058b7 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -5,9 +5,9 @@ metadata: name: manager-role rules: - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - inferences + - playgrounds verbs: - create - delete @@ -17,15 +17,67 @@ rules: - update - watch - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - inferences/finalizers + - playgrounds/finalizers verbs: - update - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - inferences/status + - playgrounds/status + verbs: + - get + - patch + - update +- apiGroups: + - inference.llmaz.io + resources: + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - inference.llmaz.io + resources: + - services/finalizers + verbs: + - update +- apiGroups: + - inference.llmaz.io + resources: + - services/status + verbs: + - get + - patch + - update +- apiGroups: + - llmaz.io + resources: + - models + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - llmaz.io + resources: + - models/finalizers + verbs: + - update +- apiGroups: + - llmaz.io + resources: + - models/status verbs: - get - patch diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml index 6e4138e..6911993 100644 --- a/config/rbac/role_binding.yaml +++ b/config/rbac/role_binding.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: clusterrolebinding app.kubernetes.io/instance: manager-rolebinding app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: manager-rolebinding roleRef: diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml index 3a2627b..09904ae 100644 --- a/config/rbac/service_account.yaml +++ b/config/rbac/service_account.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: serviceaccount app.kubernetes.io/instance: controller-manager-sa app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: controller-manager namespace: system diff --git a/config/samples/llmaz_v1alpha1_inference.yaml b/config/samples/_v1alpha1_model.yaml similarity index 51% rename from config/samples/llmaz_v1alpha1_inference.yaml rename to config/samples/_v1alpha1_model.yaml index 99e3c89..1c08323 100644 --- a/config/samples/llmaz_v1alpha1_inference.yaml +++ b/config/samples/_v1alpha1_model.yaml @@ -1,12 +1,12 @@ -apiVersion: llmaz.inftyai.io/v1alpha1 -kind: Inference +apiVersion: llmaz.io/v1alpha1 +kind: Model metadata: labels: - app.kubernetes.io/name: inference - app.kubernetes.io/instance: inference-sample + app.kubernetes.io/name: model + app.kubernetes.io/instance: model-sample app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize app.kubernetes.io/created-by: llmaz - name: inference-sample + name: model-sample spec: # TODO(user): Add fields here diff --git a/config/samples/inference_v1alpha1_playground.yaml b/config/samples/inference_v1alpha1_playground.yaml new file mode 100644 index 0000000..e1eb086 --- /dev/null +++ b/config/samples/inference_v1alpha1_playground.yaml @@ -0,0 +1,12 @@ +apiVersion: inference.llmaz.io/v1alpha1 +kind: Playground +metadata: + labels: + app.kubernetes.io/name: playground + app.kubernetes.io/instance: playground-sample + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: llmaz + name: playground-sample +spec: + # TODO(user): Add fields here diff --git a/config/samples/inference_v1alpha1_service.yaml b/config/samples/inference_v1alpha1_service.yaml new file mode 100644 index 0000000..0d0c053 --- /dev/null +++ b/config/samples/inference_v1alpha1_service.yaml @@ -0,0 +1,12 @@ +apiVersion: inference.llmaz.io/v1alpha1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: service + app.kubernetes.io/instance: service-sample + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: llmaz + name: service-sample +spec: + # TODO(user): Add fields here diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 2770577..13cdf98 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -1,4 +1,6 @@ ## Append samples of your project ## resources: -- llmaz_v1alpha1_inference.yaml +- inference_v1alpha1_service.yaml +- inference_v1alpha1_playground.yaml +- _v1alpha1_model.yaml #+kubebuilder:scaffold:manifestskustomizesamples diff --git a/go.mod b/go.mod index bad51f5..c975355 100644 --- a/go.mod +++ b/go.mod @@ -1,16 +1,18 @@ -module inftyai.io/llmaz +module inftyai.com/llmaz -go 1.22 +go 1.22.0 toolchain go1.22.1 require ( - github.com/onsi/ginkgo/v2 v2.16.0 - github.com/onsi/gomega v1.31.1 - k8s.io/apimachinery v0.29.2 - k8s.io/client-go v0.29.2 - sigs.k8s.io/controller-runtime v0.17.2 - sigs.k8s.io/lws v0.1.0 + github.com/onsi/ginkgo/v2 v2.19.0 + github.com/onsi/gomega v1.33.1 + k8s.io/api v0.29.5 + k8s.io/apimachinery v0.29.5 + k8s.io/client-go v0.29.5 + sigs.k8s.io/controller-runtime v0.17.3 + sigs.k8s.io/lws v0.3.0 + ) require ( @@ -25,14 +27,14 @@ require ( github.com/go-openapi/jsonpointer v0.19.6 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/swag v0.22.3 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.3 // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect + github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect github.com/google/uuid v1.3.0 // indirect github.com/imdario/mergo v0.3.12 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -51,24 +53,23 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.26.0 // indirect golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect - golang.org/x/net v0.20.0 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/term v0.16.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.17.0 // indirect + golang.org/x/tools v0.21.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/protobuf v1.31.0 // indirect + google.golang.org/protobuf v1.33.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.29.2 // indirect - k8s.io/apiextensions-apiserver v0.29.2 // indirect - k8s.io/component-base v0.29.2 // indirect - k8s.io/klog/v2 v2.110.1 // indirect - k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect + k8s.io/apiextensions-apiserver v0.29.5 // indirect + k8s.io/component-base v0.29.5 // indirect + k8s.io/klog/v2 v2.120.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect diff --git a/go.sum b/go.sum index f9731c3..ca568f0 100644 --- a/go.sum +++ b/go.sum @@ -2,9 +2,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -17,7 +14,6 @@ github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1 github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= @@ -28,30 +24,27 @@ github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2Kv github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU= github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -78,10 +71,10 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.16.0 h1:7q1w9frJDzninhXxjZd+Y/x54XNjG/UlRLIYPZafsPM= -github.com/onsi/ginkgo/v2 v2.16.0/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= -github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= -github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -102,7 +95,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= @@ -128,8 +120,8 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -137,25 +129,24 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= -golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= -golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -164,10 +155,8 @@ gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -180,28 +169,28 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A= -k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0= -k8s.io/apiextensions-apiserver v0.29.2 h1:UK3xB5lOWSnhaCk0RFZ0LUacPZz9RY4wi/yt2Iu+btg= -k8s.io/apiextensions-apiserver v0.29.2/go.mod h1:aLfYjpA5p3OwtqNXQFkhJ56TB+spV8Gc4wfMhUA3/b8= -k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8= -k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= -k8s.io/client-go v0.29.2 h1:FEg85el1TeZp+/vYJM7hkDlSTFZ+c5nnK44DJ4FyoRg= -k8s.io/client-go v0.29.2/go.mod h1:knlvFZE58VpqbQpJNbCbctTVXcd35mMyAAwBdpt4jrA= -k8s.io/component-base v0.29.2 h1:lpiLyuvPA9yV1aQwGLENYyK7n/8t6l3nn3zAtFTJYe8= -k8s.io/component-base v0.29.2/go.mod h1:BfB3SLrefbZXiBfbM+2H1dlat21Uewg/5qtKOl8degM= -k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= -k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= -k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= -k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/api v0.29.5 h1:levS+umUigHCfI3riD36pMY1vQEbrzh4r1ivVWAhHaI= +k8s.io/api v0.29.5/go.mod h1:7b18TtPcJzdjk7w5zWyIHgoAtpGeRvGGASxlS7UZXdQ= +k8s.io/apiextensions-apiserver v0.29.5 h1:njDywexhE6n+1NEl3A4axT0TMQHREnndrk3/ztdWcNE= +k8s.io/apiextensions-apiserver v0.29.5/go.mod h1:pfIvij+MH9a8NQKtW7MD4EFnzvUjJ1ZQsDL8wuP8fnc= +k8s.io/apimachinery v0.29.5 h1:Hofa2BmPfpoT+IyDTlcPdCHSnHtEQMoJYGVoQpRTfv4= +k8s.io/apimachinery v0.29.5/go.mod h1:i3FJVwhvSp/6n8Fl4K97PJEP8C+MM+aoDq4+ZJBf70Y= +k8s.io/client-go v0.29.5 h1:nlASXmPQy190qTteaVP31g3c/wi2kycznkTP7Sv1zPc= +k8s.io/client-go v0.29.5/go.mod h1:aY5CnqUUvXYccJhm47XHoPcRyX6vouHdIBHaKZGTbK4= +k8s.io/component-base v0.29.5 h1:Ptj8AzG+p8c2a839XriHwxakDpZH9uvIgYz+o1agjg8= +k8s.io/component-base v0.29.5/go.mod h1:9nBUoPxW/yimISIgAG7sJDrUGJlu7t8HnDafIrOdU8Q= +k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= +k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -sigs.k8s.io/controller-runtime v0.17.2 h1:FwHwD1CTUemg0pW2otk7/U5/i5m2ymzvOXdbeGOUvw0= -sigs.k8s.io/controller-runtime v0.17.2/go.mod h1:+MngTvIQQQhfXtwfdGw/UOQ/aIaqsYywfCINOtwMO/s= +sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk= +sigs.k8s.io/controller-runtime v0.17.3/go.mod h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/lws v0.1.0 h1:fcsAHN6BcjwcH5lgjZphVRjFy2Ack550a5CpWUqBuRQ= -sigs.k8s.io/lws v0.1.0/go.mod h1:9wojYpN6WFa6JUWccK0DzNHuYvqa9a/npKKlVENmY1I= +sigs.k8s.io/lws v0.3.0 h1:PtjiDHZWCxAeMyrsmPNN0i7KAVf6ocVEQFcojPWeA+k= +sigs.k8s.io/lws v0.3.0/go.mod h1:/R1Q2LB2eg6t9mX5M6V4HLkeucxBFgOyaKkSGh/FGAY= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt index 65b8622..ff72ff2 100644 --- a/hack/boilerplate.go.txt +++ b/hack/boilerplate.go.txt @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/internal/controller/inference/playground_controller.go b/internal/controller/inference/playground_controller.go new file mode 100644 index 0000000..1b0f6e9 --- /dev/null +++ b/internal/controller/inference/playground_controller.go @@ -0,0 +1,62 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inference + +import ( + "context" + + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1" +) + +// PlaygroundReconciler reconciles a Playground object +type PlaygroundReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=playgrounds,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=playgrounds/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=playgrounds/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the Playground object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile +func (r *PlaygroundReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + _ = log.FromContext(ctx) + + // TODO(user): your logic here + + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *PlaygroundReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&inferencev1alpha1.Playground{}). + Complete(r) +} diff --git a/internal/controller/inference_controller.go b/internal/controller/inference/service_controller.go similarity index 70% rename from internal/controller/inference_controller.go rename to internal/controller/inference/service_controller.go index 960800b..633710b 100644 --- a/internal/controller/inference_controller.go +++ b/internal/controller/inference/service_controller.go @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package controller +package inference import ( "context" @@ -24,29 +24,29 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - llmaz "inftyai.io/llmaz/api/v1alpha1" + inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1" ) -// InferenceReconciler reconciles a Inference object -type InferenceReconciler struct { +// ServiceReconciler reconciles a Service object +type ServiceReconciler struct { client.Client Scheme *runtime.Scheme } -//+kubebuilder:rbac:groups=llmaz.inftyai.io,resources=inferences,verbs=get;list;watch;create;update;patch;delete -//+kubebuilder:rbac:groups=llmaz.inftyai.io,resources=inferences/status,verbs=get;update;patch -//+kubebuilder:rbac:groups=llmaz.inftyai.io,resources=inferences/finalizers,verbs=update +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. // TODO(user): Modify the Reconcile function to compare the state specified by -// the Inference object against the actual cluster state, and then +// the Service object against the actual cluster state, and then // perform operations to make the cluster state reflect the state specified by // the user. // // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile -func (r *InferenceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = log.FromContext(ctx) // TODO(user): your logic here @@ -55,8 +55,8 @@ func (r *InferenceReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } // SetupWithManager sets up the controller with the Manager. -func (r *InferenceReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *ServiceReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&llmaz.Inference{}). + For(&inferencev1alpha1.Service{}). Complete(r) } diff --git a/internal/controller/inference/suite_test.go b/internal/controller/inference/suite_test.go new file mode 100644 index 0000000..c724a03 --- /dev/null +++ b/internal/controller/inference/suite_test.go @@ -0,0 +1,90 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inference + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1" + //+kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + +func TestControllers(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Controller Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + + // The BinaryAssetsDirectory is only required if you want to run the tests directly + // without call the makefile target test. If not informed it will look for the + // default path defined in controller-runtime which is /usr/local/kubebuilder/. + // Note that you must have the required binaries setup under the bin directory to perform + // the tests directly. When we run make test it will be setup and used automatically. + BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", + fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + // cfg is defined in this file globally. + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = inferencev1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + //+kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/internal/controller/model_controller.go b/internal/controller/model_controller.go new file mode 100644 index 0000000..46a97f9 --- /dev/null +++ b/internal/controller/model_controller.go @@ -0,0 +1,62 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1" +) + +// ModelReconciler reconciles a Model object +type ModelReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=llmaz.io,resources=models,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=llmaz.io,resources=models/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=llmaz.io,resources=models/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the Model object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile +func (r *ModelReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + _ = log.FromContext(ctx) + + // TODO(user): your logic here + + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *ModelReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&llmaziov1alpha1.Model{}). + Complete(r) +} diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 4b5708b..f8d050b 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - llmaz "inftyai.io/llmaz/api/v1alpha1" + llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1" //+kubebuilder:scaffold:imports ) @@ -72,7 +72,7 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) - err = llmaz.AddToScheme(scheme.Scheme) + err = llmaziov1alpha1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) //+kubebuilder:scaffold:scheme