-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: kerthcet <[email protected]>
- Loading branch information
Showing
46 changed files
with
4,102 additions
and
313 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,9 @@ | ||
# llmaz | ||
|
||
llmaz, pronounced as /lima:z/, is a building block for users to serve their LLMs on Kubernetes in a breeze. | ||
☸️ Effortlessly operating LLMs on Kubernetes, e.g. Serving. | ||
|
||
## Roadmap | ||
|
||
- Serverless support | ||
- CLI tool | ||
- Gateway support |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
Copyright 2024. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
// Package v1alpha1 contains API Schema definitions for the inference v1alpha1 API group | ||
// +kubebuilder:object:generate=true | ||
// +groupName=inference.llmaz.io | ||
package v1alpha1 | ||
|
||
import ( | ||
"k8s.io/apimachinery/pkg/runtime/schema" | ||
"sigs.k8s.io/controller-runtime/pkg/scheme" | ||
) | ||
|
||
var ( | ||
// GroupVersion is group version used to register these objects | ||
GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"} | ||
|
||
// SchemeBuilder is used to add go types to the GroupVersionKind scheme | ||
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} | ||
|
||
// AddToScheme adds the types in this group-version to the given scheme. | ||
AddToScheme = SchemeBuilder.AddToScheme | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
Copyright 2024. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package v1alpha1 | ||
|
||
import ( | ||
api "inftyai.com/llmaz/api/v1alpha1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
) | ||
|
||
// PlaygroundSpec defines the desired state of Playground | ||
type PlaygroundSpec struct { | ||
// Replicas represents the replica number of inference workloads. | ||
// +kubebuilder:default=1 | ||
// +optional | ||
Replicas *int32 `json:"replicas,omitempty"` | ||
// ModelsClaim represents the references to multiple models. | ||
ModelsClaim api.ModelsClaim `json:"modelsClaim"` | ||
// BackendConfig represents the inference backend configuration | ||
// under the hood, e.g. vLLM, which is the default backend. | ||
// +optional | ||
BackendConfig *BackendConfig `json:"backendConfig,omitempty"` | ||
// ElasticConfig defines the configuration for elastic usage, | ||
// e.g. the max/min replicas. Default to 0 ~ Inf+. | ||
// +optional | ||
ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"` | ||
} | ||
|
||
// PlaygroundStatus defines the observed state of Playground | ||
type PlaygroundStatus struct { | ||
// Conditions represents the Inference condition. | ||
Conditions []metav1.Condition `json:"conditions,omitempty"` | ||
} | ||
|
||
//+kubebuilder:object:root=true | ||
//+kubebuilder:subresource:status | ||
|
||
// Playground is the Schema for the playgrounds API | ||
type Playground struct { | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ObjectMeta `json:"metadata,omitempty"` | ||
|
||
Spec PlaygroundSpec `json:"spec,omitempty"` | ||
Status PlaygroundStatus `json:"status,omitempty"` | ||
} | ||
|
||
//+kubebuilder:object:root=true | ||
|
||
// PlaygroundList contains a list of Playground | ||
type PlaygroundList struct { | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ListMeta `json:"metadata,omitempty"` | ||
Items []Playground `json:"items"` | ||
} | ||
|
||
func init() { | ||
SchemeBuilder.Register(&Playground{}, &PlaygroundList{}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
Copyright 2024. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package v1alpha1 | ||
|
||
import ( | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
lws "sigs.k8s.io/lws/api/leaderworkerset/v1" | ||
|
||
api "inftyai.com/llmaz/api/v1alpha1" | ||
) | ||
|
||
// ServiceSpec defines the desired state of Service. | ||
// Service controller will maintain multi-flavor of workloads with | ||
// different accelerators for cost or performance considerations. | ||
type ServiceSpec struct { | ||
// ModelsClaim represents the references to multiple models. | ||
// Note: properties (nodeSelectors, resources, e.g.) of the model flavors | ||
// will be applied to the workload if not exist. | ||
ModelsClaim api.ModelsClaim `json:"modelsClaim"` | ||
// WorkloadTemplate defines the underlying workload layout and configuration. | ||
WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"` | ||
// ElasticConfig defines the configuration for elastic usage, | ||
// e.g. the max/min replicas. Default to 0 ~ Inf+. | ||
// +optional | ||
ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"` | ||
} | ||
|
||
// ServiceStatus defines the observed state of Service | ||
type ServiceStatus struct { | ||
// Conditions represents the Inference condition. | ||
Conditions []metav1.Condition `json:"conditions,omitempty"` | ||
} | ||
|
||
//+kubebuilder:object:root=true | ||
//+kubebuilder:subresource:status | ||
|
||
// Service is the Schema for the services API | ||
type Service struct { | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ObjectMeta `json:"metadata,omitempty"` | ||
|
||
Spec ServiceSpec `json:"spec,omitempty"` | ||
Status ServiceStatus `json:"status,omitempty"` | ||
} | ||
|
||
//+kubebuilder:object:root=true | ||
|
||
// ServiceList contains a list of Service | ||
type ServiceList struct { | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ListMeta `json:"metadata,omitempty"` | ||
Items []Service `json:"items"` | ||
} | ||
|
||
func init() { | ||
SchemeBuilder.Register(&Service{}, &ServiceList{}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* | ||
Copyright 2024. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package v1alpha1 | ||
|
||
import corev1 "k8s.io/api/core/v1" | ||
|
||
// BackendName is the name of an inference backend, e.g. "vllm".
type BackendName string
|
||
type BackendConfig struct { | ||
// Name represents the inference backend under the hood, e.g. vLLM. | ||
// +kubebuilder:validation:Enum={vllm} | ||
// +kubebuilder:default=vllm | ||
// +optional | ||
Name *BackendName `json:"name"` | ||
// Version represents the backend version if you want a different one | ||
// from the default version. | ||
// +optional | ||
Version *string `json:"version,omitempty"` | ||
// Args represents the arguments passed to the backend. | ||
// +optional | ||
Args []string `json:"args,omitempty"` | ||
// Envs represents the environments set to the container. | ||
// +optional | ||
Envs []corev1.EnvVar `json:"envs,omitempty"` | ||
} | ||
|
||
// ElasticConfig bounds the number of inference workload replicas when the
// workload is scaled based on the traffic.
type ElasticConfig struct {
	// MinReplicas indicates the minimum number of inference workloads based on the traffic.
	// Defaults to nil, which means the instances can be scaled down to 0.
	// +optional
	MinReplicas *int32 `json:"minReplicas,omitempty"`
	// MaxReplicas indicates the maximum number of inference workloads based on the traffic.
	// Defaults to nil, which means there's no limit for the instance number.
	// +optional
	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
}
Oops, something went wrong.