Add Inference API
Signed-off-by: kerthcet <[email protected]>
kerthcet committed Jul 12, 2024
1 parent 460baf4 commit 01a112a
Show file tree
Hide file tree
Showing 46 changed files with 4,102 additions and 313 deletions.
30 changes: 24 additions & 6 deletions PROJECT
@@ -2,19 +2,37 @@
 # This file is used to track the info used to scaffold your project
 # and allow the plugins properly work.
 # More info: https://book.kubebuilder.io/reference/project-config.html
-domain: inftyai.io
+domain: llmaz.io
 layout:
 - go.kubebuilder.io/v4
+multigroup: true
 projectName: llmaz
-repo: inftyai.io/llmaz
+repo: inftyai.com/llmaz
 resources:
 - api:
     crdVersion: v1
     namespaced: true
   controller: true
-  domain: inftyai.io
-  group: llmaz
-  kind: Inference
-  path: inftyai.io/llmaz/api/v1alpha1
+  domain: llmaz.io
+  group: inference
+  kind: Service
+  path: inftyai.com/llmaz/api/inference/v1alpha1
   version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: llmaz.io
+  group: inference
+  kind: Playground
+  path: inftyai.com/llmaz/api/inference/v1alpha1
+  version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: llmaz.io
+  kind: Model
+  path: inftyai.com/llmaz/api/v1alpha1
+  version: v1alpha1
 version: "3"
8 changes: 7 additions & 1 deletion README.md
@@ -1,3 +1,9 @@
 # llmaz
 
-llmaz, pronounced as /lima:z/, is a building block for users to serve their LLMs on Kubernetes in a breeze.
+☸️ Effortlessly operating LLMs on Kubernetes, e.g. Serving.
+
+## Roadmap
+
+- Serverless support
+- CLI tool
+- Gateway support
36 changes: 36 additions & 0 deletions api/inference/v1alpha1/groupversion_info.go
@@ -0,0 +1,36 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package v1alpha1 contains API Schema definitions for the inference v1alpha1 API group
// +kubebuilder:object:generate=true
// +groupName=inference.llmaz.io
package v1alpha1

import (
    "k8s.io/apimachinery/pkg/runtime/schema"
    "sigs.k8s.io/controller-runtime/pkg/scheme"
)

var (
    // GroupVersion is group version used to register these objects
    GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"}

    // SchemeBuilder is used to add go types to the GroupVersionKind scheme
    SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

    // AddToScheme adds the types in this group-version to the given scheme.
    AddToScheme = SchemeBuilder.AddToScheme
)
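
For readers unfamiliar with the kubebuilder scaffolding, the sketch below shows how this group-version is typically wired into a controller-runtime scheme, usually in cmd/main.go. It is not part of this commit; the example package name, the newScheme helper, and the import aliases are illustrative.

package example

import (
    "k8s.io/apimachinery/pkg/runtime"
    utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    clientgoscheme "k8s.io/client-go/kubernetes/scheme"

    inferenceapi "inftyai.com/llmaz/api/inference/v1alpha1"
)

// newScheme registers both the built-in Kubernetes types and the new
// inference.llmaz.io/v1alpha1 types, mirroring how a controller-runtime
// manager is usually wired up in the project's main package.
func newScheme() *runtime.Scheme {
    scheme := runtime.NewScheme()
    utilruntime.Must(clientgoscheme.AddToScheme(scheme))
    utilruntime.Must(inferenceapi.AddToScheme(scheme))
    return scheme
}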
71 changes: 71 additions & 0 deletions api/inference/v1alpha1/playground_types.go
@@ -0,0 +1,71 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
    api "inftyai.com/llmaz/api/v1alpha1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// PlaygroundSpec defines the desired state of Playground
type PlaygroundSpec struct {
    // Replicas represents the replica number of inference workloads.
    // +kubebuilder:default=1
    // +optional
    Replicas *int32 `json:"replicas,omitempty"`
    // ModelsClaim represents the references to multiple models.
    ModelsClaim api.ModelsClaim `json:"modelsClaim"`
    // BackendConfig represents the inference backend configuration
    // under the hood, e.g. vLLM, which is the default backend.
    // +optional
    BackendConfig *BackendConfig `json:"backendConfig,omitempty"`
    // ElasticConfig defines the configuration for elastic usage,
    // e.g. the max/min replicas. Defaults to 0 ~ Inf+.
    // +optional
    ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
}

// PlaygroundStatus defines the observed state of Playground
type PlaygroundStatus struct {
    // Conditions represents the Playground conditions.
    Conditions []metav1.Condition `json:"conditions,omitempty"`
}

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status

// Playground is the Schema for the playgrounds API
type Playground struct {
    metav1.TypeMeta   `json:",inline"`
    metav1.ObjectMeta `json:"metadata,omitempty"`

    Spec   PlaygroundSpec   `json:"spec,omitempty"`
    Status PlaygroundStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// PlaygroundList contains a list of Playground
type PlaygroundList struct {
    metav1.TypeMeta `json:",inline"`
    metav1.ListMeta `json:"metadata,omitempty"`
    Items           []Playground `json:"items"`
}

func init() {
    SchemeBuilder.Register(&Playground{}, &PlaygroundList{})
}
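
As a rough illustration of the API surface above, the following sketch creates a Playground with the controller-runtime client. It is not part of this commit: the createPlayground helper, the object name, namespace, replica count, and backend args are made up, and ModelsClaim is left empty because its fields live in the core llmaz API package, which is not shown in this diff.

package example

import (
    "context"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "sigs.k8s.io/controller-runtime/pkg/client"

    inferenceapi "inftyai.com/llmaz/api/inference/v1alpha1"
    api "inftyai.com/llmaz/api/v1alpha1"
)

// createPlayground submits a Playground that asks for two vLLM-backed replicas.
func createPlayground(ctx context.Context, c client.Client) error {
    replicas := int32(2)
    backend := inferenceapi.BackendName("vllm")

    pg := &inferenceapi.Playground{
        ObjectMeta: metav1.ObjectMeta{
            Name:      "llama2-7b",
            Namespace: "default",
        },
        Spec: inferenceapi.PlaygroundSpec{
            Replicas: &replicas,
            // ModelsClaim is defined in the core llmaz API package and its
            // fields are not part of this diff, so it is left empty here.
            ModelsClaim: api.ModelsClaim{},
            BackendConfig: &inferenceapi.BackendConfig{
                Name: &backend,
                Args: []string{"--max-model-len", "4096"},
            },
        },
    }
    return c.Create(ctx, pg)
}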
71 changes: 71 additions & 0 deletions api/inference/v1alpha1/service_types.go
@@ -0,0 +1,71 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    lws "sigs.k8s.io/lws/api/leaderworkerset/v1"

    api "inftyai.com/llmaz/api/v1alpha1"
)

// ServiceSpec defines the desired state of Service.
// The Service controller maintains multiple flavors of workloads with
// different accelerators for cost or performance considerations.
type ServiceSpec struct {
    // ModelsClaim represents the references to multiple models.
    // Note: properties (e.g. nodeSelectors, resources) of the model flavors
    // will be applied to the workload if not already set.
    ModelsClaim api.ModelsClaim `json:"modelsClaim"`
    // WorkloadTemplate defines the underlying workload layout and configuration.
    WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"`
    // ElasticConfig defines the configuration for elastic usage,
    // e.g. the max/min replicas. Defaults to 0 ~ Inf+.
    // +optional
    ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
}

// ServiceStatus defines the observed state of Service
type ServiceStatus struct {
    // Conditions represents the Service conditions.
    Conditions []metav1.Condition `json:"conditions,omitempty"`
}

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status

// Service is the Schema for the services API
type Service struct {
    metav1.TypeMeta   `json:",inline"`
    metav1.ObjectMeta `json:"metadata,omitempty"`

    Spec   ServiceSpec   `json:"spec,omitempty"`
    Status ServiceStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// ServiceList contains a list of Service
type ServiceList struct {
    metav1.TypeMeta `json:",inline"`
    metav1.ListMeta `json:"metadata,omitempty"`
    Items           []Service `json:"items"`
}

func init() {
    SchemeBuilder.Register(&Service{}, &ServiceList{})
}
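
The sketch below, under the same caveats, constructs a Service whose WorkloadTemplate embeds the upstream LeaderWorkerSetSpec. Only the replica-related fields are filled in; the leader/worker pod templates and the ModelsClaim contents are omitted because they are defined outside this diff, and the newService helper plus all concrete values are illustrative assumptions.

package example

import (
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/utils/ptr"
    lws "sigs.k8s.io/lws/api/leaderworkerset/v1"

    inferenceapi "inftyai.com/llmaz/api/inference/v1alpha1"
    api "inftyai.com/llmaz/api/v1alpha1"
)

// newService builds a Service that runs two replicas of the lws-managed
// workload and allows elastic scaling between 1 and 4 replicas.
func newService() *inferenceapi.Service {
    return &inferenceapi.Service{
        ObjectMeta: metav1.ObjectMeta{Name: "llama2-7b", Namespace: "default"},
        Spec: inferenceapi.ServiceSpec{
            // ModelsClaim is defined in the core llmaz API package and is
            // left empty here because its fields are not part of this diff.
            ModelsClaim: api.ModelsClaim{},
            WorkloadTemplate: lws.LeaderWorkerSetSpec{
                Replicas: ptr.To[int32](2),
                // Leader/worker pod templates follow the upstream
                // LeaderWorkerSet API and are omitted in this sketch.
            },
            ElasticConfig: &inferenceapi.ElasticConfig{
                MinReplicas: ptr.To[int32](1),
                MaxReplicas: ptr.To[int32](4),
            },
        },
    }
}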
50 changes: 50 additions & 0 deletions api/inference/v1alpha1/types.go
@@ -0,0 +1,50 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import corev1 "k8s.io/api/core/v1"

type BackendName string

type BackendConfig struct {
    // Name represents the inference backend under the hood, e.g. vLLM.
    // +kubebuilder:validation:Enum={vllm}
    // +kubebuilder:default=vllm
    // +optional
    Name *BackendName `json:"name"`
    // Version represents the backend version if you want a different one
    // from the default version.
    // +optional
    Version *string `json:"version,omitempty"`
    // Args represents the arguments passed to the backend.
    // +optional
    Args []string `json:"args,omitempty"`
    // Envs represents the environment variables set in the container.
    // +optional
    Envs []corev1.EnvVar `json:"envs,omitempty"`
}

type ElasticConfig struct {
    // MinReplicas indicates the minimum number of inference workloads based on the traffic.
    // Defaults to nil, which means the instances can be scaled down to 0.
    // +optional
    MinReplicas *int32 `json:"minReplicas,omitempty"`
    // MaxReplicas indicates the maximum number of inference workloads based on the traffic.
    // Defaults to nil, which means there is no upper limit on the number of instances.
    // +optional
    MaxReplicas *int32 `json:"maxReplicas,omitempty"`
}
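
To make the documented nil-defaults of ElasticConfig concrete, here is a hypothetical consumer-side helper (not part of this commit) that resolves the effective replica bounds: a nil MinReplicas allows scale-to-zero, and a nil MaxReplicas leaves the upper bound effectively unlimited.

package example

import (
    "math"

    inferenceapi "inftyai.com/llmaz/api/inference/v1alpha1"
)

// resolveReplicaBounds applies the documented defaults of ElasticConfig:
// 0 when MinReplicas is nil, an effectively unbounded maximum when
// MaxReplicas is nil.
func resolveReplicaBounds(cfg *inferenceapi.ElasticConfig) (minReplicas, maxReplicas int32) {
    minReplicas, maxReplicas = 0, math.MaxInt32
    if cfg == nil {
        return minReplicas, maxReplicas
    }
    if cfg.MinReplicas != nil {
        minReplicas = *cfg.MinReplicas
    }
    if cfg.MaxReplicas != nil {
        maxReplicas = *cfg.MaxReplicas
    }
    return minReplicas, maxReplicas
}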