Skip to content

Commit 01a112a

Browse files
committed
Add Inference API
Signed-off-by: kerthcet <[email protected]>
1 parent 460baf4 commit 01a112a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+4102
-313
lines changed

PROJECT

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,37 @@
22
# This file is used to track the info used to scaffold your project
33
# and allow the plugins properly work.
44
# More info: https://book.kubebuilder.io/reference/project-config.html
5-
domain: inftyai.io
5+
domain: llmaz.io
66
layout:
77
- go.kubebuilder.io/v4
8+
multigroup: true
89
projectName: llmaz
9-
repo: inftyai.io/llmaz
10+
repo: inftyai.com/llmaz
1011
resources:
1112
- api:
1213
crdVersion: v1
1314
namespaced: true
1415
controller: true
15-
domain: inftyai.io
16-
group: llmaz
17-
kind: Inference
18-
path: inftyai.io/llmaz/api/v1alpha1
16+
domain: llmaz.io
17+
group: inference
18+
kind: Service
19+
path: inftyai.com/llmaz/api/inference/v1alpha1
20+
version: v1alpha1
21+
- api:
22+
crdVersion: v1
23+
namespaced: true
24+
controller: true
25+
domain: llmaz.io
26+
group: inference
27+
kind: Playground
28+
path: inftyai.com/llmaz/api/inference/v1alpha1
29+
version: v1alpha1
30+
- api:
31+
crdVersion: v1
32+
namespaced: true
33+
controller: true
34+
domain: llmaz.io
35+
kind: Model
36+
path: inftyai.com/llmaz/api/v1alpha1
1937
version: v1alpha1
2038
version: "3"

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
11
# llmaz
22

3-
llmaz, pronounced as /lima:z/, is a building block for users to serve their LLMs on Kubernetes in a breeze.
3+
☸️ Effortlessly operating LLMs on Kubernetes, e.g. Serving.
4+
5+
## Roadmap
6+
7+
- Serverless support
8+
- CLI tool
9+
- Gateway support
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
/*
Copyright 2024.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package v1alpha1 contains API Schema definitions for the inference v1alpha1 API group
// +kubebuilder:object:generate=true
// +groupName=inference.llmaz.io
package v1alpha1

import (
	"k8s.io/apimachinery/pkg/runtime/schema"
	"sigs.k8s.io/controller-runtime/pkg/scheme"
)

var (
	// GroupVersion is group version used to register these objects.
	GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"}

	// SchemeBuilder is used to add go types to the GroupVersionKind scheme.
	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

	// AddToScheme adds the types in this group-version to the given scheme.
	AddToScheme = SchemeBuilder.AddToScheme
)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
/*
Copyright 2024.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
	api "inftyai.com/llmaz/api/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// PlaygroundSpec defines the desired state of Playground.
type PlaygroundSpec struct {
	// Replicas represents the replica number of inference workloads.
	// +kubebuilder:default=1
	// +optional
	Replicas *int32 `json:"replicas,omitempty"`
	// ModelsClaim represents the references to multiple models.
	ModelsClaim api.ModelsClaim `json:"modelsClaim"`
	// BackendConfig represents the inference backend configuration
	// under the hood, e.g. vLLM, which is the default backend.
	// +optional
	BackendConfig *BackendConfig `json:"backendConfig,omitempty"`
	// ElasticConfig defines the configuration for elastic usage,
	// e.g. the max/min replicas. Default to 0 ~ Inf+.
	// +optional
	ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
}

// PlaygroundStatus defines the observed state of Playground.
type PlaygroundStatus struct {
	// Conditions represents the Playground condition.
	Conditions []metav1.Condition `json:"conditions,omitempty"`
}

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status

// Playground is the Schema for the playgrounds API.
type Playground struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   PlaygroundSpec   `json:"spec,omitempty"`
	Status PlaygroundStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// PlaygroundList contains a list of Playground.
type PlaygroundList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []Playground `json:"items"`
}

// init registers the Playground types with the scheme builder so
// AddToScheme can install them into a runtime.Scheme.
func init() {
	SchemeBuilder.Register(&Playground{}, &PlaygroundList{})
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
/*
Copyright 2024.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	lws "sigs.k8s.io/lws/api/leaderworkerset/v1"

	api "inftyai.com/llmaz/api/v1alpha1"
)

// ServiceSpec defines the desired state of Service.
// Service controller will maintain multi-flavor of workloads with
// different accelerators for cost or performance considerations.
type ServiceSpec struct {
	// ModelsClaim represents the references to multiple models.
	// Note: properties (nodeSelectors, resources, e.g.) of the model flavors
	// will be applied to the workload if not exist.
	ModelsClaim api.ModelsClaim `json:"modelsClaim"`
	// WorkloadTemplate defines the underlying workload layout and configuration.
	WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"`
	// ElasticConfig defines the configuration for elastic usage,
	// e.g. the max/min replicas. Default to 0 ~ Inf+.
	// +optional
	ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
}

// ServiceStatus defines the observed state of Service.
type ServiceStatus struct {
	// Conditions represents the Service condition.
	Conditions []metav1.Condition `json:"conditions,omitempty"`
}

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status

// Service is the Schema for the services API.
type Service struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   ServiceSpec   `json:"spec,omitempty"`
	Status ServiceStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// ServiceList contains a list of Service.
type ServiceList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []Service `json:"items"`
}

// init registers the Service types with the scheme builder so
// AddToScheme can install them into a runtime.Scheme.
func init() {
	SchemeBuilder.Register(&Service{}, &ServiceList{})
}

api/inference/v1alpha1/types.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha1
18+
19+
import corev1 "k8s.io/api/core/v1"
20+
21+
type BackendName string
22+
23+
type BackendConfig struct {
24+
// Name represents the inference backend under the hood, e.g. vLLM.
25+
// +kubebuilder:validation:Enum={vllm}
26+
// +kubebuilder:default=vllm
27+
// +optional
28+
Name *BackendName `json:"name"`
29+
// Version represents the backend version if you want a different one
30+
// from the default version.
31+
// +optional
32+
Version *string `json:"version,omitempty"`
33+
// Args represents the arguments passed to the backend.
34+
// +optional
35+
Args []string `json:"args,omitempty"`
36+
// Envs represents the environments set to the container.
37+
// +optional
38+
Envs []corev1.EnvVar `json:"envs,omitempty"`
39+
}
40+
41+
type ElasticConfig struct {
42+
// MinReplicas indicates the minimum number of inference workloads based on the traffic.
43+
// Default to nil means we can scale down the instances to 0.
44+
// +optional
45+
MinReplicas *int32 `json:"minReplicas,omitempty"`
46+
// MaxReplicas indicates the maximum number of inference workloads based on the traffic.
47+
// Default to nil means there's no limit for the instance number.
48+
// +optional
49+
MaxReplicas *int32 `json:"maxReplicas,omitempty"`
50+
}

0 commit comments

Comments
 (0)