Skip to content

Commit 3f0e419

Browse files
committed
Add webhook to Model
Signed-off-by: kerthcet <[email protected]>
1 parent 9add086 commit 3f0e419

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+1051
-2084
lines changed

Dockerfile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
ARG BASE_IMAGE
2+
ARG BUILDER_IMAGE
3+
14
# Build the manager binary
2-
FROM golang:1.20 as builder
5+
FROM ${BUILDER_IMAGE} as builder
36
ARG TARGETOS
47
ARG TARGETARCH
58

@@ -14,7 +17,7 @@ RUN go mod download
1417
# Copy the go source
1518
COPY cmd/main.go cmd/main.go
1619
COPY api/ api/
17-
COPY internal/controller/ internal/controller/
20+
COPY internal/ internal/
1821

1922
# Build
2023
# the GOARCH has not a default value to allow the binary be built according to the host where the command
@@ -25,7 +28,7 @@ RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o ma
2528

2629
# Use distroless as minimal base image to package the manager binary
2730
# Refer to https://github.com/GoogleContainerTools/distroless for more details
28-
FROM gcr.io/distroless/static:nonroot
31+
FROM ${BASE_IMAGE}
2932
WORKDIR /
3033
COPY --from=builder /workspace/manager .
3134
USER 65532:65532

Makefile

Lines changed: 49 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11

2-
# Image URL to use all building/pushing image targets
3-
IMG ?= controller:latest
42
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
5-
ENVTEST_K8S_VERSION = 1.28.0
3+
ENVTEST_K8S_VERSION = 1.28.3
64

75
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
86
ifeq (,$(shell go env GOBIN))
@@ -42,11 +40,39 @@ all: build
4240
help: ## Display this help.
4341
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
4442

43+
PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
44+
ARTIFACTS ?= $(PROJECT_DIR)/bin
45+
GINKGO_VERSION ?= $(shell go list -m -f '{{.Version}}' github.com/onsi/ginkgo/v2)
46+
GO_VERSION := $(shell awk '/^go /{print $$2}' go.mod|head -n1)
47+
48+
GINKGO = $(shell pwd)/bin/ginkgo
49+
.PHONY: ginkgo
50+
ginkgo: ## Download ginkgo locally if necessary.
51+
test -s $(LOCALBIN)/ginkgo || \
52+
GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/v2/ginkgo@$(GINKGO_VERSION)
53+
54+
INTEGRATION_TARGET ?= ./test/integration/...
55+
56+
BASE_IMAGE ?= gcr.io/distroless/static:nonroot
57+
DOCKER_BUILDX_CMD ?= docker buildx
58+
IMAGE_BUILD_CMD ?= $(DOCKER_BUILDX_CMD) build
59+
IMAGE_BUILD_EXTRA_OPTS ?=
60+
IMAGE_REGISTRY ?= docker.io/inftyai
61+
IMAGE_NAME ?= llmaz
62+
IMAGE_REPO := $(IMAGE_REGISTRY)/$(IMAGE_NAME)
63+
GIT_TAG ?= $(shell git describe --tags --dirty --always)
64+
IMG ?= $(IMAGE_REPO):$(GIT_TAG)
65+
BUILDER_IMAGE ?= golang:$(GO_VERSION)
66+
4567
##@ Development
4668

4769
.PHONY: manifests
4870
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
49-
$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases
71+
$(CONTROLLER_GEN) \
72+
rbac:roleName=manager-role output:rbac:artifacts:config=config/rbac \
73+
crd:generateEmbeddedObjectMeta=true output:crd:artifacts:config=config/crd/bases \
74+
webhook output:webhook:artifacts:config=config/webhook \
75+
paths="./..."
5076

5177
.PHONY: generate
5278
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
@@ -90,34 +116,6 @@ build: manifests generate fmt vet ## Build manager binary.
90116
run: manifests generate fmt vet ## Run a controller from your host.
91117
go run ./cmd/main.go
92118

93-
# If you wish to build the manager image targeting other platforms you can use the --platform flag.
94-
# (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it.
95-
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
96-
.PHONY: docker-build
97-
docker-build: ## Build docker image with the manager.
98-
$(CONTAINER_TOOL) build -t ${IMG} .
99-
100-
.PHONY: docker-push
101-
docker-push: ## Push docker image with the manager.
102-
$(CONTAINER_TOOL) push ${IMG}
103-
104-
# PLATFORMS defines the target platforms for the manager image be built to provide support to multiple
105-
# architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to:
106-
# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/
107-
# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/
108-
# - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=<myregistry/image:<tag>> then the export will fail)
109-
# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option.
110-
PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le
111-
.PHONY: docker-buildx
112-
docker-buildx: ## Build and push docker image for the manager for cross-platform support
113-
# copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile
114-
sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross
115-
- $(CONTAINER_TOOL) buildx create --name project-v3-builder
116-
$(CONTAINER_TOOL) buildx use project-v3-builder
117-
- $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross .
118-
- $(CONTAINER_TOOL) buildx rm project-v3-builder
119-
rm Dockerfile.cross
120-
121119
##@ Deployment
122120

123121
ifndef ignore-not-found
@@ -135,7 +133,7 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified
135133
.PHONY: deploy
136134
deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
137135
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
138-
$(KUSTOMIZE) build config/default | $(KUBECTL) apply -f -
136+
$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side --force-conflicts -f -
139137

140138
.PHONY: undeploy
141139
undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
@@ -177,3 +175,21 @@ $(CONTROLLER_GEN): $(LOCALBIN)
177175
envtest: $(ENVTEST) ## Download envtest-setup locally if necessary.
178176
$(ENVTEST): $(LOCALBIN)
179177
test -s $(LOCALBIN)/setup-envtest || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest
178+
179+
.PHONY: image-build
180+
image-build:
181+
$(IMAGE_BUILD_CMD) -t $(IMG) \
182+
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
183+
--build-arg BUILDER_IMAGE=$(BUILDER_IMAGE) \
184+
--build-arg CGO_ENABLED=$(CGO_ENABLED) \
185+
$(PUSH) \
186+
$(IMAGE_BUILD_EXTRA_OPTS) ./
187+
188+
.PHONY: image-push
189+
image-push: PUSH=--push
190+
image-push: image-build
191+
192+
.PHONY: test-integration
193+
test-integration: manifests fmt vet envtest ginkgo ## Run integration tests.
194+
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
195+
$(GINKGO) --junit-report=junit.xml --output-dir=$(ARTIFACTS) -v $(INTEGRATION_TARGET)

PROJECT

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,8 @@ resources:
3535
kind: Model
3636
path: inftyai.com/llmaz/api/v1alpha1
3737
version: v1alpha1
38+
webhooks:
39+
defaulting: true
40+
validation: true
41+
webhookVersion: v1
3842
version: "3"

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
[GoReport Widget]: https://goreportcard.com/badge/github.com/inftyai/llmaz
88
[GoReport Status]: https://goreportcard.com/report/github.com/inftyai/llmaz
99

10-
llmaz, pronounced as `/lima:z/`, aims to provide a production-ready inference platform for various LLMs on Kubernetes. It tightly integrates with state-of-the-art inference backends, such as [vLLM](https://github.com/vllm-project/vllm).
10+
llmaz, pronounced as `/lima:z/`, aims to provide a production-ready inference platform for large language models on Kubernetes. It tightly integrates with state-of-the-art inference backends, such as [vLLM](https://github.com/vllm-project/vllm).
1111

1212
## Concept
1313

1414
![image](./docs/assets/overview.png)
1515

16-
## Feature Overview
16+
## Features Overview
1717

1818
- **Easy to use**: People can deploy a production-ready LLM service with minimal configurations.
1919
- **High performance**: llmaz integrates with vLLM by default for high-performance inference. Support for other backends is on the way.

api/inference/v1alpha1/playground_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type PlaygroundSpec struct {
3838
// technology called splitwise, the workload template is shared by both.
3939
// ModelClaim and multiModelsClaims are mutually exclusive.
4040
// +kubebuilder:validation:MinItems=1
41-
MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims"`
41+
MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims,omitempty"`
4242
// BackendConfig represents the inference backend configuration
4343
// under the hood, e.g. vLLM, which is the default backend.
4444
// +optional

api/inference/v1alpha1/service_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ type ServiceSpec struct {
3434
// Note: properties (nodeSelectors, resources, e.g.) of the model flavors
3535
// will be applied to the workload if not exist.
3636
// +kubebuilder:validation:MinItems=1
37-
MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims"`
37+
MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims,omitempty"`
3838
// WorkloadTemplate defines the underlying workload layout and configuration.
3939
// Note: the LWS spec might be twisted to support different technologies
4040
// like splitwise and accelerator fungibility and several LWSs will be created.

api/v1alpha1/model_types.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,35 @@ import (
2121
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2222
)
2323

24+
const (
25+
ModelFamilyNameLabelKey = "llmaz.io/model-family-name"
26+
)
27+
2428
// DataSource represents where to load the model.
2529
// Only one data source will be used.
2630
type DataSource struct {
27-
// URL represents the URL link than contains the data sources.
28-
// +optional
29-
URL *string `json:"url,omitempty"`
30-
// The mounted volume that contains the data.
31-
// +optional
32-
Volume *v1.VolumeSource `json:"volumeSource,omitempty"`
33-
// Image represents the the image address that contains the source data.
34-
// +optional
35-
Image *string `json:"image,omitempty"`
36-
// ImagePullSecrets represents a list of secret names in the same namespace used for pulling the image.
31+
// ModelID refers to the model identifier on model hub,
32+
// such as meta-llama/Meta-Llama-3-8B.
33+
ModelID *string `json:"modelID,omitempty"`
34+
// ModelHub refers to the model registry, such as huggingface.
35+
// +kubebuilder:default=Huggingface
36+
// +kubebuilder:validation:Enum={Huggingface,ModelScope}
3737
// +optional
38-
ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`
38+
ModelHub *string `json:"modelHub,omitempty"`
39+
40+
// TODO: support all these sources.
41+
// // URL represents the URL link that contains the data sources.
42+
// // +optional
43+
// URL *string `json:"url,omitempty"`
44+
// // The mounted volume that contains the data.
45+
// // +optional
46+
// Volume *v1.VolumeSource `json:"volumeSource,omitempty"`
47+
// // Image represents the image address that contains the source data.
48+
// // +optional
49+
// Image *string `json:"image,omitempty"`
50+
// // ImagePullSecrets represents a list of secret names in the same namespace used for pulling the image.
51+
// // +optional
52+
// ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`
3953
}
4054

4155
type FlavorName string
@@ -134,6 +148,7 @@ type ModelStatus struct {
134148

135149
//+kubebuilder:object:root=true
136150
//+kubebuilder:subresource:status
151+
//+kubebuilder:resource:scope=Cluster
137152

138153
// Model is the Schema for the models API
139154
type Model struct {

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 4 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/main.go

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@ import (
3434

3535
inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1"
3636
llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1"
37+
"inftyai.com/llmaz/internal/cert"
3738
"inftyai.com/llmaz/internal/controller"
3839
inferencecontroller "inftyai.com/llmaz/internal/controller/inference"
40+
"inftyai.com/llmaz/internal/webhook"
3941
//+kubebuilder:scaffold:imports
4042
)
4143

@@ -92,41 +94,68 @@ func main() {
9294
os.Exit(1)
9395
}
9496

95-
if err = (&inferencecontroller.ServiceReconciler{
97+
certsReady := make(chan struct{})
98+
99+
if err = cert.CertsManager(mgr, certsReady); err != nil {
100+
setupLog.Error(err, "unable to setup cert rotation")
101+
os.Exit(1)
102+
}
103+
104+
// Cert won't be ready until manager starts, so start a goroutine here which
105+
// will block until the cert is ready before setting up the controllers.
106+
// Controllers that register after the manager starts will start immediately.
107+
go setupControllers(mgr, certsReady)
108+
109+
//+kubebuilder:scaffold:builder
110+
111+
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
112+
setupLog.Error(err, "unable to set up health check")
113+
os.Exit(1)
114+
}
115+
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
116+
setupLog.Error(err, "unable to set up ready check")
117+
os.Exit(1)
118+
}
119+
120+
setupLog.Info("starting manager")
121+
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
122+
setupLog.Error(err, "problem running manager")
123+
os.Exit(1)
124+
}
125+
}
126+
127+
func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
128+
// The controllers won't work until the webhooks are operating,
129+
// and the webhook won't work until the certs are all in place.
130+
setupLog.Info("waiting for the cert generation to complete")
131+
<-certsReady
132+
setupLog.Info("certs ready")
133+
134+
if err := (&inferencecontroller.ServiceReconciler{
96135
Client: mgr.GetClient(),
97136
Scheme: mgr.GetScheme(),
98137
}).SetupWithManager(mgr); err != nil {
99138
setupLog.Error(err, "unable to create controller", "controller", "Service")
100139
os.Exit(1)
101140
}
102-
if err = (&inferencecontroller.PlaygroundReconciler{
141+
if err := (&inferencecontroller.PlaygroundReconciler{
103142
Client: mgr.GetClient(),
104143
Scheme: mgr.GetScheme(),
105144
}).SetupWithManager(mgr); err != nil {
106145
setupLog.Error(err, "unable to create controller", "controller", "Playground")
107146
os.Exit(1)
108147
}
109-
if err = (&controller.ModelReconciler{
148+
if err := (&controller.ModelReconciler{
110149
Client: mgr.GetClient(),
111150
Scheme: mgr.GetScheme(),
112151
}).SetupWithManager(mgr); err != nil {
113152
setupLog.Error(err, "unable to create controller", "controller", "Model")
114153
os.Exit(1)
115154
}
116-
//+kubebuilder:scaffold:builder
117-
118-
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
119-
setupLog.Error(err, "unable to set up health check")
120-
os.Exit(1)
121-
}
122-
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
123-
setupLog.Error(err, "unable to set up ready check")
124-
os.Exit(1)
125-
}
126-
127-
setupLog.Info("starting manager")
128-
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
129-
setupLog.Error(err, "problem running manager")
130-
os.Exit(1)
155+
if os.Getenv("ENABLE_WEBHOOKS") != "false" {
156+
if err := webhook.SetupModelWebhook(mgr); err != nil {
157+
setupLog.Error(err, "unable to create webhook", "webhook", "Model")
158+
os.Exit(1)
159+
}
131160
}
132161
}

0 commit comments

Comments
 (0)