Skip to content

Commit

Permalink
Add webhook to Model
Browse files Browse the repository at this point in the history
Signed-off-by: kerthcet <[email protected]>
  • Loading branch information
kerthcet committed Jul 15, 2024
1 parent 9add086 commit 3f0e419
Show file tree
Hide file tree
Showing 42 changed files with 1,051 additions and 2,084 deletions.
9 changes: 6 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
ARG BASE_IMAGE
ARG BUILDER_IMAGE

# Build the manager binary
FROM golang:1.20 as builder
FROM ${BUILDER_IMAGE} as builder
ARG TARGETOS
ARG TARGETARCH

Expand All @@ -14,7 +17,7 @@ RUN go mod download
# Copy the go source
COPY cmd/main.go cmd/main.go
COPY api/ api/
COPY internal/controller/ internal/controller/
COPY internal/ internal/

# Build
# the GOARCH has not a default value to allow the binary be built according to the host where the command
Expand All @@ -25,7 +28,7 @@ RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o ma

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
FROM gcr.io/distroless/static:nonroot
FROM ${BASE_IMAGE}
WORKDIR /
COPY --from=builder /workspace/manager .
USER 65532:65532
Expand Down
82 changes: 49 additions & 33 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@

# Image URL to use all building/pushing image targets
IMG ?= controller:latest
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.28.0
ENVTEST_K8S_VERSION = 1.28.3

# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN))
Expand Down Expand Up @@ -42,11 +40,39 @@ all: build
help: ## Display this help.
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

# Absolute directory that contains this Makefile. It reads the last entry of
# MAKEFILE_LIST, so it has to be evaluated before any `include` directive.
PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
# Output directory for generated reports (used as ginkgo's --output-dir).
ARTIFACTS ?= $(PROJECT_DIR)/bin
# Version of the ginkgo CLI to install, taken from the ginkgo module version
# resolved for this Go module.
GINKGO_VERSION ?= $(shell go list -m -f '{{.Version}}' github.com/onsi/ginkgo/v2)
# Go version: second field of the first line in go.mod that starts with "go ".
GO_VERSION := $(shell awk '/^go /{print $$2; exit}' go.mod)

# Path of the locally installed ginkgo CLI.
# It is derived from LOCALBIN (assigned outside this section) because that is
# the directory the `ginkgo` rule below installs into; a hard-coded
# $(shell pwd)/bin would point at a missing binary whenever LOCALBIN is
# overridden. Kept recursive (`=`) so the value tracks LOCALBIN no matter where
# in the file LOCALBIN is assigned.
GINKGO = $(LOCALBIN)/ginkgo
.PHONY: ginkgo
ginkgo: ## Download ginkgo locally if necessary.
	test -s $(LOCALBIN)/ginkgo || \
	GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/v2/ginkgo@$(GINKGO_VERSION)

# Go package pattern handed to ginkgo by the test-integration target.
INTEGRATION_TARGET ?= ./test/integration/...

# --- Image build tooling ---------------------------------------------------
# Command used to build images, plus a hook for extra build flags.
DOCKER_BUILDX_CMD ?= docker buildx
IMAGE_BUILD_CMD ?= $(DOCKER_BUILDX_CMD) build
IMAGE_BUILD_EXTRA_OPTS ?=

# --- Images passed to the Dockerfile as build arguments ----------------------
# BUILDER_IMAGE follows the Go version read from go.mod.
BUILDER_IMAGE ?= golang:$(GO_VERSION)
BASE_IMAGE ?= gcr.io/distroless/static:nonroot

# --- Name and tag of the resulting image -----------------------------------
IMAGE_REGISTRY ?= docker.io/inftyai
IMAGE_NAME ?= llmaz
IMAGE_REPO := $(IMAGE_REGISTRY)/$(IMAGE_NAME)
# The tag defaults to the output of `git describe` for the current checkout.
GIT_TAG ?= $(shell git describe --tags --dirty --always)
IMG ?= $(IMAGE_REPO):$(GIT_TAG)

##@ Development

.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases
$(CONTROLLER_GEN) \
rbac:roleName=manager-role output:rbac:artifacts:config=config/rbac \
crd:generateEmbeddedObjectMeta=true output:crd:artifacts:config=config/crd/bases \
webhook output:webhook:artifacts:config=config/webhook \
paths="./..."

.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
Expand Down Expand Up @@ -90,34 +116,6 @@ build: manifests generate fmt vet ## Build manager binary.
run: manifests generate fmt vet ## Run a controller from your host.
go run ./cmd/main.go

# If you wish to build the manager image targeting other platforms you can use the --platform flag.
# (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it.
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
.PHONY: docker-build
docker-build: ## Build docker image with the manager.
$(CONTAINER_TOOL) build -t ${IMG} .

.PHONY: docker-push
docker-push: ## Push docker image with the manager.
$(CONTAINER_TOOL) push ${IMG}

# PLATFORMS defines the target platforms for the manager image be built to provide support to multiple
# architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to:
# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/
# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/
# - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=<myregistry/image:<tag>> then the export will fail)
# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option.
PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le
.PHONY: docker-buildx
docker-buildx: ## Build and push docker image for the manager for cross-platform support
# copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile
sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross
- $(CONTAINER_TOOL) buildx create --name project-v3-builder
$(CONTAINER_TOOL) buildx use project-v3-builder
- $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross .
- $(CONTAINER_TOOL) buildx rm project-v3-builder
rm Dockerfile.cross

##@ Deployment

ifndef ignore-not-found
Expand All @@ -135,7 +133,7 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified
.PHONY: deploy
deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
$(KUSTOMIZE) build config/default | $(KUBECTL) apply -f -
$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side --force-conflicts -f -

.PHONY: undeploy
undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
Expand Down Expand Up @@ -177,3 +175,21 @@ $(CONTROLLER_GEN): $(LOCALBIN)
envtest: $(ENVTEST) ## Download envtest-setup locally if necessary.
$(ENVTEST): $(LOCALBIN)
test -s $(LOCALBIN)/setup-envtest || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest

# Value forwarded to the image build as the CGO_ENABLED build argument.
# Without a default the recipe would pass an empty `CGO_ENABLED=`; 0 matches
# the CGO_ENABLED=0 used by the Dockerfile's `go build` step.
# NOTE(review): confirm no other part of the Makefile already assigns it.
CGO_ENABLED ?= 0

# Builds the manager image tagged $(IMG).
#   PUSH                    empty by default; image-push sets it to --push.
#   IMAGE_BUILD_EXTRA_OPTS  extra flags appended to the build command.
.PHONY: image-build
image-build: ## Build the manager image.
	$(IMAGE_BUILD_CMD) -t $(IMG) \
		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
		--build-arg BUILDER_IMAGE=$(BUILDER_IMAGE) \
		--build-arg CGO_ENABLED=$(CGO_ENABLED) \
		$(PUSH) \
		$(IMAGE_BUILD_EXTRA_OPTS) ./

# image-push has no recipe of its own: it re-runs image-build with the
# target-specific PUSH=--push, which image-build inherits as a prerequisite.
.PHONY: image-push
image-push: PUSH=--push
image-push: image-build ## Build the manager image and push it.

# Runs the ginkgo suites selected by INTEGRATION_TARGET. KUBEBUILDER_ASSETS is
# set to the path printed by `$(ENVTEST) use` for ENVTEST_K8S_VERSION, and the
# JUnit report junit.xml is written under $(ARTIFACTS).
.PHONY: test-integration
test-integration: manifests fmt vet envtest ginkgo ## Run integration tests.
	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
	$(GINKGO) -v --output-dir=$(ARTIFACTS) --junit-report=junit.xml $(INTEGRATION_TARGET)
4 changes: 4 additions & 0 deletions PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,8 @@ resources:
kind: Model
path: inftyai.com/llmaz/api/v1alpha1
version: v1alpha1
webhooks:
defaulting: true
validation: true
webhookVersion: v1
version: "3"
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
[GoReport Widget]: https://goreportcard.com/badge/github.com/inftyai/llmaz
[GoReport Status]: https://goreportcard.com/report/github.com/inftyai/llmaz

llmaz, pronounced as `/lima:z/`, aims to provide a production-ready inference platform for various LLMs on Kubernetes. It tightly integrates with state-of-the-art inference backends, such as [vLLM](https://github.com/vllm-project/vllm).
llmaz, pronounced as `/lima:z/`, aims to provide a production-ready inference platform for large language models on Kubernetes. It tightly integrates with state-of-the-art inference backends, such as [vLLM](https://github.com/vllm-project/vllm).

## Concept

![image](./docs/assets/overview.png)

## Feature Overview
## Features Overview

- **Easy to use**: People can deploy a production-ready LLM service with minimal configurations.
- **High performance**: llmaz integrates with vLLM by default for high-performance inference. Support for other backends is on the way.
Expand Down
2 changes: 1 addition & 1 deletion api/inference/v1alpha1/playground_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type PlaygroundSpec struct {
// technology called splitwise, the workload template is shared by both.
// ModelClaim and multiModelsClaims are mutually exclusive.
// +kubebuilder:validation:MinItems=1
MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims"`
MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims,omitempty"`
// BackendConfig represents the inference backend configuration
// under the hood, e.g. vLLM, which is the default backend.
// +optional
Expand Down
2 changes: 1 addition & 1 deletion api/inference/v1alpha1/service_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ type ServiceSpec struct {
// Note: properties (nodeSelectors, resources, e.g.) of the model flavors
// will be applied to the workload if not exist.
// +kubebuilder:validation:MinItems=1
MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims"`
MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims,omitempty"`
// WorkloadTemplate defines the underlying workload layout and configuration.
// Note: the LWS spec might be twisted to support different technologies
// like splitwise and accelerator fungibility and several LWSs will be created.
Expand Down
37 changes: 26 additions & 11 deletions api/v1alpha1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,35 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
ModelFamilyNameLabelKey = "llmaz.io/model-family-name"
)

// DataSource represents where to load the model.
// Only one data source will be used.
type DataSource struct {
// URL represents the URL link than contains the data sources.
// +optional
URL *string `json:"url,omitempty"`
// The mounted volume that contains the data.
// +optional
Volume *v1.VolumeSource `json:"volumeSource,omitempty"`
// Image represents the the image address that contains the source data.
// +optional
Image *string `json:"image,omitempty"`
// ImagePullSecrets represents a list of secret names in the same namespace used for pulling the image.
// ModelID refers to the model identifier on model hub,
// such as meta-llama/Meta-Llama-3-8B.
ModelID *string `json:"modelID,omitempty"`
// ModelHub refers to the model registry, such as huggingface.
// +kubebuilder:default=Huggingface
// +kubebuilder:validation:Enum={Huggingface,ModelScope}
// +optional
ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`
ModelHub *string `json:"modelHub,omitempty"`

// TODO: support all these sources.
// // URL represents the URL link that contains the data sources.
// // +optional
// URL *string `json:"url,omitempty"`
// // The mounted volume that contains the data.
// // +optional
// Volume *v1.VolumeSource `json:"volumeSource,omitempty"`
// // Image represents the image address that contains the source data.
// // +optional
// Image *string `json:"image,omitempty"`
// // ImagePullSecrets represents a list of secret names in the same namespace used for pulling the image.
// // +optional
// ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`
}

type FlavorName string
Expand Down Expand Up @@ -134,6 +148,7 @@ type ModelStatus struct {

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+kubebuilder:resource:scope=Cluster

// Model is the Schema for the models API
type Model struct {
Expand Down
18 changes: 4 additions & 14 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 47 additions & 18 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ import (

inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1"
llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1"
"inftyai.com/llmaz/internal/cert"
"inftyai.com/llmaz/internal/controller"
inferencecontroller "inftyai.com/llmaz/internal/controller/inference"
"inftyai.com/llmaz/internal/webhook"
//+kubebuilder:scaffold:imports
)

Expand Down Expand Up @@ -92,41 +94,68 @@ func main() {
os.Exit(1)
}

if err = (&inferencecontroller.ServiceReconciler{
certsReady := make(chan struct{})

if err = cert.CertsManager(mgr, certsReady); err != nil {
setupLog.Error(err, "unable to setup cert rotation")
os.Exit(1)
}

// The certs won't be ready until the manager starts, so start a goroutine here
// that blocks until the certs are ready before setting up the controllers.
// Controllers that register after the manager has started will start immediately.
go setupControllers(mgr, certsReady)

//+kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
os.Exit(1)
}
}

func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
// The controllers won't work until the webhooks are operating,
// and the webhooks won't work until the certs are all in place.
setupLog.Info("waiting for the cert generation to complete")
<-certsReady
setupLog.Info("certs ready")

if err := (&inferencecontroller.ServiceReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Service")
os.Exit(1)
}
if err = (&inferencecontroller.PlaygroundReconciler{
if err := (&inferencecontroller.PlaygroundReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Playground")
os.Exit(1)
}
if err = (&controller.ModelReconciler{
if err := (&controller.ModelReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Model")
os.Exit(1)
}
//+kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
os.Exit(1)
if os.Getenv("ENABLE_WEBHOOKS") != "false" {
if err := webhook.SetupModelWebhook(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "Model")
os.Exit(1)
}
}
}
Loading

0 comments on commit 3f0e419

Please sign in to comment.