Skip to content

Commit

Permalink
Add PVC management (#347)
Browse files Browse the repository at this point in the history
  • Loading branch information
ruivieira authored Oct 29, 2024
1 parent 6bc45a0 commit b76a51d
Show file tree
Hide file tree
Showing 8 changed files with 910 additions and 0 deletions.
31 changes: 31 additions & 0 deletions api/lmes/v1alpha1/lmevaljob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,19 @@ func (c *LMEvalContainer) GetResources() *corev1.ResourceRequirements {
return c.Resources
}

// PersistentVolumeClaimManaged holds the settings for a PVC that the operator
// creates and manages on behalf of an LMEvalJob (the `pvcManaged` entry of
// Outputs).
//
// NOTE(review): Size is a free-form string and is not validated here; it is
// presumably a Kubernetes resource quantity such as "5Gi" — confirm against
// the code that creates the managed PVC.
type PersistentVolumeClaimManaged struct {
// Size is the size setting of the managed PVC; omitted from JSON when empty.
Size string `json:"size,omitempty"`
}

// Outputs describes where an LMEvalJob stores its outputs: either an existing
// PVC referenced by name, or a PVC created and managed by the operator.
// Both fields are optional pointers; a nil pointer means the option is unset.
// When both are set, the reconciler uses the managed PVC and logs that the
// existing PVC configuration is ignored.
type Outputs struct {
// Use an existing PVC to store the outputs
// +optional
PersistentVolumeClaimName *string `json:"pvcName,omitempty"`
// Create an operator managed PVC
// +optional
PersistentVolumeClaimManaged *PersistentVolumeClaimManaged `json:"pvcManaged,omitempty"`
}

type LMEvalPodSpec struct {
// Extra container data for the lm-eval container
// +optional
Expand Down Expand Up @@ -241,6 +254,24 @@ type LMEvalJobSpec struct {
// Suspend keeps the job but without pods. This is intended to be used by the Kueue integration
// +optional
Suspend bool `json:"suspend,omitempty"`
// Outputs specifies storage for evaluation results
// +optional
Outputs *Outputs `json:"outputs,omitempty"`
}

// HasCustomOutput reports whether the LMEvalJobSpec defines custom outputs,
// i.e. whether its Outputs field is set.
//
// It is safe to call on a nil *LMEvalJobSpec, in which case it returns false
// instead of panicking on the field access.
func (s *LMEvalJobSpec) HasCustomOutput() bool {
	return s != nil && s.Outputs != nil
}

// HasManagedPVC reports whether the outputs define an operator-managed PVC,
// i.e. whether PersistentVolumeClaimManaged is set.
//
// Outputs is an optional pointer on the job spec, so this method is safe to
// call on a nil *Outputs: it returns false instead of panicking.
func (o *Outputs) HasManagedPVC() bool {
	return o != nil && o.PersistentVolumeClaimManaged != nil
}

// HasExistingPVC reports whether the outputs reference an existing PVC,
// i.e. whether PersistentVolumeClaimName is set. Only the presence of the
// name is checked; its content is not validated here.
//
// Outputs is an optional pointer on the job spec, so this method is safe to
// call on a nil *Outputs: it returns false instead of panicking.
func (o *Outputs) HasExistingPVC() bool {
	return o != nil && o.PersistentVolumeClaimName != nil
}

// LMEvalJobStatus defines the observed state of LMEvalJob
Expand Down
45 changes: 45 additions & 0 deletions api/lmes/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,19 @@ spec:
numFewShot:
description: Sets the number of few-shot examples to place in context
type: integer
outputs:
description: Outputs specifies storage for evaluation results
properties:
pvcManaged:
description: Create an operator managed PVC
properties:
size:
type: string
type: object
pvcName:
description: Use an existing PVC to store the outputs
type: string
type: object
pod:
description: Specify extra information for the lm-eval job's pod
properties:
Expand Down
1 change: 1 addition & 0 deletions controllers/lmes/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
const (
DriverPath = "/bin/driver"
DestDriverPath = "/opt/app-root/src/bin/driver"
OutputPath = "/opt/app-root/src/output"
PodImageKey = "lmes-pod-image"
DriverImageKey = "lmes-driver-image"
PodCheckingIntervalKey = "lmes-pod-checking-interval"
Expand Down
52 changes: 52 additions & 0 deletions controllers/lmes/lmevaljob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ func (q *syncedMap4Reconciler) remove(key string) {
// +kubebuilder:rbac:groups="",resources=pods/exec,verbs=get;list;watch;create;delete
// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;watch;list
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;watch;list
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=list;get;watch
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=list;get;watch;create;update;patch;delete

func (r *LMEvalJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := log.FromContext(ctx)
Expand All @@ -172,6 +174,26 @@ func (r *LMEvalJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return r.handleSuspend(ctx, log, job)
}

// If outputs have been explicitly set
if job.Spec.HasCustomOutput() {
// If managed PVC is set
if job.Spec.Outputs.HasManagedPVC() {
if job.Spec.Outputs.HasExistingPVC() {
log.Info("LMEvalJob has both managed and existing PVCs defined. Existing PVC configuration will be ignored.")
}
err := r.handleManagedPVC(ctx, log, job)
if err != nil {
return ctrl.Result{}, err
}
} else if job.Spec.Outputs.HasExistingPVC() {
err := r.handleExistingPVC(ctx, log, job)
if err != nil {
return ctrl.Result{}, err
}
}
}
log.Info("Continuing after PVC")

// Handle the job based on its state
switch job.Status.State {
case lmesv1alpha1.NewJobState:
Expand Down Expand Up @@ -631,6 +653,15 @@ func createPod(svcOpts *serviceOptions, job *lmesv1alpha1.LMEvalJob, log logr.Lo
},
}

if job.Spec.HasCustomOutput() {
outputPVCMount := corev1.VolumeMount{
Name: "outputs",
MountPath: OutputPath,
}
volumeMounts = append(volumeMounts, outputPVCMount)

}

var volumes = []corev1.Volume{
{
Name: "shared", VolumeSource: corev1.VolumeSource{
Expand All @@ -639,6 +670,27 @@ func createPod(svcOpts *serviceOptions, job *lmesv1alpha1.LMEvalJob, log logr.Lo
},
}

if job.Spec.HasCustomOutput() {

var claimName string
if job.Spec.Outputs.HasManagedPVC() {
claimName = generateManagedPVCName(job)
} else if job.Spec.Outputs.HasExistingPVC() {
claimName = *job.Spec.Outputs.PersistentVolumeClaimName
}

outputPVC := corev1.Volume{
Name: "outputs",
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: claimName,
ReadOnly: false,
},
},
}
volumes = append(volumes, outputPVC)
}

volumes = append(volumes, job.Spec.Pod.GetVolumes()...)
volumeMounts = append(volumeMounts, job.Spec.Pod.GetContainer().GetVolumMounts()...)
labels := getPodLabels(job.Labels, log)
Expand Down
Loading

0 comments on commit b76a51d

Please sign in to comment.