Skip to content

Commit

Permalink
Improve test
Browse files: browse the repository at this point in the history
Signed-off-by: GonzaloSaez <[email protected]>
  • Loading branch information
GonzaloSaez committed Nov 5, 2024
1 parent a3f2e82 commit 804cfd8
Showing 1 changed file with 30 additions and 2 deletions.
32 changes: 30 additions & 2 deletions test/integration/mpi_job_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,37 @@ func testMpiJobWaitWorkers(t *testing.T, startSuspended bool) {
}
s.events.verify(t)

workerPods, err := getPodsForJob(ctx, s.kClient, mpiJob)
// The launcher job should not be created until all workers are ready even when we start in suspended mode.
job, err := getLauncherJobForMPIJob(ctx, s.kClient, mpiJob)
if err != nil {
t.Fatalf("Cannot get worker pods from job: %v", err)
t.Fatalf("Cannot get launcher job from job: %v", err)
}
if job != nil {
t.Fatalf("Launcher is created before workers")
}

if startSuspended {
// Resume the MPIJob so that the test can follow the normal path.
mpiJob.Spec.RunPolicy.Suspend = ptr.To(false)
mpiJob, err = s.mpiClient.KubeflowV2beta1().MPIJobs(mpiJob.Namespace).Update(ctx, mpiJob, metav1.UpdateOptions{})
if err != nil {
t.Fatalf("Error Updating MPIJob: %v", err)
}
}

var workerPods []corev1.Pod
if err = wait.PollUntilContextTimeout(ctx, util.WaitInterval, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
var err error
workerPods, err = getPodsForJob(ctx, s.kClient, mpiJob)
if err != nil {
return false, err
}
if len(workerPods) != 2 {
return false, nil
}
return true, nil
}); err != nil {
t.Errorf("Failed updating scheduler-plugins PodGroup: %v", err)
}

err = updatePodsToPhase(ctx, s.kClient, workerPods, corev1.PodRunning)
Expand Down

0 comments on commit 804cfd8

Please sign in to comment.