Skip to content

Commit c022221

Browse files
authored
Fix scheduler crash when a 'mig' task is accidentally running on a 'hami-core' GPU (#848)
* Fix scheduler crash when a MIG-mode task is running on a hami-core GPU.
Signed-off-by: limengxuan <[email protected]>
1 parent ba5bad5 commit c022221

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

pkg/scheduler/scheduler.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,11 @@ func (s *Scheduler) getNodesUsage(nodes *[]string, task *corev1.Pod) (*map[strin
333333
d.Device.Usedmem += udevice.Usedmem
334334
d.Device.Usedcores += udevice.Usedcores
335335
if strings.Contains(udevice.UUID, "[") {
336+
if strings.Compare(d.Device.Mode, "hami-core") == 0 {
337+
klog.Errorf("found a mig task running on a hami-core GPU\n")
338+
d.Device.Health = false
339+
continue
340+
}
336341
tmpIdx, Instance, _ := util.ExtractMigTemplatesFromUUID(udevice.UUID)
337342
if len(d.Device.MigUsage.UsageList) == 0 {
338343
util.PlatternMIG(&d.Device.MigUsage, d.Device.MigTemplate, tmpIdx)

0 commit comments

Comments
 (0)