Skip to content

Commit

Permalink
feat: enable T2 auto health check (#16825)
Browse files Browse the repository at this point in the history
Description of PR
Enable the T2 auto health check and add running-container information to the returned check data.

Summary:
Fixes # (issue) Microsoft ADO 30293537

Approach
What is the motivation for this PR?
We want to enable the T2 auto health check now that T2 auto recovery is supported.

Co-authored-by: [email protected]
  • Loading branch information
cyw233 authored Feb 12, 2025
1 parent 4a19378 commit 4302661
Showing 1 changed file with 5 additions and 8 deletions.
13 changes: 5 additions & 8 deletions .azure-pipelines/testbed_health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,14 +218,6 @@ def pre_check(self):
if len(ipv4_not_exists_hosts) > 0:
raise HostsUnreachable(self.check_result.errmsg)

# TODO: Refactor the following code to specify a "leader" T2 Testbed and skip the check on "followers"
# Retrieve the basic facts of the DUTs
if self.is_multi_asic:
errmsg = "Not support to perform checks on multi-asic DUT now."
logger.info(errmsg)

raise SkipCurrentTestbed(errmsg)

logger.info("======================= pre_check ends =======================")

def run_check(self):
Expand Down Expand Up @@ -461,6 +453,7 @@ def check_critical_containers_running(self, critical_containers: list = None):
critical_containers = ["syncd", "swss", "bgp"]

failed = False
running_containers_facts_on_hosts = {}

logger.info("======================= check_critical_containers_running starts =======================")

Expand All @@ -479,6 +472,8 @@ def check_critical_containers_running(self, critical_containers: list = None):
running_containers = sonichost.shell(r"docker ps -f 'status=running' --format \{\{.Names\}\}")[
'stdout_lines']

running_containers_facts_on_hosts[hostname] = running_containers

containers_to_check = critical_containers
if self.is_multi_asic:
if (self.is_chassis and
Expand All @@ -505,6 +500,8 @@ def check_critical_containers_running(self, critical_containers: list = None):
# Add errlog to check result errmsg
self.check_result.errmsg.append(errlog)

self.check_result.data["running_containers_facts_on_hosts"] = running_containers_facts_on_hosts

logger.info("======================= check_critical_containers_running ends =======================")

if failed:
Expand Down

0 comments on commit 4302661

Please sign in to comment.