Skip to content

Commit 775647d

Browse files
committed
rds: Added a verbose option to extract additional debug information
The "verbose" option is added to the drgn RDS helper to extract data that is not required in general, but can help get the exact state of different objects of an RDS connection if needed. Signed-off-by: Anand Khoje <[email protected]> Signed-off-by: Pradyumn Rahar <[email protected]>
1 parent ad09667 commit 775647d

File tree

2 files changed

+230
-4
lines changed

2 files changed

+230
-4
lines changed

drgn_tools/rds.py

Lines changed: 228 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
from drgn.helpers.linux import for_each_online_cpu
2929
from drgn.helpers.linux import per_cpu
3030
from drgn.helpers.linux import xa_for_each
31+
from drgn.helpers.linux.bitops import test_bit
32+
from drgn.helpers.linux.cpumask import cpu_online_mask
3133
from drgn.helpers.linux.list import hlist_for_each_entry
3234
from drgn.helpers.linux.list import list_empty
3335
from drgn.helpers.linux.list import list_for_each
@@ -654,6 +656,7 @@ def rds_stats(prog: drgn.Program, fields: Optional[str] = None) -> None:
654656
@redirectable
655657
def rds_conn_info(
656658
prog: drgn.Program,
659+
verbose: bool = False,
657660
laddr: Optional[str] = None,
658661
faddr: Optional[str] = None,
659662
tos: Optional[str] = None,
@@ -729,6 +732,108 @@ def rds_conn_info(
729732
if int(conn.c_path.cp_pending_flush):
730733
flags = flags[:3] + "E"
731734

735+
# Structures for non-up connections
736+
if verbose:
737+
with open("conn_structure_dump.txt", "a") as f:
738+
print("## rds_connection ##", file=f)
739+
if conn_state != "RDS_CONN_UP":
740+
print(
741+
f"\n\nConnection : <{conn_laddr}, {conn_faddr}, {conn_tos}>",
742+
file=f,
743+
)
744+
print(
745+
f"==== rds_connection ({hex(conn.value_())}) state={conn_state} ====",
746+
file=f,
747+
)
748+
print("## rds_connection ##", file=f)
749+
try:
750+
print(conn, file=f)
751+
except Exception as e:
752+
print(f"<Error printing rds_connection: {e}>", file=f)
753+
754+
print("\n## rds_conn_path ##", file=f)
755+
try:
756+
cp = conn.c_path
757+
print(cp, file=f)
758+
except Exception as e:
759+
print(f"<Error printing rds_conn_path: {e}>", file=f)
760+
761+
if trans_name == "infiniband" and ic is not None:
762+
print("\n## rds_ib_connection ##", file=f)
763+
try:
764+
print(ic, file=f)
765+
except Exception as e:
766+
print(
767+
f"<Error printing rds_ib_connection: {e}>",
768+
file=f,
769+
)
770+
771+
# RDMA cm id
772+
i_cm_id = ic.member_("i_cm_id")
773+
print("\n## rdma_cm_id (i_cm_id) ##", file=f)
774+
try:
775+
print(i_cm_id, file=f)
776+
except Exception as e:
777+
print(f"<Error printing rdma_cm_id: {e}>", file=f)
778+
779+
# struct rdma_id_private
780+
id_priv = None
781+
if i_cm_id:
782+
try:
783+
id_priv = container_of(
784+
i_cm_id, "struct rdma_id_private", "id"
785+
)
786+
print("\n## rdma_id_private ##", file=f)
787+
print(id_priv, file=f)
788+
except Exception as e:
789+
print(
790+
f"<Error printing rdma_id_private: {e}>",
791+
file=f,
792+
)
793+
else:
794+
print("\n## rdma_id_private ##\n<None>", file=f)
795+
796+
# ib_cm_id
797+
if id_priv:
798+
try:
799+
ib_cm_id = cast(
800+
"struct ib_cm_id *", id_priv.cm_id.ib
801+
)
802+
print("\n## ib_cm_id ##", file=f)
803+
print(ib_cm_id, file=f)
804+
except Exception as e:
805+
print(
806+
f"<Error printing ib_cm_id: {e}>", file=f
807+
)
808+
else:
809+
print("\n## ib_cm_id ##\n<None>", file=f)
810+
811+
# ibqp
812+
ibqp = (
813+
getattr(i_cm_id, "qp", None) if i_cm_id else None
814+
)
815+
print("\n## ibqp ##", file=f)
816+
try:
817+
print(ibqp, file=f)
818+
except Exception as e:
819+
print(f"<Error printing ibqp: {e}>", file=f)
820+
821+
# mlx5_ib_qp
822+
if ibqp:
823+
try:
824+
mlx5_ib_qp = container_of(
825+
ibqp, "struct mlx5_ib_qp", "ibqp"
826+
)
827+
print("\n## mlx5_ib_qp ##", file=f)
828+
print(mlx5_ib_qp, file=f)
829+
except Exception as e:
830+
print(
831+
f"<Error printing mlx5_ib_qp: {e}>", file=f
832+
)
833+
else:
834+
print("\n## mlx5_ib_qp ##\n<None>", file=f)
835+
print("\n" + "=" * 60 + "\n", file=f)
836+
732837
ib_conn_info = rds_get_ib_conn_info(ic)
733838

734839
index += 1
@@ -1629,8 +1734,114 @@ def rds_get_mr_list_info(
16291734
)
16301735

16311736

1737+
def cpu_online_state(prog: drgn.Program, cpu: int) -> str:
    """
    Report whether a CPU's bit is set in the online CPU mask.

    :param prog: drgn program
    :param cpu: CPU number to test in the online CPU mask
    :returns: ``"on"`` when the bit for ``cpu`` is set, ``"off"`` otherwise
    """
    online_bits = cpu_online_mask(prog).bits
    return "on" if test_bit(cpu, online_bits) else "off"
1742+
1743+
16321744
@redirectable
1633-
def report(prog: drgn.Program) -> None:
1745+
def rds_conn_cpu_info(
    prog: drgn.Program,
    laddr: Optional[str] = None,
    faddr: Optional[str] = None,
    state: Optional[str] = None,
    tos: Optional[str] = None,
) -> None:
    """
    Display the preferred send/receive CPUs of each RDS connection, together
    with whether each of those CPUs is currently set in the online CPU mask.

    The CPU preferences are read from ``struct rds_ib_connection``; for a
    connection whose transport name is not "infiniband" there is no such
    structure, so the CPU columns are reported as "N/A".

    :param prog: drgn program
    :param laddr: comma separated string list of LOCAL-IP. Ex: '192.168.X.X, 10.211.X.X, ...'
    :param faddr: comma separated string list of REMOTE-IP. Ex: '192.168.X.X, 10.211.X.X, ...'
    :param state: comma separated string list of conn states. Ex 'RDS_CONN_UP, CONNECTING, ...'
    :param tos: comma separated string list of TOS. Ex: '0, 3, ...'
    :returns: None
    """
    msg = ensure_debuginfo(prog, ["rds"])
    if msg:
        print(msg)
        return None

    conn_list = Table(
        [
            " ",  # index
            "rds_conn",
            "ib_conn",
            "Conn Path",
            "ToS",
            "Local Addr",
            "Remote Addr",
            "State",
            "preferred_send_cpu [state]",
            "preferred_recv_cpu [state]",
            "preferred_recv_sibling_cpu [state]",
        ]
    )

    conns = for_each_rds_conn(prog, laddr, faddr, tos, state)
    for index, conn in enumerate(conns):
        conn_val = hex(conn.value_())
        # t_name is rendered by drgn as a quoted C string; keep only the
        # text between the double quotes.
        trans_name = "".join(re.findall('"([^"]*)"', str(conn.c_trans.t_name)))
        conn_tos = int(conn.c_tos)
        conn_path = hex(conn.c_path.value_())
        conn_laddr = rds_inet_ntoa(conn.c_laddr)
        conn_faddr = rds_inet_ntoa(conn.c_faddr)
        conn_state = rds_conn_path_state(conn)

        if trans_name == "infiniband":
            ic: Any = cast(
                "struct rds_ib_connection *", conn.c_path.cp_transport_data
            )
            ib_conn = hex(ic.value_())
            preferred_send_cpu = _rds_cpu_with_state(
                prog, ic, "i_preferred_send_cpu"
            )
            preferred_recv_cpu = _rds_cpu_with_state(
                prog, ic, "i_preferred_recv_cpu"
            )
            # Deliberately best-effort: any failure reading the sibling CPU
            # is rendered as "NA" instead of aborting the whole table.
            # NOTE(review): presumably i_preferred_recv_sibling is missing
            # from some kernel versions -- confirm.
            try:
                preferred_recv_sibling = _rds_cpu_with_state(
                    prog, ic, "i_preferred_recv_sibling"
                )
            except Exception:
                preferred_recv_sibling = "NA"
        else:
            # No rds_ib_connection to read from; previously this path
            # dereferenced None and raised AttributeError.
            ib_conn = "N/A"
            preferred_send_cpu = "N/A"
            preferred_recv_cpu = "N/A"
            preferred_recv_sibling = "N/A"

        conn_list.row(
            index,
            conn_val,
            ib_conn,
            conn_path,
            conn_tos,
            conn_laddr,
            conn_faddr,
            conn_state,
            preferred_send_cpu,
            preferred_recv_cpu,
            preferred_recv_sibling,
        )
    print("RDS connections CPU information:")
    conn_list.write()


def _rds_cpu_with_state(prog: drgn.Program, ic: Any, member: str) -> str:
    """Format the CPU stored in ``ic.<member>`` as ``"<cpu> [on|off]"``."""
    cpu = getattr(ic, member)
    return f"{int(cpu)} [{cpu_online_state(prog, cpu)}]"
1841+
1842+
1843+
@redirectable
1844+
def report(prog: drgn.Program, verbose: bool = False) -> None:
16341845
"""
16351846
Generate a report of RDS related data.
16361847
This functions runs all the functions in the module and saves the results to the output file provided.
@@ -1646,10 +1857,11 @@ def report(prog: drgn.Program) -> None:
16461857
# rds_dev_info(prog)
16471858
# rdma_resource_usage(prog)
16481859
rds_sock_info(prog)
1649-
rds_conn_info(prog)
1860+
rds_conn_info(prog, verbose)
16501861
rds_info_verbose(prog)
16511862
rds_conn_cq_eq_info(prog)
16521863
rds_stats(prog)
1864+
rds_conn_cpu_info(prog)
16531865
rds_print_msg_queue(prog, queue="All")
16541866

16551867

@@ -1662,6 +1874,19 @@ class Rds(CorelensModule):
16621874
# We access information from the following modules #
16631875
debuginfo_kmods = ["mlx5_core", "mlx4_core", "mlx5_ib", "mlx4_ib"]
16641876

1877+
default_args = [
1878+
[
1879+
"--verbose",
1880+
]
1881+
]
1882+
1883+
def add_args(self, parser: argparse.ArgumentParser) -> None:
    """Register the ``--verbose`` boolean switch on ``parser``."""
    verbose_help = "Print additional debug information"
    parser.add_argument("--verbose", action="store_true", help=verbose_help)
1889+
16651890
def run(self, prog: Program, args: argparse.Namespace) -> None:
    """
    Run the RDS report, forwarding the parsed ``--verbose`` flag, and then
    call ``rds_ib_conn_ring_info`` with the fixed argument ``0xDEADBEEF``.
    """
    verbose = args.verbose
    report(prog, verbose)
    # NOTE(review): 0xDEADBEEF looks like a placeholder value -- confirm.
    rds_ib_conn_ring_info(prog, 0xDEADBEEF)

tests/test_rds.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@
44

55

66
def test_run_rds(prog):
    # Run the report with verbose output switched on, then the ring-info
    # helper with the same fixed argument the module's run() uses.
    rds.report(prog, True)
    rds.rds_ib_conn_ring_info(prog, 0xDEADBEEF)

0 commit comments

Comments
 (0)