Skip to content

Commit f946fb9

Browse files
committed
rds: Added a verbose option to extract additional debug information
The "verbose" option is added to the drgn RDS helper to extract data that is not required in general, but can help get the exact state of different objects of an RDS connection if needed. Signed-off-by: Anand Khoje <[email protected]>
1 parent ad09667 commit f946fb9

File tree

2 files changed

+256
-27
lines changed

2 files changed

+256
-27
lines changed

drgn_tools/rds.py

Lines changed: 252 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
from drgn.helpers.linux import for_each_online_cpu
2929
from drgn.helpers.linux import per_cpu
3030
from drgn.helpers.linux import xa_for_each
31+
from drgn.helpers.linux.bitops import test_bit
32+
from drgn.helpers.linux.cpumask import cpu_online_mask
3133
from drgn.helpers.linux.list import hlist_for_each_entry
3234
from drgn.helpers.linux.list import list_empty
3335
from drgn.helpers.linux.list import list_for_each
@@ -654,6 +656,7 @@ def rds_stats(prog: drgn.Program, fields: Optional[str] = None) -> None:
654656
@redirectable
655657
def rds_conn_info(
656658
prog: drgn.Program,
659+
args: argparse.Namespace,
657660
laddr: Optional[str] = None,
658661
faddr: Optional[str] = None,
659662
tos: Optional[str] = None,
@@ -729,30 +732,132 @@ def rds_conn_info(
729732
if int(conn.c_path.cp_pending_flush):
730733
flags = flags[:3] + "E"
731734

732-
ib_conn_info = rds_get_ib_conn_info(ic)
735+
# Structures for non-up connections
736+
if args.verbose:
737+
with open("conn_structure_dump.txt", "a") as f:
738+
print("## rds_connection ##", file=f)
739+
if conn_state != "RDS_CONN_UP":
740+
print(
741+
f"\n\nConnection : <{conn_laddr}, {conn_faddr}, {conn_tos}>",
742+
file=f,
743+
)
744+
print(
745+
f"==== rds_connection ({hex(conn.value_())}) state={conn_state} ====",
746+
file=f,
747+
)
748+
print("## rds_connection ##", file=f)
749+
try:
750+
print(conn, file=f)
751+
except Exception as e:
752+
print(f"<Error printing rds_connection: {e}>", file=f)
753+
754+
print("\n## rds_conn_path ##", file=f)
755+
try:
756+
cp = conn.c_path
757+
print(cp, file=f)
758+
except Exception as e:
759+
print(f"<Error printing rds_conn_path: {e}>", file=f)
760+
761+
if trans_name == "infiniband" and ic is not None:
762+
print("\n## rds_ib_connection ##", file=f)
763+
try:
764+
print(ic, file=f)
765+
except Exception as e:
766+
print(
767+
f"<Error printing rds_ib_connection: {e}>",
768+
file=f,
769+
)
770+
771+
# RDMA cm id
772+
i_cm_id = ic.member_("i_cm_id")
773+
print("\n## rdma_cm_id (i_cm_id) ##", file=f)
774+
try:
775+
print(i_cm_id, file=f)
776+
except Exception as e:
777+
print(f"<Error printing rdma_cm_id: {e}>", file=f)
778+
779+
# struct rdma_id_private
780+
id_priv = None
781+
if i_cm_id:
782+
try:
783+
id_priv = container_of(
784+
i_cm_id, "struct rdma_id_private", "id"
785+
)
786+
print("\n## rdma_id_private ##", file=f)
787+
print(id_priv)
788+
except Exception as e:
789+
print(
790+
f"<Error printing rdma_id_private: {e}>",
791+
file=f,
792+
)
793+
else:
794+
print("\n## rdma_id_private ##\n<None>", file=f)
795+
796+
# ib_cm_id
797+
if id_priv:
798+
try:
799+
ib_cm_id = cast(
800+
"struct ib_cm_id *", id_priv.cm_id.ib
801+
)
802+
print("\n## ib_cm_id ##", file=f)
803+
print(ib_cm_id, file=f)
804+
except Exception as e:
805+
print(
806+
f"<Error printing ib_cm_id: {e}>", file=f
807+
)
808+
else:
809+
print("\n## ib_cm_id ##\n<None>", file=f)
810+
811+
# ibqp
812+
ibqp = (
813+
getattr(i_cm_id, "qp", None) if i_cm_id else None
814+
)
815+
print("\n## ibqp ##", file=f)
816+
try:
817+
print(ibqp, file=f)
818+
except Exception as e:
819+
print(f"<Error printing ibqp: {e}>", file=f)
820+
821+
# mlx5_ib_qp
822+
if ibqp:
823+
try:
824+
mlx5_ib_qp = container_of(
825+
ibqp, "struct mlx5_ib_qp", "ibqp"
826+
)
827+
print("\n## mlx5_ib_qp ##", file=f)
828+
print(mlx5_ib_qp, file=f)
829+
except Exception as e:
830+
print(
831+
f"<Error printing mlx5_ib_qp: {e}>", file=f
832+
)
833+
else:
834+
print("\n## mlx5_ib_qp ##\n<None>", file=f)
835+
print("\n" + "=" * 60 + "\n", file=f)
836+
837+
ib_conn_info = rds_get_ib_conn_info(ic)
733838

734-
index += 1
735-
ib_conn_list.append(ic)
736-
conn_list.append(
737-
[
738-
index,
739-
conn_val,
740-
ib_conn,
741-
conn_path,
742-
conn_tos,
743-
conn_laddr,
744-
conn_faddr,
745-
conn_state,
746-
conn_next_tx,
747-
conn_next_rx,
748-
flags,
749-
conn_time,
750-
hex(ib_conn_info.i_cm_id_val),
751-
ib_conn_info.rdma_cm_state_val,
752-
hex(ib_conn_info.ib_cm_id_val),
753-
ib_conn_info.ib_cm_state_val,
754-
]
755-
)
839+
index += 1
840+
ib_conn_list.append(ic)
841+
conn_list.append(
842+
[
843+
index,
844+
conn_val,
845+
ib_conn,
846+
conn_path,
847+
conn_tos,
848+
conn_laddr,
849+
conn_faddr,
850+
conn_state,
851+
conn_next_tx,
852+
conn_next_rx,
853+
flags,
854+
conn_time,
855+
hex(ib_conn_info.i_cm_id_val),
856+
ib_conn_info.rdma_cm_state_val,
857+
hex(ib_conn_info.ib_cm_id_val),
858+
ib_conn_info.ib_cm_state_val,
859+
]
860+
)
756861

757862
print_table(conn_list)
758863

@@ -1629,8 +1734,115 @@ def rds_get_mr_list_info(
16291734
)
16301735

16311736

1737+
def is_cpu_online(prog: drgn.Program, cpu: int) -> str:
    """
    Tell whether a CPU's bit is set in the online CPU mask.

    :param prog: drgn program
    :param cpu: CPU number to look up in the online mask
    :returns: ``"on"`` when the bit for ``cpu`` is set, ``"off"`` otherwise
    """
    online_bits = cpu_online_mask(prog).bits
    return "on" if test_bit(cpu, online_bits) else "off"
1742+
1743+
16321744
@redirectable
1633-
def report(prog: drgn.Program) -> None:
1745+
def rds_conn_cpu_info(
    prog: drgn.Program,
    laddr: Optional[str] = None,
    faddr: Optional[str] = None,
    state: Optional[str] = None,
    tos: Optional[str] = None,
) -> None:
    """
    Display the preferred send/receive CPUs of RDS connections

    One table row is printed per matching connection. Each CPU column shows
    the CPU number followed by ``[on]`` or ``[off]`` depending on whether
    that CPU is set in the online CPU mask. Connections whose transport is
    not "infiniband" have no ``rds_ib_connection`` to read from, so their
    CPU columns are reported as "N/A".

    :param prog: drgn program
    :param laddr: comma separated string list of LOCAL-IP. Ex: '192.168.X.X, 10.211.X.X, ...'
    :param faddr: comma separated string list of REMOTE-IP. Ex: '192.168.X.X, 10.211.X.X, ...'
    :param state: comma separated string list of conn states. Ex 'RDS_CONN_UP, CONNECTING, ...'
    :param tos: comma separated string list of TOS. Ex: '0, 3, ...'
    :returns: None
    """
    msg = ensure_debuginfo(prog, ["rds"])
    if msg:
        print(msg)
        return None

    def cpu_with_state(cpu_obj: Any) -> str:
        # Render "<cpu> [on|off]" for one preferred-CPU field.
        cpu = int(cpu_obj)
        return f"{cpu} [{is_cpu_online(prog, cpu)}]"

    conn_list: List[List[Any]] = [
        [
            "",  # index
            "rds_conn",
            "ib_conn",
            "Conn Path",
            "ToS",
            "Local Addr",
            "Remote Addr",
            "State",
            "preferred_send_cpu [state]",
            "preferred_recv_cpu [state]",
            "preferred_recv_sibling_cpu [state]",
        ]
    ]
    for index, conn in enumerate(
        for_each_rds_conn(prog, laddr, faddr, tos, state)
    ):
        conn_val = hex(conn.value_())
        trans_name = "".join(re.findall('"([^"]*)"', str(conn.c_trans.t_name)))
        if trans_name == "infiniband":
            ic: Any = cast(
                "struct rds_ib_connection *", conn.c_path.cp_transport_data
            )
            ib_conn = hex(ic.value_())
        else:
            ic = None
            ib_conn = "N/A"
        conn_tos = int(conn.c_tos)
        conn_path = hex(conn.c_path.value_())
        conn_laddr = rds_inet_ntoa(conn.c_laddr)
        conn_faddr = rds_inet_ntoa(conn.c_faddr)
        conn_state = rds_conn_path_state(conn)

        if ic is None:
            # The preferred-CPU fields are read from rds_ib_connection, which
            # only exists for the infiniband transport. Dereferencing None
            # here would abort the whole report.
            preferred_send_cpu = "N/A"
            preferred_recv_cpu = "N/A"
            preferred_recv_sibling = "N/A"
        else:
            preferred_send_cpu = cpu_with_state(ic.i_preferred_send_cpu)
            preferred_recv_cpu = cpu_with_state(ic.i_preferred_recv_cpu)
            try:
                preferred_recv_sibling = cpu_with_state(
                    ic.i_preferred_recv_sibling
                )
            except Exception:
                # Deliberate best effort: any failure reading this field is
                # shown as "NA" rather than failing the report.
                # NOTE(review): presumably the member is absent in some
                # kernel versions - confirm.
                preferred_recv_sibling = "NA"

        conn_list.append(
            [
                index,
                conn_val,
                ib_conn,
                conn_path,
                conn_tos,
                conn_laddr,
                conn_faddr,
                conn_state,
                preferred_send_cpu,
                preferred_recv_cpu,
                preferred_recv_sibling,
            ]
        )
    print("RDS connections CPU information")
    print_table(conn_list)
1842+
1843+
1844+
@redirectable
1845+
def report(prog: drgn.Program, args: argparse.Namespace) -> None:
16341846
"""
16351847
Generate a report of RDS related data.
16361848
This function runs all the functions in the module and saves the results to the output file provided.
@@ -1646,10 +1858,11 @@ def report(prog: drgn.Program) -> None:
16461858
# rds_dev_info(prog)
16471859
# rdma_resource_usage(prog)
16481860
rds_sock_info(prog)
1649-
rds_conn_info(prog)
1861+
rds_conn_info(prog, args)
16501862
rds_info_verbose(prog)
16511863
rds_conn_cq_eq_info(prog)
16521864
rds_stats(prog)
1865+
rds_conn_cpu_info(prog)
16531866
rds_print_msg_queue(prog, queue="All")
16541867

16551868

@@ -1662,6 +1875,19 @@ class Rds(CorelensModule):
16621875
# We access information from the following modules #
16631876
debuginfo_kmods = ["mlx5_core", "mlx4_core", "mlx5_ib", "mlx4_ib"]
16641877

1878+
default_args = [
1879+
[
1880+
"--verbose",
1881+
]
1882+
]
1883+
1884+
def add_args(self, parser: argparse.ArgumentParser) -> None:
    """
    Register this module's command-line options on the given parser.

    :param parser: argument parser that receives the ``--verbose`` flag
    :returns: None
    """
    verbose_options = {
        "action": "store_true",
        "help": "Print additional debug information",
    }
    parser.add_argument("--verbose", **verbose_options)
1890+
16651891
def run(self, prog: Program, args: argparse.Namespace) -> None:
1666-
report(prog)
1892+
report(prog, args)
16671893
rds_ib_conn_ring_info(prog, 0xDEADBEEF)

tests/test_rds.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
# Copyright (c) 2023, Oracle and/or its affiliates.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3+
import argparse
4+
35
from drgn_tools import rds
46

57

68
def test_run_rds(prog):
7-
rds.report(prog)
9+
args = argparse.Namespace(verbose=True)
10+
rds.report(prog, args)
811
rds.rds_ib_conn_ring_info(prog, 0xDEADBEEF)

0 commit comments

Comments
 (0)