2828from drgn .helpers .linux import for_each_online_cpu
2929from drgn .helpers .linux import per_cpu
3030from drgn .helpers .linux import xa_for_each
31+ from drgn .helpers .linux .bitops import test_bit
32+ from drgn .helpers .linux .cpumask import cpu_online_mask
3133from drgn .helpers .linux .list import hlist_for_each_entry
3234from drgn .helpers .linux .list import list_empty
3335from drgn .helpers .linux .list import list_for_each
@@ -654,6 +656,7 @@ def rds_stats(prog: drgn.Program, fields: Optional[str] = None) -> None:
654656@redirectable
655657def rds_conn_info (
656658 prog : drgn .Program ,
659+ verbose : bool = False ,
657660 laddr : Optional [str ] = None ,
658661 faddr : Optional [str ] = None ,
659662 tos : Optional [str ] = None ,
@@ -729,6 +732,108 @@ def rds_conn_info(
729732 if int (conn .c_path .cp_pending_flush ):
730733 flags = flags [:3 ] + "E"
731734
735+ # Structures for non-up connections
736+ if verbose :
737+ with open ("conn_structure_dump.txt" , "a" ) as f :
738+ print ("## rds_connection ##" , file = f )
739+ if conn_state != "RDS_CONN_UP" :
740+ print (
741+ f"\n \n Connection : <{ conn_laddr } , { conn_faddr } , { conn_tos } >" ,
742+ file = f ,
743+ )
744+ print (
745+ f"==== rds_connection ({ hex (conn .value_ ())} ) state={ conn_state } ====" ,
746+ file = f ,
747+ )
748+ print ("## rds_connection ##" , file = f )
749+ try :
750+ print (conn , file = f )
751+ except Exception as e :
752+ print (f"<Error printing rds_connection: { e } >" , file = f )
753+
754+ print ("\n ## rds_conn_path ##" , file = f )
755+ try :
756+ cp = conn .c_path
757+ print (cp , file = f )
758+ except Exception as e :
759+ print (f"<Error printing rds_conn_path: { e } >" , file = f )
760+
761+ if trans_name == "infiniband" and ic is not None :
762+ print ("\n ## rds_ib_connection ##" , file = f )
763+ try :
764+ print (ic , file = f )
765+ except Exception as e :
766+ print (
767+ f"<Error printing rds_ib_connection: { e } >" ,
768+ file = f ,
769+ )
770+
771+ # RDMA cm id
772+ i_cm_id = ic .member_ ("i_cm_id" )
773+ print ("\n ## rdma_cm_id (i_cm_id) ##" , file = f )
774+ try :
775+ print (i_cm_id , file = f )
776+ except Exception as e :
777+ print (f"<Error printing rdma_cm_id: { e } >" , file = f )
778+
779+ # struct rdma_id_private
780+ id_priv = None
781+ if i_cm_id :
782+ try :
783+ id_priv = container_of (
784+ i_cm_id , "struct rdma_id_private" , "id"
785+ )
786+ print ("\n ## rdma_id_private ##" , file = f )
787+ print (id_priv , file = f )
788+ except Exception as e :
789+ print (
790+ f"<Error printing rdma_id_private: { e } >" ,
791+ file = f ,
792+ )
793+ else :
794+ print ("\n ## rdma_id_private ##\n <None>" , file = f )
795+
796+ # ib_cm_id
797+ if id_priv :
798+ try :
799+ ib_cm_id = cast (
800+ "struct ib_cm_id *" , id_priv .cm_id .ib
801+ )
802+ print ("\n ## ib_cm_id ##" , file = f )
803+ print (ib_cm_id , file = f )
804+ except Exception as e :
805+ print (
806+ f"<Error printing ib_cm_id: { e } >" , file = f
807+ )
808+ else :
809+ print ("\n ## ib_cm_id ##\n <None>" , file = f )
810+
811+ # ibqp
812+ ibqp = (
813+ getattr (i_cm_id , "qp" , None ) if i_cm_id else None
814+ )
815+ print ("\n ## ibqp ##" , file = f )
816+ try :
817+ print (ibqp , file = f )
818+ except Exception as e :
819+ print (f"<Error printing ibqp: { e } >" , file = f )
820+
821+ # mlx5_ib_qp
822+ if ibqp :
823+ try :
824+ mlx5_ib_qp = container_of (
825+ ibqp , "struct mlx5_ib_qp" , "ibqp"
826+ )
827+ print ("\n ## mlx5_ib_qp ##" , file = f )
828+ print (mlx5_ib_qp , file = f )
829+ except Exception as e :
830+ print (
831+ f"<Error printing mlx5_ib_qp: { e } >" , file = f
832+ )
833+ else :
834+ print ("\n ## mlx5_ib_qp ##\n <None>" , file = f )
835+ print ("\n " + "=" * 60 + "\n " , file = f )
836+
732837 ib_conn_info = rds_get_ib_conn_info (ic )
733838
734839 index += 1
@@ -1629,8 +1734,114 @@ def rds_get_mr_list_info(
16291734 )
16301735
16311736
def cpu_online_state(prog: drgn.Program, cpu: int) -> str:
    """
    Describe whether a CPU is set in the online CPU mask.

    Tests bit ``cpu`` in the ``bits`` member of the object returned by
    ``cpu_online_mask(prog)``.

    :param prog: drgn program
    :param cpu: CPU number to look up
    :returns: ``"on"`` if the bit is set, ``"off"`` otherwise
    """
    online_bits = cpu_online_mask(prog).bits
    return "on" if test_bit(cpu, online_bits) else "off"
1742+
1743+
16321744@redirectable
1633- def report (prog : drgn .Program ) -> None :
def rds_conn_cpu_info(
    prog: drgn.Program,
    laddr: Optional[str] = None,
    faddr: Optional[str] = None,
    state: Optional[str] = None,
    tos: Optional[str] = None,
) -> None:
    """
    Display the preferred send/recv CPUs of the RDS connections.

    For every connection yielded by ``for_each_rds_conn`` a table row is
    emitted with the connection identity (pointers, ToS, addresses, state)
    and, for infiniband connections, the ``i_preferred_send_cpu``,
    ``i_preferred_recv_cpu`` and ``i_preferred_recv_sibling`` members of
    the ``struct rds_ib_connection``, each followed by ``[on]``/``[off]``
    as reported by ``cpu_online_state``.  Connections on any other
    transport have no ``rds_ib_connection`` and show ``N/A`` instead.

    :param prog: drgn program
    :param laddr: comma separated string list of LOCAL-IP. Ex: '192.168.X.X, 10.211.X.X, ...'
    :param faddr: comma separated string list of REMOTE-IP. Ex: '192.168.X.X, 10.211.X.X, ...'
    :param state: comma separated string list of conn states. Ex 'RDS_CONN_UP, CONNECTING, ...'
    :param tos: comma separated string list of TOS. Ex: '0, 3, ...'
    :returns: None
    """
    msg = ensure_debuginfo(prog, ["rds"])
    if msg:
        print(msg)
        return None

    def _cpu_with_state(cpu_obj: Any) -> str:
        # Render "<cpu> [on|off]" for one preferred-CPU member.
        cpu = int(cpu_obj)
        return f"{cpu} [{cpu_online_state(prog, cpu)}]"

    conn_list = Table(
        [
            " ",  # index
            "rds_conn",
            "ib_conn",
            "Conn Path",
            "ToS",
            "Local Addr",
            "Remote Addr",
            "State",
            "preferred_send_cpu [state]",
            "preferred_recv_cpu [state]",
            "preferred_recv_sibling_cpu [state]",
        ]
    )

    for index, conn in enumerate(
        for_each_rds_conn(prog, laddr, faddr, tos, state)
    ):
        conn_val = hex(conn.value_())
        # t_name is rendered by drgn as a quoted string; keep only the text
        # between the quotes.
        trans_name = "".join(re.findall('"([^"]*)"', str(conn.c_trans.t_name)))

        if trans_name == "infiniband":
            ic: Any = cast(
                "struct rds_ib_connection *", conn.c_path.cp_transport_data
            )
            ib_conn = hex(ic.value_())
            preferred_send_cpu = _cpu_with_state(ic.i_preferred_send_cpu)
            preferred_recv_cpu = _cpu_with_state(ic.i_preferred_recv_cpu)
            try:
                preferred_recv_sibling = _cpu_with_state(
                    ic.i_preferred_recv_sibling
                )
            except Exception:
                # Best effort, as in the original code: any failure reading
                # this member is reported as "NA" rather than aborting the
                # table (presumably the member is missing on some kernels --
                # TODO confirm).
                preferred_recv_sibling = "NA"
        else:
            # No rds_ib_connection behind this transport: there are no
            # preferred-CPU members to read, so do not dereference anything.
            ib_conn = "N/A"
            preferred_send_cpu = "N/A"
            preferred_recv_cpu = "N/A"
            preferred_recv_sibling = "N/A"

        conn_list.row(
            index,
            conn_val,
            ib_conn,
            hex(conn.c_path.value_()),
            int(conn.c_tos),
            rds_inet_ntoa(conn.c_laddr),
            rds_inet_ntoa(conn.c_faddr),
            rds_conn_path_state(conn),
            preferred_send_cpu,
            preferred_recv_cpu,
            preferred_recv_sibling,
        )

    print("RDS connections CPU information:")
    conn_list.write()
1841+
1842+
1843+ @redirectable
1844+ def report (prog : drgn .Program , verbose : bool = False ) -> None :
16341845 """
16351846 Generate a report of RDS related data.
16361847 This functions runs all the functions in the module and saves the results to the output file provided.
@@ -1646,10 +1857,11 @@ def report(prog: drgn.Program) -> None:
16461857 # rds_dev_info(prog)
16471858 # rdma_resource_usage(prog)
16481859 rds_sock_info (prog )
1649- rds_conn_info (prog )
1860+ rds_conn_info (prog , verbose )
16501861 rds_info_verbose (prog )
16511862 rds_conn_cq_eq_info (prog )
16521863 rds_stats (prog )
1864+ rds_conn_cpu_info (prog )
16531865 rds_print_msg_queue (prog , queue = "All" )
16541866
16551867
@@ -1662,6 +1874,19 @@ class Rds(CorelensModule):
16621874 # We access information from the following modules #
16631875 debuginfo_kmods = ["mlx5_core" , "mlx4_core" , "mlx5_ib" , "mlx4_ib" ]
16641876
1877+ default_args = [
1878+ [
1879+ "--verbose" ,
1880+ ]
1881+ ]
1882+
def add_args(self, parser: argparse.ArgumentParser) -> None:
    """
    Register this module's command-line options on ``parser``.

    Adds a single boolean ``--verbose`` flag (``store_true``).

    :param parser: argument parser to extend
    :returns: None
    """
    verbose_options = {
        "action": "store_true",
        "help": "Print additional debug information",
    }
    parser.add_argument("--verbose", **verbose_options)
1889+
def run(self, prog: Program, args: argparse.Namespace) -> None:
    """
    Module entry point: produce the RDS report, then the IB ring info.

    :param prog: drgn program
    :param args: parsed command-line arguments; ``args.verbose`` is
        forwarded to ``report``
    :returns: None
    """
    want_verbose = args.verbose
    report(prog, want_verbose)
    rds_ib_conn_ring_info(prog, 0xDEADBEEF)
0 commit comments