@@ -37,10 +37,17 @@ EXIT_NEXT_IMAGE_NOT_EXISTS=4
# Exit codes for failures detected while verifying the next image / firmware.
EXIT_SONIC_INSTALLER_VERIFY_REBOOT=21
EXIT_PLATFORM_FW_AU_FAILURE=22
PLATFORM_FWUTIL_AU_REBOOT_HANDLE="platform_fw_au_reboot_handle"

# Location of the platform description file that is queried with jq for the
# DPU inventory. DEVPATH and PLATFORM are expected to be set earlier in the
# script (not shown in this part of the file).
PLATFORM_JSON_FILE="platform.json"
PLATFORM_JSON_PATH="${DEVPATH}/${PLATFORM}/${PLATFORM_JSON_FILE}"

# The script's own file name doubles as the reboot type.
REBOOT_SCRIPT_NAME=$(basename -- "$0")
REBOOT_TYPE="${REBOOT_SCRIPT_NAME}"
TAG_LATEST=no
REBOOT_FLAGS=""

# Option state; the "yes"/"no" strings are compared literally further down.
FORCE_REBOOT="no"
SMART_SWITCH="no"      # set to "yes" once platform.json reports at least one DPU
DPU_MODULE_NAME=""     # value of the -d option
REBOOT_DPU="no"        # "yes" when -d was given
PRE_SHUTDOWN="no"      # "yes" when -p was given
4451
4552function debug()
4653{
@@ -128,6 +135,8 @@ function show_help_and_exit()
128135 echo " "
129136 echo " Available options:"
130137 echo " -h, -? : getting this help"
138+ echo " -d : DPU module name on a smart switch, option is invalid when on DPU"
139+ echo " -p : Pre-shutdown steps on DPU, invalid on NPU"
131140
132141 exit ${EXIT_SUCCESS}
133142}
@@ -154,7 +163,7 @@ function reboot_pre_check()
154163 ${DEVPATH} /${PLATFORM} /${PLATFORM_REBOOT_PRE_CHECK}
155164 [[ $? -ne 0 ]] && exit $?
156165 fi
157-
166+
158167 # Verify the next image by sonic-installer
159168 local message=$( sonic-installer verify-next-image 2>&1 )
160169 if [ $? -ne 0 ]; then
@@ -176,9 +185,128 @@ function check_conflict_boot_in_fw_update()
176185 fi
177186}
178187
#######################################
# Retrieve the IP address of a DPU from CONFIG_DB.
# The result is published in the global variable dpu_ip; nothing is written
# to stdout on success (apart from whatever debug prints). Call this function
# directly: inside $(...) it runs in a subshell, so both the assignment and
# the exit below would be lost to the caller.
# Globals:   dpu_ip (written), EXIT_ERROR (read)
# Arguments: $1 - DPU module name
# Exits:     with EXIT_ERROR (1 if that is unset) when the lookup fails or
#            returns an empty value.
#######################################
function get_dpu_ip()
{
    local DPU_NAME=$1

    if ! dpu_ip=$(sonic-db-cli CONFIG_DB HGET "DHCP_SERVER_IPV4_PORT|bridge-midplane|${DPU_NAME}" "ips@") \
            || [ -z "$dpu_ip" ]; then
        echo "Error: Failed to retrieve DPU IP address for ${DPU_NAME}" >&2
        # The fallback keeps the exit status non-zero even when EXIT_ERROR is
        # not defined (a bare "exit" would reuse echo's status, i.e. 0).
        exit "${EXIT_ERROR:-1}"
    fi
    debug "$DPU_NAME ip: $dpu_ip"
}
199+
#######################################
# Retrieve the GNMI port of a DPU from CONFIG_DB.
# The result is published in the global variable port. Call this function
# directly: inside $(...) it runs in a subshell, so both the assignment and
# the exit below would be lost to the caller.
# Globals:   port (written), EXIT_ERROR (read)
# Arguments: $1 - DPU module name
# Exits:     with EXIT_ERROR (1 if that is unset) when the lookup fails or
#            returns an empty value.
#######################################
function get_gnmi_port() {
    local DPU_NAME=$1

    if ! port=$(sonic-db-cli CONFIG_DB HGET "DPU_PORT|$DPU_NAME" "gnmi") \
            || [ -z "$port" ]; then
        echo "Error: Failed to retrieve GNMI port" >&2
        # The fallback keeps the exit status non-zero even when EXIT_ERROR is
        # not defined (a bare "exit" would reuse echo's status, i.e. 0).
        exit "${EXIT_ERROR:-1}"
    fi
    debug "$DPU_NAME GNMI port:$port"
}
210+
#######################################
# Query the gNOI RebootStatus RPC of a DPU through gnoi_client, which is run
# inside the "gnmi" container.
# The raw response is published in the global variable reboot_status. Call
# this function directly: inside $(...) the assignment and the exit below
# would be confined to a subshell.
# Globals:   reboot_status (written), EXIT_ERROR (read),
#            DPU_NAME (read from the caller's scope, only for the message)
# Arguments: $1 - DPU IP address
#            $2 - GNMI port
# Exits:     with EXIT_ERROR (1 if that is unset) when the RPC fails or
#            returns nothing.
#######################################
function get_reboot_status()
{
    local dpu_ip=$1
    local port=$2

    if ! reboot_status=$(docker exec -i gnmi gnoi_client -target "${dpu_ip}:${port}" -logtostderr -insecure -rpc RebootStatus) \
            || [ -z "$reboot_status" ]; then
        # DPU_NAME is not a parameter of this function; fall back to the
        # address when the caller did not define it.
        echo "Error: Failed to send reboot status command to DPU ${DPU_NAME:-${dpu_ip}}" >&2
        exit "${EXIT_ERROR:-1}"
    fi
    debug "$reboot_status"
}
223+
#######################################
# Retrieve the bus_info entry of a DPU from the platform JSON file.
# The jq filter walks the elements of .DPUS, keeps the one that has a key
# named after the DPU and reads its bus_info member.
# The result is published in the global variable DPU_BUS_INFO. Call this
# function directly: inside $(...) the assignment and the exit below would be
# confined to a subshell.
# Globals:   DPU_BUS_INFO (written), PLATFORM_JSON_PATH, EXIT_ERROR (read)
# Arguments: $1 - DPU module name
# Exits:     with EXIT_ERROR (1 if that is unset) when jq fails or no
#            bus_info value is found.
#######################################
function get_dpu_bus_info() {
    local DPU_NAME=$1

    # "// empty" turns a missing (null) bus_info into no output at all; the
    # explicit "null" comparison is a second guard, because jq -r would
    # otherwise print the literal string "null" and it would pass the -z test.
    if ! DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" \
            '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info // empty' \
            "$PLATFORM_JSON_PATH") \
            || [ -z "$DPU_BUS_INFO" ] || [ "$DPU_BUS_INFO" = "null" ]; then
        echo "Error: bus_info not found for DPU ${DPU_NAME}" >&2
        exit "${EXIT_ERROR:-1}"
    fi
    debug "$DPU_NAME : $DPU_BUS_INFO"
}
234+
#######################################
# Reboot a platform module through reboot_helper.reboot_module().
# The text printed by the helper is stored in the global reboot_status.
# NOTE(review): success is inferred from that output being non-empty and not
# "false" in any letter case; confirm against what reboot_helper prints.
# Globals:   reboot_status (written), EXIT_ERROR (read)
# Arguments: $1 - module name
# Exits:     with EXIT_ERROR (1 if that is unset) when python3 fails, prints
#            nothing, or prints "false"/"False".
#######################################
function reboot_platform_module() {
    local DPU_NAME=$1

    # The module name is handed over as an argument (sys.argv[1]) instead of
    # being pasted into the Python source, so quotes in the name cannot break
    # or alter the executed code.
    if ! reboot_status=$(python3 -c "import sys, reboot_helper; reboot_helper.reboot_module(sys.argv[1])" "${DPU_NAME}") \
            || [ -z "$reboot_status" ] || [ "${reboot_status,,}" = "false" ]; then
        echo "Error: Failed to reboot the platform" >&2
        exit "${EXIT_ERROR:-1}"
    fi
}
244+
#######################################
# Reboot one DPU module.
# Steps: resolve the DPU address, halt timeout and PCI bus info; send the
# gNOI Reboot RPC ({"method":3}); poll RebootStatus until it reports
# "active false" or the timeout expires; record the detach in STATE_DB,
# remove the PCI device, reboot the platform module, rescan PCI and clear
# the STATE_DB record.
# Globals:   dpu_ip, port, reboot_status, DPU_BUS_INFO (written by the
#            helper functions), dpu_halt_services_timeout (written),
#            PLATFORM_JSON_PATH, EXIT_ERROR (read),
#            PCI_SYSFS_ROOT (read, optional; defaults to /sys/bus/pci)
# Arguments: $1 - DPU module name; its digits are used as the DPU id
# Exits:     non-zero on any failed step.
#######################################
function reboot_dpu_module()
{
    local DPU_NAME=$1
    local DPU_INDEX=${DPU_NAME//[!0-9]/}
    local pci_sysfs=${PCI_SYSFS_ROOT:-/sys/bus/pci}
    local poll_interval=5
    local waited_time=0
    local is_reboot_active detach_info module_rc

    debug "User requested rebooting device ${DPU_NAME} ..."

    # The lookup helpers publish their result in a global variable and call
    # exit on failure. They must therefore run in this shell: inside $(...)
    # the assignment would be lost and the exit would only end the subshell.
    dpu_ip=""
    port=""
    get_dpu_ip "${DPU_NAME}"
    get_gnmi_port "${DPU_NAME}"
    if [ -z "$dpu_ip" ] || [ -z "$port" ]; then
        echo "Error: Failed to retrieve DPU IP or GNMI port for ${DPU_NAME}" >&2
        exit "${EXIT_ERROR:-1}"
    fi

    # Everything read from platform.json is validated before the DPU is
    # touched. jq prints "null" with status 0 for a missing key, and a
    # non-numeric timeout would make the loop's comparison fail forever.
    if ! dpu_halt_services_timeout=$(jq -r '.dpu_halt_services_timeout' "$PLATFORM_JSON_PATH" 2>/dev/null) \
            || ! [[ "$dpu_halt_services_timeout" =~ ^[0-9]+$ ]]; then
        echo "Error: Failed to retrieve dpu_halt_services_timeout from ${PLATFORM_JSON_PATH}" >&2
        exit "${EXIT_ERROR:-1}"
    fi

    DPU_BUS_INFO=""
    get_dpu_bus_info "${DPU_NAME}"
    if [ -z "$DPU_BUS_INFO" ]; then
        echo "Error: bus_info not found for DPU ${DPU_NAME}" >&2
        exit "${EXIT_ERROR:-1}"
    fi

    # Issue GNOI client command to reboot the DPU
    if ! docker exec -i gnmi gnoi_client -target "${dpu_ip}:${port}" -logtostderr -insecure -rpc Reboot -jsonin '{"method":3}'; then
        echo "Error: Failed to send reboot command to DPU ${DPU_NAME}" >&2
        exit "${EXIT_ERROR:-1}"
    fi

    # Poll the reboot status until the DPU reports it is no longer active.
    while true; do
        get_reboot_status "${dpu_ip}" "${port}"
        debug "GNOI RebootStatus response ${reboot_status}"
        # Second whitespace-separated field of the line mentioning "active".
        is_reboot_active=$(awk '/active/ {print $2}' <<< "$reboot_status")
        if [ "$is_reboot_active" == "false" ]; then
            break
        fi

        sleep "$poll_interval"
        waited_time=$((waited_time + poll_interval))
        # 10# forces base 10 so a value with a leading zero is not read as octal.
        if (( waited_time >= 10#$dpu_halt_services_timeout )); then
            echo "Error: Timeout waiting for DPU ${DPU_NAME} to finish rebooting" >&2
            exit "${EXIT_ERROR:-1}"
        fi
    done

    # Record the detach in STATE_DB (DB name upper-case, as for CONFIG_DB).
    printf -v detach_info '{"dpu_id": "%s", "dpu_state": "detaching", "bus_info": "%s"}' \
        "${DPU_INDEX}" "${DPU_BUS_INFO}"
    sonic-db-cli STATE_DB set "PCIE_DETACH_INFO|${DPU_NAME}" "${detach_info}"

    echo 1 > "${pci_sysfs}/devices/${DPU_BUS_INFO}/remove" \
        || echo "Warning: Failed to remove PCI device ${DPU_BUS_INFO}" >&2

    # The module reboot runs in a subshell so that its exit-on-failure cannot
    # skip the PCI rescan and the removal of the detach record below.
    ( reboot_platform_module "${DPU_NAME}" )
    module_rc=$?

    echo 1 > "${pci_sysfs}/rescan" \
        || echo "Warning: Failed to trigger PCI rescan" >&2
    sonic-db-cli STATE_DB del "PCIE_DETACH_INFO|${DPU_NAME}"

    if [ "$module_rc" -ne 0 ]; then
        exit "$module_rc"
    fi
}
306+
179307function parse_options()
180308{
181- while getopts " h?vf " opt; do
309+ while getopts " h?vfpd " opt; do
182310 case ${opt} in
183311 h|\? )
184312 show_help_and_exit
@@ -192,6 +320,13 @@ function parse_options()
192320 f )
193321 REBOOT_FLAGS+=" -f"
194322 ;;
323+ d )
324+ REBOOT_DPU=" yes"
325+ DPU_MODULE_NAME=" $OPTARG "
326+ ;;
327+ p )
328+ PRE_SHUTDOWN=" yes"
329+ ;;
195330 esac
196331 done
197332}
@@ -215,6 +350,56 @@ function linecard_reboot_notify_supervisor()
215350 fi
216351}
217352
#######################################
# Reboot all DPUs in parallel.
# Starts reboot_dpu_module for dpu0 .. dpu<N-1> as background jobs and waits
# for every one of them.
# Arguments: $1 - number of DPUs
# Outputs:   one "Rebooting DPU module ..." line per DPU on stdout;
#            failures on stderr
# Returns:   0 when every job succeeded (or N is 0), 1 when the count is not
#            a number or at least one job failed.
#######################################
function reboot_all_dpus() {
    local NUM_DPU=$1
    local i rc=0
    local -a pids=()

    if ! [[ "$NUM_DPU" =~ ^[0-9]+$ ]]; then
        echo "Error: Invalid number of DPUs '${NUM_DPU}'" >&2
        return 1
    fi

    for ((i = 0; i < 10#$NUM_DPU; i++)); do
        echo "Rebooting DPU module dpu$i"
        reboot_dpu_module "dpu$i" &
        pids+=("$!")
    done

    # A bare "wait" always returns 0; waiting on each PID exposes the exit
    # status of the individual background jobs.
    for i in "${!pids[@]}"; do
        if ! wait "${pids[$i]}"; then
            echo "Error: Reboot of DPU module dpu$i failed" >&2
            rc=1
        fi
    done
    return "$rc"
}
363+
#######################################
# Handle the smart-switch specific parts of a reboot request.
#  1. Mark the system as a smart switch when platform.json lists DPUs.
#  2. Validate -p: required when reboot_helper.is_dpu() prints "True",
#     rejected otherwise.
#  3. With -d: reboot only the named DPU and end the script, so that the
#     request does not continue into rebooting every DPU and the switch.
#  4. Otherwise, on a smart switch that is not itself a DPU, reboot all DPUs
#     before the caller carries on.
# Globals:   NUM_DPU, SMART_SWITCH, is_dpu (written)
#            PLATFORM_JSON_PATH, REBOOT_DPU, DPU_MODULE_NAME, PRE_SHUTDOWN,
#            EXIT_ERROR, EXIT_SUCCESS (read)
# Exits:     EXIT_ERROR (1 if unset) on an invalid option combination,
#            EXIT_SUCCESS (0 if unset) after a single-DPU reboot.
#######################################
function handle_smart_switch() {
    if [ -f "$PLATFORM_JSON_PATH" ]; then
        NUM_DPU=$(jq -r '.DPUS | length' "$PLATFORM_JSON_PATH" 2>/dev/null)
        # Guard the numeric comparison: jq output is empty when it fails.
        if [[ "$NUM_DPU" =~ ^[0-9]+$ ]] && (( 10#$NUM_DPU > 0 )); then
            SMART_SWITCH="yes"
        fi
    fi

    is_dpu=$(python3 -c "import reboot_helper; reboot_helper.is_dpu()")
    debug "Is the platform DPU: $is_dpu"

    # Option validation happens before any DPU is touched.
    if [[ "$is_dpu" == "True" && "$PRE_SHUTDOWN" != "yes" ]]; then
        echo "Invalid, '-p' option not specified for a DPU" >&2
        exit "${EXIT_ERROR:-1}"
    elif [[ "$is_dpu" != "True" && "$PRE_SHUTDOWN" == "yes" ]]; then
        echo "Invalid '-p' option specified for a non-DPU" >&2
        exit "${EXIT_ERROR:-1}"
    fi

    if [[ "$REBOOT_DPU" == "yes" ]]; then
        if [[ "$SMART_SWITCH" != "yes" ]]; then
            echo "Invalid '-d' option specified for a non-smart switch" >&2
            exit "${EXIT_ERROR:-1}"
        fi
        if [[ -z "$DPU_MODULE_NAME" ]]; then
            echo "Error: '-d' option requires a DPU module name" >&2
            exit "${EXIT_ERROR:-1}"
        fi
        echo "User requested to reboot the device ${DPU_MODULE_NAME}"
        # reboot_dpu_module exits by itself on failure; returning from it
        # means the requested DPU was handled and the request is complete.
        reboot_dpu_module "$DPU_MODULE_NAME"
        exit "${EXIT_SUCCESS:-0}"
    fi

    # If not a DPU, reboot all DPUs in parallel. A failure here is reported
    # but does not stop the reboot of the switch itself.
    if [[ "$SMART_SWITCH" == "yes" && "$is_dpu" != "True" ]]; then
        if ! reboot_all_dpus "$NUM_DPU"; then
            echo "Warning: Failed to reboot one or more DPUs" >&2
        fi
    fi
}
402+
218403parse_options $@
219404
220405# Exit if not superuser
225410
226411debug " User requested rebooting device ..."
227412
413+ handle_smart_switch
414+
228415check_conflict_boot_in_fw_update
229416
230417setup_reboot_variables
@@ -287,6 +474,11 @@ if [ -x ${WATCHDOG_UTIL} ]; then
287474 ${WATCHDOG_UTIL} arm
288475fi
289476
477+ if [[ " ${PRE_SHUTDOWN} " == " yes" ]]; then
478+ echo " ${DPU_MODULE_NAME} pre-shutdown steps are completed"
479+ exit ${EXIT_SUCCESS}
480+ fi
481+
290482if [ -x ${DEVPATH} /${PLATFORM} /${PLAT_REBOOT} ]; then
291483 VERBOSE=yes debug " Rebooting with platform ${PLATFORM} specific tool ..."
292484 ${DEVPATH} /${PLATFORM} /${PLAT_REBOOT} $@
0 commit comments