Skip to content

Commit 10ed4da

Browse files
committed
Modularize the functions
1 parent bf71b46 commit 10ed4da

File tree

1 file changed

+69
-60
lines changed

1 file changed

+69
-60
lines changed

scripts/reboot

Lines changed: 69 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -184,14 +184,60 @@ function check_conflict_boot_in_fw_update()
184184
fi
185185
}
186186

187+
# Retrieve the IP address of a DPU from CONFIG_DB.
#
# Globals:
#   EXIT_ERROR (read)  - exit status used on failure
#   dpu_ip     (write) - set to the value read from CONFIG_DB
# Arguments:
#   $1 - DPU name; used to build the CONFIG_DB key
#        "DHCP_SERVER_IPV4_PORT|bridge-midplane|<name>"
# Outputs:
#   The retrieved value on stdout, so that `dpu_ip=$(get_dpu_ip NAME)` works.
#   All diagnostics go to stderr so they cannot end up in the captured value.
# Exits:
#   ${EXIT_ERROR} when the lookup fails or returns an empty value. Inside a
#   command substitution this only ends the subshell, so a caller capturing
#   the output has to check the substitution's exit status itself.
function get_dpu_ip()
{
    local DPU_NAME=$1

    # dpu_ip is intentionally not declared local (same as before), so a direct,
    # non-substitution call still leaves the value visible to the caller.
    dpu_ip=$(sonic-db-cli CONFIG_DB HGET "DHCP_SERVER_IPV4_PORT|bridge-midplane|${DPU_NAME}" "ips")
    if [ $? -ne 0 ] || [ -z "$dpu_ip" ]; then
        echo "Error: Failed to retrieve DPU IP address for ${DPU_NAME}" >&2
        exit ${EXIT_ERROR}
    fi

    # debug is defined elsewhere in this script; redirect it in case it writes
    # to stdout, which would otherwise pollute the value captured by callers.
    debug "$DPU_NAME ip: $dpu_ip" >&2

    printf '%s\n' "$dpu_ip"
}
198+
199+
# Retrieve the GNMI port from CONFIG_DB (key "GNMI|gnmi", field "port").
#
# Globals:
#   EXIT_ERROR (read)  - exit status used on failure
#   port       (write) - set to the value read from CONFIG_DB
#   DPU_NAME   (read)  - only used in the debug message; it is not a parameter
#                        of this function, so it is whatever the caller's
#                        scope provides (possibly empty)
# Arguments:
#   none
# Outputs:
#   The port on stdout, so that `port=$(get_gnmi_port)` works.
#   All diagnostics go to stderr so they cannot end up in the captured value.
# Exits:
#   ${EXIT_ERROR} when the lookup fails or returns an empty value. Inside a
#   command substitution this only ends the subshell, so a caller capturing
#   the output has to check the substitution's exit status itself.
function get_gnmi_port() {
    # port is intentionally not declared local (same as before), so a direct,
    # non-substitution call still leaves the value visible to the caller.
    port=$(sonic-db-cli CONFIG_DB HGET "GNMI|gnmi" "port")
    if [ $? -ne 0 ] || [ -z "$port" ]; then
        echo "Error: Failed to retrieve GNMI port" >&2
        exit ${EXIT_ERROR}
    fi

    # debug is defined elsewhere in this script; redirect it in case it writes
    # to stdout, which would otherwise pollute the value captured by callers.
    debug "$DPU_NAME GNMI port:$port" >&2

    printf '%s\n' "$port"
}
208+
209+
# Query a DPU for its reboot status through the gNOI RebootStatus RPC.
#
# Globals:
#   EXIT_ERROR    (read)  - exit status used on failure
#   reboot_status (write) - set to the raw gnoi_client response
#   DPU_NAME      (read)  - only used in the error message; it is not a
#                           parameter, so it comes from the caller's scope
# Arguments:
#   $1 - DPU IP address
#   $2 - GNMI port
# Outputs:
#   The raw RebootStatus response on stdout, so that
#   `reboot_status=$(get_reboot_status IP PORT)` receives it unmodified.
#   Diagnostics go to stderr to keep the captured response clean.
# Exits:
#   ${EXIT_ERROR} when gnoi_client fails or returns nothing. Inside a command
#   substitution this only ends the subshell, so a caller capturing the output
#   has to check the substitution's exit status itself.
function get_reboot_status()
{
    local dpu_ip=$1
    local port=$2

    # The target is quoted so an unexpected value cannot be word-split into
    # extra gnoi_client arguments.
    reboot_status=$(gnoi_client -target "${dpu_ip}:${port}" -logtostderr -insecure -rpc RebootStatus)
    if [ $? -ne 0 ] || [ -z "$reboot_status" ]; then
        echo "Error: Failed to send reboot status command to DPU ${DPU_NAME}" >&2
        exit ${EXIT_ERROR}
    fi

    # debug is defined elsewhere in this script; redirect it in case it writes
    # to stdout, which would otherwise be mixed into the response.
    debug "$reboot_status" >&2

    # Callers parse this output (e.g. grep for "active"), so emit it verbatim.
    printf '%s\n' "$reboot_status"
}
221+
222+
# Look up the bus_info entry of a DPU in the platform JSON file.
#
# Globals:
#   PLATFORM_JSON_PATH (read)  - path of the JSON file handed to jq
#   EXIT_ERROR         (read)  - exit status used on failure
#   DPU_BUS_INFO       (write) - set to the value extracted by jq
# Arguments:
#   $1 - DPU name; selects the matching object under .DPUS[]
# Outputs:
#   The bus_info value on stdout, so that
#   `DPU_BUS_INFO=$(get_dpu_bus_info NAME)` works.
#   Diagnostics go to stderr to keep the captured value clean.
# Exits:
#   ${EXIT_ERROR} when jq fails, when no entry matches the DPU name (empty
#   output), or when the entry has no bus_info (jq -r prints "null"). Inside a
#   command substitution this only ends the subshell, so a caller capturing
#   the output has to check the substitution's exit status itself.
function get_dpu_bus_info() {
    local DPU_NAME=$1

    DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH")
    # "null" is what jq -r emits for a missing key; treating it as a valid bus
    # address would later build a bogus /sys/bus/pci/devices/null path.
    if [ $? -ne 0 ] || [ -z "$DPU_BUS_INFO" ] || [ "$DPU_BUS_INFO" = "null" ]; then
        echo "Error: bus_info not found for DPU ${DPU_NAME}" >&2
        exit ${EXIT_ERROR}
    fi

    # debug is defined elsewhere in this script; redirect it in case it writes
    # to stdout, which would otherwise pollute the value captured by callers.
    debug "$DPU_NAME : $DPU_BUS_INFO" >&2

    printf '%s\n' "$DPU_BUS_INFO"
}
232+
233+
# Reboot a DPU module by calling reboot_helper.reboot_module() in Python.
#
# Globals:
#   EXIT_ERROR    (read)  - exit status used on failure
#   reboot_status (write) - set to whatever the Python one-liner prints
# Arguments:
#   $1 - DPU name handed to reboot_helper.reboot_module()
# Outputs:
#   An error line on stdout when the reboot is considered failed.
# Exits:
#   ${EXIT_ERROR} when the helper prints nothing or prints a false value.
function reboot_platform_module() {
    local DPU_NAME=$1

    # The name is passed as an argument (sys.argv[1]) rather than pasted into
    # the Python source, so quotes or other special characters in it cannot
    # break or alter the executed code.
    reboot_status=$(python3 -c 'import sys, reboot_helper; reboot_helper.reboot_module(sys.argv[1])' "${DPU_NAME}")

    # NOTE(review): the result is taken from the helper's stdout; what exactly
    # reboot_module prints is not visible here. Both "false" and Python's own
    # spelling "False" are treated as failure - confirm against reboot_helper.
    case "$reboot_status" in
        ""|false|False)
            echo "Error: Failed to reboot the platform"
            exit ${EXIT_ERROR}
            ;;
    esac
}
196242

197243
function reboot_dpu_module()
@@ -201,20 +247,9 @@ function reboot_dpu_module()
201247

202248
debug "User requested rebooting device ${DPU_NAME} ..."
203249

204-
# Retrieve DPU IP from CONFIG_DB
205-
dpu_ip=$(sonic-db-cli CONFIG_DB HGET "DHCP_SERVER_IPV4_PORT|bridge-midplane|${DPU_NAME}" "ips")
206-
if [ $? -ne 0 ] || [ -z "$dpu_ip" ]; then
207-
echo "Error: Failed to retrieve DPU IP address for ${DPU_NAME}"
208-
exit ${EXIT_ERROR}
209-
fi
210-
211-
# Retrieve GNMI port from CONFIG_DB
212-
port=$(sonic-db-cli CONFIG_DB HGET "GNMI|gnmi" "port")
213-
debug "GNMI port ${port}"
214-
if [ $? -ne 0 ] || [ -z "$port" ]; then
215-
echo "Error: Failed to retrieve GNMI port"
216-
exit ${EXIT_ERROR}
217-
fi
250+
# Retrieve DPU IP and GNMI port
251+
dpu_ip=$(get_dpu_ip "${DPU_NAME}")
252+
port=$(get_gnmi_port)
218253

219254
# Issue GNOI client command to reboot the DPU
220255
gnoi_client -target ${dpu_ip}:${port} -logtostderr -insecure -rpc Reboot -jsonin '{"method":3}'
@@ -229,60 +264,30 @@ function reboot_dpu_module()
229264
# Poll on reboot status response with a timeout mechanism
230265
poll_interval=5
231266
waited_time=0
232-
233267
while true; do
234-
reboot_status=$(get_reboot_status)
268+
reboot_status=$(get_reboot_status "${dpu_ip}" "${port}")
235269
debug "GNOI RebootStatus response ${reboot_status}"
236270
is_reboot_active=$(echo "$reboot_status" | grep "active" | awk '{print $2}')
237-
238271
if [ "$is_reboot_active" == "false" ]; then
239272
break
240273
fi
241274

242275
sleep "$poll_interval"
243276
waited_time=$((waited_time + poll_interval))
244-
245-
if [ $waited_time -ge $dpu_halt_services_timeout ]; then
277+
if [ $waited_time -ge $dpu_halt_services_timeout ]; then
246278
echo "Error: Timeout waiting for DPU ${DPU_NAME} to finish rebooting"
247279
exit ${EXIT_ERROR}
248280
fi
249281
done
250282

251-
# Check if the given DPU_NAME exists in the JSON file
252-
DPU_EXISTS=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME]' "$PLATFORM_JSON_PATH" 2>/dev/null)
253-
254-
if [ -n "$DPU_EXISTS" ]; then
255-
# Retrieve bus_info for the given DPU_NAME
256-
DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH")
257-
258-
if [ -n "$DPU_BUS_INFO" ]; then
259-
debug "DPU: ${DPU_NAME}, Bus Info: ${BUS_INFO}"
260-
else
261-
echo "Error: bus_info not found for DPU ${DPU_NAME}"
262-
exit ${EXIT_ERROR}
263-
fi
264-
else
265-
echo "Error: DPU ${DPU_NAME} not found in platform.json"
266-
exit ${EXIT_ERROR}
267-
fi
283+
# Check if DPU exists and retrieve bus info
284+
DPU_BUS_INFO=$(get_dpu_bus_info "${DPU_NAME}")
268285

269-
# Update STATE_DB with DPU PCIe key
270-
sonic-db-cli state_db set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "${DPU_INDEX}", "dpu_state": "detaching", "bus_info": ${DPU_BUS_INFO}}'
271-
272-
# Detach the DPU module PCIe
286+
# Update STATE_DB and handle PCIe removal and rescan
287+
sonic-db-cli state_db set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}'
273288
echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove
274-
275-
# Reboot the DPU via platform vendor API
276-
reboot_status=$(python3 -c "import reboot_helper; reboot_helper.reboot_module('${DPU_NAME}')")
277-
if [ -z "$reboot_status" ] || [ "$reboot_status" = "false" ]; then
278-
echo "Error: Failed to reboot the platform"
279-
exit ${EXIT_ERROR}
280-
fi
281-
282-
# Rescan the PCIe
289+
reboot_platform_module "${DPU_NAME}"
283290
echo 1 > /sys/bus/pci/rescan
284-
285-
# Update STATE_DB to delete DPU PCIe key
286291
sonic-db-cli state_db del "PCIE_DETACH_INFO|${DPU_NAME}"
287292
}
288293

@@ -332,6 +337,17 @@ function linecard_reboot_notify_supervisor()
332337
fi
333338
}
334339

340+
# Reboot every DPU module (dpu0 .. dpu<N-1>) in parallel and wait for all of
# them to finish.
#
# Globals:
#   EXIT_ERROR (read) - return status used when any reboot fails (1 if unset)
# Arguments:
#   $1 - number of DPUs to reboot
# Outputs:
#   One "Rebooting DPU module dpuN" line per DPU, plus one error line for each
#   DPU whose background reboot job ended with a non-zero status.
# Returns:
#   0 when every background job succeeded, non-zero otherwise. The function
#   never exits the script itself; the caller decides how to react.
function reboot_all_dpus() {
    local NUM_DPU=$1
    local i
    local rc=0
    local -a pids=()

    for (( i = 0; i < NUM_DPU; i++ )); do
        echo "Rebooting DPU module dpu$i"
        reboot_dpu_module "dpu$i" &
        # Remember each job's PID: a bare `wait` would discard the individual
        # exit statuses and hide failed reboots.
        pids+=("$!")
    done

    # Index i in pids corresponds to module dpu<i>.
    for i in "${!pids[@]}"; do
        if ! wait "${pids[$i]}"; then
            echo "Error: Failed to reboot DPU module dpu$i"
            rc=${EXIT_ERROR:-1}
        fi
    done

    return "$rc"
}
350+
335351
parse_options $@
336352

337353
# Exit if not superuser
@@ -342,10 +358,8 @@ fi
342358

343359
debug "User requested rebooting device ..."
344360

345-
# Check for smart switch by parsing platform.json file
346361
if [ -f "$PLATFORM_JSON_PATH" ]; then
347362
NUM_DPU=$(jq -r '.DPUS | length' "$PLATFORM_JSON_PATH" 2>/dev/null)
348-
349363
if [ "$NUM_DPU" -gt 0 ]; then
350364
SMART_SWITCH="yes"
351365
fi
@@ -355,12 +369,7 @@ if [[ "$REBOOT_DPU" == "yes" && "$SMART_SWITCH" == "yes" ]]; then
355369
echo "User requested to reboot the device ${DPU_MODULE_NAME}"
356370
reboot_dpu_module "$DPU_MODULE_NAME"
357371
elif [ "$SMART_SWITCH" == "yes" ]; then
358-
# Loop to iterate over DPUs and invoke reboot_dpu_module in parallel
359-
for (( i=0; i<"$NUM_DPU"; i++ )); do
360-
echo "Rebooting DPU module $i"
361-
reboot_dpu_module "dpu$i" &
362-
done
363-
wait
372+
reboot_all_dpus "$NUM_DPU"
364373
fi
365374

366375
check_conflict_boot_in_fw_update

0 commit comments

Comments
 (0)