Skip to content

Commit e1b9fd1

Browse files
committed
Initial commit
1 parent 0363d89 commit e1b9fd1

File tree

50 files changed

+2749
-632
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+2749
-632
lines changed

.github/actions/restore-checkpoint/action.yml

Lines changed: 182 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,24 @@ inputs:
1818
cache-valid:
1919
description: 'Whether checkpoint cache validation passed (true/false)'
2020
required: true
21+
dry-run:
22+
description: 'If true, validate checkpoints without extracting (default: false)'
23+
required: false
24+
default: 'false'
2125

2226
outputs:
2327
restored:
2428
description: 'Whether any checkpoint was restored (true/false)'
25-
value: ${{ steps.restore.outputs.restored }}
29+
value: ${{ steps.restore.outputs.restored || steps.skip.outputs.restored }}
2630
checkpoint-restored:
2731
description: 'Name of the checkpoint that was restored (empty if none)'
28-
value: ${{ steps.restore.outputs.checkpoint_restored }}
32+
value: ${{ steps.restore.outputs.checkpoint_restored || steps.skip.outputs.checkpoint_restored }}
2933
checkpoint-index:
3034
description: 'Index of restored checkpoint (0=newest, higher=older)'
31-
value: ${{ steps.restore.outputs.checkpoint_index }}
35+
value: ${{ steps.restore.outputs.checkpoint_index || steps.skip.outputs.checkpoint_index }}
3236
needs-build:
3337
description: 'Whether build needs to run to complete remaining checkpoints (true/false)'
34-
value: ${{ steps.restore.outputs.needs_build }}
38+
value: ${{ steps.restore.outputs.needs_build || steps.skip.outputs.needs_build }}
3539

3640
runs:
3741
using: 'composite'
@@ -49,19 +53,52 @@ runs:
4953
BUILD_MODE="${{ inputs.build-mode }}"
5054
CHECKPOINT_CHAIN="${{ inputs.checkpoint-chain }}"
5155
56+
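# Validate inputs as defense in depth against malformed values
# NOTE: workflow expressions are substituted into this script text before bash runs, so these
# checks cannot by themselves prevent command injection; pass inputs via env variables for that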
57+
# Use [[ =~ ]] instead of echo|grep to prevent newline injection bypass
58+
if ! [[ "${PACKAGE_NAME}" =~ ^[a-zA-Z0-9_-]+$ ]]; then
59+
echo "❌ Invalid package name: ${PACKAGE_NAME}"
60+
echo " Package name must contain only alphanumeric characters, hyphens, and underscores"
61+
exit 1
62+
fi
63+
64+
if ! [[ "${BUILD_MODE}" =~ ^(prod|dev)$ ]]; then
65+
echo "❌ Invalid build mode: ${BUILD_MODE}"
66+
echo " Build mode must be either 'prod' or 'dev'"
67+
exit 1
68+
fi
69+
70+
# Validate checkpoint chain has no empty segments
71+
if [[ "${CHECKPOINT_CHAIN}" =~ (^,|,,|,$) ]]; then
72+
echo "❌ Invalid checkpoint chain: contains empty segments"
73+
exit 1
74+
fi
75+
5276
MODE_CHECKPOINT_DIR="packages/${PACKAGE_NAME}/build/${BUILD_MODE}/checkpoints"
5377
SHARED_CHECKPOINT_DIR="packages/${PACKAGE_NAME}/build/shared/checkpoints"
5478
OUTPUT_DIR="packages/${PACKAGE_NAME}/build/${BUILD_MODE}/out"
79+
SHARED_SOURCE_DIR="packages/${PACKAGE_NAME}/build/shared/source"
80+
MODE_SOURCE_DIR="packages/${PACKAGE_NAME}/build/${BUILD_MODE}/source"
5581
5682
echo "📦 Package: ${PACKAGE_NAME}"
5783
echo "🔧 Build mode: ${BUILD_MODE}"
5884
echo "📁 Mode checkpoint directory: ${MODE_CHECKPOINT_DIR}"
5985
echo "📁 Shared checkpoint directory: ${SHARED_CHECKPOINT_DIR}"
6086
echo "📤 Output directory: ${OUTPUT_DIR}"
87+
echo "📂 Shared source directory: ${SHARED_SOURCE_DIR}"
88+
echo "📂 Mode source directory: ${MODE_SOURCE_DIR}"
6189
echo ""
6290
63-
# Parse checkpoint chain into array (comma-separated)
64-
IFS=',' read -ra CHECKPOINTS <<< "$CHECKPOINT_CHAIN"
91+
# Parse checkpoint chain into array (comma-separated, localize IFS to prevent side effects)
92+
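# The EXIT trap only fires if the script exits during the read below; the shell is terminating
# at that point, so the explicit IFS restore after read is what actually protects later code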
93+
# Reset IFS to the value saved in OLD_IFS.
# Registered as an EXIT trap while IFS is temporarily set to ',' for the
# checkpoint-chain read, and unregistered again right after that read.
restore_ifs() {
94+
IFS="$OLD_IFS"
95+
}
96+
OLD_IFS="$IFS"
97+
trap restore_ifs EXIT
98+
IFS=','
99+
read -ra CHECKPOINTS <<< "$CHECKPOINT_CHAIN"
100+
IFS="$OLD_IFS"
101+
trap - EXIT
65102
66103
echo "🔗 Checkpoint chain (newest → oldest):"
67104
INDEX=0
@@ -111,18 +148,16 @@ runs:
111148
echo ""
112149
echo "✅ Found valid checkpoint: ${CHECKPOINT} (index ${INDEX})"
113150
break
114-
115-
INDEX=$((INDEX + 1))
116151
done
117152
118153
# Check if we found any checkpoint
119154
if [ -z "${RESTORED_CHECKPOINT}" ]; then
120155
echo ""
121156
echo "❌ No valid checkpoints found in chain"
122157
echo " Available mode checkpoints:"
123-
ls -lh "${MODE_CHECKPOINT_DIR}" 2>/dev/null || echo " (mode checkpoint directory not found)"
158+
find "${MODE_CHECKPOINT_DIR}" -type f -ls 2>/dev/null || echo " (mode checkpoint directory not found)"
124159
echo " Available shared checkpoints:"
125-
ls -lh "${SHARED_CHECKPOINT_DIR}" 2>/dev/null || echo " (shared checkpoint directory not found)"
160+
find "${SHARED_CHECKPOINT_DIR}" -type f -ls 2>/dev/null || echo " (shared checkpoint directory not found)"
126161
echo ""
127162
echo "restored=false" >> $GITHUB_OUTPUT
128163
echo "checkpoint_restored=" >> $GITHUB_OUTPUT
@@ -135,6 +170,24 @@ runs:
135170
echo "📦 Restoring from checkpoint: ${RESTORED_CHECKPOINT}"
136171
echo ""
137172
173+
# Check if this is a dry-run (validation only)
174+
if [ "${{ inputs.dry-run }}" = "true" ]; then
175+
echo "🔍 DRY-RUN MODE: Validation only, skipping extraction"
176+
echo ""
177+
echo "✓ Checkpoint validation passed"
178+
echo " Checkpoint: ${RESTORED_CHECKPOINT}"
179+
echo " Index: ${RESTORED_INDEX}"
180+
echo " File: ${CHECKPOINT_FILE}"
181+
echo ""
182+
echo "ℹ️ In normal mode, this checkpoint would be extracted"
183+
echo ""
184+
echo "restored=true" >> $GITHUB_OUTPUT
185+
echo "checkpoint_restored=${RESTORED_CHECKPOINT}" >> $GITHUB_OUTPUT
186+
echo "checkpoint_index=${RESTORED_INDEX}" >> $GITHUB_OUTPUT
187+
echo "needs_build=$([ ${RESTORED_INDEX} -eq 0 ] && echo "false" || echo "true")" >> $GITHUB_OUTPUT
188+
exit 0
189+
fi
190+
138191
# Show tarball contents
139192
if [ "${RESTORED_CHECKPOINT}" = "source-cloned" ]; then
140193
CHECKPOINT_FILE="${SHARED_CHECKPOINT_DIR}/${RESTORED_CHECKPOINT}.tar.gz"
@@ -149,13 +202,127 @@ runs:
149202
fi
150203
echo ""
151204
152-
# Extract checkpoint
153-
echo "📦 Extracting checkpoint to ${OUTPUT_DIR}..."
154-
mkdir -p "${OUTPUT_DIR}"
155-
tar -xzf "${CHECKPOINT_FILE}" -C "${OUTPUT_DIR}"
205+
# Extract checkpoint to correct directory based on checkpoint name
206+
# Checkpoint structure varies by package:
207+
#
208+
# node-smol-builder:
209+
# - finalized → build/{mode}/out/Final/
210+
# - binary-compressed → build/{mode}/out/Compressed/
211+
# - binary-stripped → build/{mode}/out/Stripped/
212+
# - binary-released → build/{mode}/out/Release/
213+
# - source-cloned → build/shared/source/
214+
# - source-patched → build/{mode}/source/
215+
#
216+
# onnxruntime-builder, yoga-layout-builder:
217+
# - finalized → build/{mode}/out/Final/
218+
# - wasm-synced → build/{mode}/out/Synced/
219+
# - wasm-released → build/{mode}/out/Released/
220+
# - wasm-optimized → build/{mode}/out/Optimized/
221+
# - wasm-compiled → build/{mode}/out/Compiled/
222+
# - source-configured → build/{mode}/source/ (yoga-layout only)
223+
# - source-cloned → build/shared/source/
224+
#
225+
# models:
226+
# - finalized → build/{mode}/out/Final/
227+
# - quantized → build/{mode}/models/
228+
# - converted → build/{mode}/models/
229+
# - downloaded → build/{mode}/models/
230+
EXTRACT_DIR="${OUTPUT_DIR}"
231+
case "${RESTORED_CHECKPOINT}" in
232+
finalized)
233+
EXTRACT_DIR="${OUTPUT_DIR}/Final"
234+
;;
235+
# node-smol-builder checkpoints
236+
binary-compressed)
237+
EXTRACT_DIR="${OUTPUT_DIR}/Compressed"
238+
;;
239+
binary-stripped)
240+
EXTRACT_DIR="${OUTPUT_DIR}/Stripped"
241+
;;
242+
binary-released)
243+
EXTRACT_DIR="${OUTPUT_DIR}/Release"
244+
;;
245+
# onnxruntime-builder, yoga-layout-builder checkpoints
246+
wasm-synced)
247+
EXTRACT_DIR="${OUTPUT_DIR}/Synced"
248+
;;
249+
wasm-released)
250+
EXTRACT_DIR="${OUTPUT_DIR}/Released"
251+
;;
252+
wasm-optimized)
253+
EXTRACT_DIR="${OUTPUT_DIR}/Optimized"
254+
;;
255+
wasm-compiled)
256+
EXTRACT_DIR="${OUTPUT_DIR}/Compiled"
257+
;;
258+
source-configured)
259+
# yoga-layout-builder specific: extract to mode source directory
260+
EXTRACT_DIR="${MODE_SOURCE_DIR}"
261+
;;
262+
# models checkpoints
263+
quantized|converted|downloaded)
264+
# Models checkpoints extract to build/{mode}/models/
265+
EXTRACT_DIR="packages/${PACKAGE_NAME}/build/${BUILD_MODE}/models"
266+
;;
267+
# Source checkpoints (shared across packages)
268+
source-cloned)
269+
# Shared checkpoint: extract to shared source directory
270+
EXTRACT_DIR="${SHARED_SOURCE_DIR}"
271+
;;
272+
source-patched)
273+
# Mode-specific checkpoint: extract to mode source directory
274+
EXTRACT_DIR="${MODE_SOURCE_DIR}"
275+
;;
276+
esac
277+
278+
echo "📦 Extracting checkpoint to ${EXTRACT_DIR}..."
279+
mkdir -p "${EXTRACT_DIR}"
280+
281+
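# Sanity-check the compressed tarball size before extraction (this bounds the on-disk archive
# only; it does not detect decompression bombs, which are small while compressed)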
282+
COMPRESSED_SIZE=$(stat -c%s "${CHECKPOINT_FILE}" 2>/dev/null || stat -f%z "${CHECKPOINT_FILE}")
283+
MAX_COMPRESSED=$((10 * 1024 * 1024 * 1024)) # 10GB compressed limit
284+
285+
if [ "${COMPRESSED_SIZE}" -gt "${MAX_COMPRESSED}" ]; then
286+
echo "❌ Checkpoint tarball too large: ${COMPRESSED_SIZE} bytes (max: ${MAX_COMPRESSED})"
287+
echo " This may indicate a corrupted or malicious tarball"
288+
exit 1
289+
fi
290+
291+
echo " Tarball size: $((COMPRESSED_SIZE / 1024 / 1024))MB"
292+
293+
# Extract tarball, stripping the top-level directory wrapper
294+
# --strip-components=1: remove first directory level from paths
295+
# Example: tarball contains Final/ort.wasm → extracts to out/Final/ort.wasm
296+
# All checkpoints create directory archives for consistency
297+
if ! tar -xzf "${CHECKPOINT_FILE}" -C "${EXTRACT_DIR}" --strip-components=1; then
298+
echo "❌ Failed to extract checkpoint: ${CHECKPOINT_FILE}"
299+
echo " This may indicate disk space issues, permissions problems, or tarball corruption"
300+
exit 1
301+
fi
302+
303+
# Verify extraction produced files
304+
if [ -z "$(ls -A "${EXTRACT_DIR}" 2>/dev/null)" ]; then
305+
echo "❌ Extraction produced no files in ${EXTRACT_DIR}"
306+
echo " Tarball may be empty or extraction path may be incorrect"
307+
exit 1
308+
fi
309+
156310
echo "✅ Checkpoint extracted successfully"
157311
echo ""
158312
313+
# Verify checkpoint metadata JSON exists (needed for shouldRun() detection)
314+
# The JSON file lives alongside the tarball and is restored by the cache action
315+
CHECKPOINT_JSON="${CHECKPOINT_FILE%.tar.gz}.json"
316+
if [ -f "${CHECKPOINT_JSON}" ]; then
317+
echo "📝 Checkpoint metadata verified"
318+
echo " ✓ ${CHECKPOINT_JSON}"
319+
echo " Build scripts will detect this checkpoint via shouldRun()"
320+
else
321+
echo "⚠️ Warning: Checkpoint metadata missing: ${CHECKPOINT_JSON}"
322+
echo " Build may not skip completed phases properly"
323+
fi
324+
echo ""
325+
159326
# Determine if build needs to run
160327
NEEDS_BUILD="false"
161328
if [ ${RESTORED_INDEX} -gt 0 ]; then
@@ -196,6 +363,7 @@ runs:
196363
echo "needs_build=${NEEDS_BUILD}" >> $GITHUB_OUTPUT
197364
198365
- name: Skip restoration (build will run from scratch)
366+
id: skip
199367
if: inputs.cache-hit != 'true' || inputs.cache-valid != 'true'
200368
shell: bash
201369
run: |

0 commit comments

Comments
 (0)