Skip to content

Commit 966ae7f

Browse files
committed
RHOAIENG-10449 - Add PR check for additional-demos notebooks
1 parent 9af07a0 commit 966ae7f

File tree

1 file changed

+360
-0
lines changed

1 file changed

+360
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,360 @@
1+
name: Additional demo notebooks tests

# Triggered whenever a label is added to a pull request; every job in this
# workflow additionally checks that the label is 'test-additional-notebooks'.
on:
  pull_request:
    types: [ labeled ]

# The label name is part of the group on purpose: the workflow starts for
# *every* label added to the PR, and with a group keyed only on branch and
# workflow an unrelated label would cancel an in-progress notebook test run
# (and then skip all jobs itself). Re-adding the same label still cancels the
# previous run for that label.
concurrency:
  group: ${{ github.head_ref }}-${{ github.workflow }}-${{ github.event.label.name }}
  cancel-in-progress: true

env:
  # Operator image passed as IMG to `make deploy` in the deploy steps.
  CODEFLARE_OPERATOR_IMG: "quay.io/project-codeflare/codeflare-operator:dev"
13+
14+
jobs:
15+
verify-hf_interactive:
  # Executes demo-notebooks/additional-demos/hf_interactive.ipynb with papermill
  # against a KinD cluster running the CodeFlare stack.
  # Runs only when the label just added to the PR is 'test-additional-notebooks'.
  if: ${{ github.event.label.name == 'test-additional-notebooks' }}
  runs-on: ubuntu-20.04-4core

  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive

    # Checked out into ./common; the local actions under
    # ./common/github-actions/ are used by later steps.
    - name: Checkout common repo code
      uses: actions/checkout@v4
      with:
        repository: 'project-codeflare/codeflare-common'
        ref: 'main'
        path: 'common'

    # Checked out into ./codeflare-operator; its Makefile targets are used by
    # the deploy step and its go.mod selects the Go version.
    - name: Checkout CodeFlare operator repository
      uses: actions/checkout@v4
      with:
        repository: project-codeflare/codeflare-operator
        path: codeflare-operator

    - name: Set Go
      uses: actions/setup-go@v5
      with:
        go-version-file: './codeflare-operator/go.mod'
        cache-dependency-path: "./codeflare-operator/go.sum"

    - name: Set up gotestfmt
      uses: gotesttools/gotestfmt-action@v2
      with:
        token: ${{ secrets.GITHUB_TOKEN }}

    - name: Set up specific Python version
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'
        cache: 'pip' # caching pip dependencies

    # NOTE(review): TEMP_DIR, used by the log steps below, is not defined in
    # this workflow; presumably this action exports it - confirm.
    - name: Setup and start KinD cluster
      uses: ./common/github-actions/kind

    # The step id is referenced by the log/export steps, which only run when
    # the deployment itself succeeded.
    - name: Deploy CodeFlare stack
      id: deploy
      run: |
        cd codeflare-operator
        echo Setting up CodeFlare stack
        make setup-e2e
        echo Deploying CodeFlare operator
        make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
        kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
        cd ..

    - name: Setup Additional demo notebooks execution
      run: |
        echo "Installing papermill and dependencies..."
        pip install poetry papermill ipython ipykernel
        # Disable virtualenv due to problems using packaged in virtualenv in papermill
        poetry config virtualenvs.create false

        echo "Installing SDK..."
        poetry install --with test,docs

    - name: Run hf_interactive.ipynb
      run: |
        set -euo pipefail

        # Remove login/logout cells, as KinD doesn't support authentication using token
        jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
        jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
        # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
        JOB_WAIT=$(jq -r '.' "${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json")
        jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
        # Set explicit namespace as SDK need it (currently) to resolve local queues
        # (the same substitution also lowers the CPU request from 8 to 1)
        sed -i "s/worker_cpu_requests=8,/worker_cpu_requests=1, namespace='default',/" hf_interactive.ipynb
        # Change cluster parameters (need to decrease)
        # NOTE(review): sed exits 0 even when a pattern matches nothing, so
        # confirm these literals still match the notebook's current values.
        # '|' is the delimiter here because the pattern itself contains '/'.
        sed -i "s|{'nvidia.com/gpu':1}|{'nvidia.com/gpu':0}|g" hf_interactive.ipynb
        sed -i "s/worker_cpu_limits=8,/worker_cpu_limits=1,/" hf_interactive.ipynb
        sed -i "s/worker_memory_requests=16,/worker_memory_requests=4,/" hf_interactive.ipynb
        sed -i "s/worker_memory_limits=8,/worker_memory_limits=4,/" hf_interactive.ipynb
        # Run notebook
        poetry run papermill hf_interactive.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
      working-directory: demo-notebooks/additional-demos

    # The remaining steps use always() so that logs are collected even when
    # the notebook run failed, as long as the stack was deployed.
    - name: Print CodeFlare operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing CodeFlare operator logs"
        kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee "${TEMP_DIR}/codeflare-operator.log"

    - name: Print Kueue operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing Kueue operator logs"
        KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
        kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee "${TEMP_DIR}/kueue.log"

    - name: Print KubeRay operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing KubeRay operator logs"
        kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee "${TEMP_DIR}/kuberay.log"

    - name: Export all KinD pod logs
      uses: ./common/github-actions/kind-export-logs
      if: always() && steps.deploy.outcome == 'success'
      with:
        output-directory: ${TEMP_DIR}

    - name: Upload logs
      uses: actions/upload-artifact@v4
      if: always() && steps.deploy.outcome == 'success'
      with:
        name: logs-verify-hf_interactive
        retention-days: 10
        path: |
          ${{ env.TEMP_DIR }}/**/*.log
133+
134+
verify-local_interactive:
  # Executes demo-notebooks/additional-demos/local_interactive.ipynb with
  # papermill against a KinD cluster running the CodeFlare stack.
  # Runs only when the label just added to the PR is 'test-additional-notebooks'.
  if: ${{ github.event.label.name == 'test-additional-notebooks' }}
  runs-on: ubuntu-20.04-4core

  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive

    # Checked out into ./common; the local actions under
    # ./common/github-actions/ are used by later steps.
    - name: Checkout common repo code
      uses: actions/checkout@v4
      with:
        repository: 'project-codeflare/codeflare-common'
        ref: 'main'
        path: 'common'

    # Checked out into ./codeflare-operator; its Makefile targets are used by
    # the deploy step and its go.mod selects the Go version.
    - name: Checkout CodeFlare operator repository
      uses: actions/checkout@v4
      with:
        repository: project-codeflare/codeflare-operator
        path: codeflare-operator

    - name: Set Go
      uses: actions/setup-go@v5
      with:
        go-version-file: './codeflare-operator/go.mod'
        cache-dependency-path: "./codeflare-operator/go.sum"

    - name: Set up gotestfmt
      uses: gotesttools/gotestfmt-action@v2
      with:
        token: ${{ secrets.GITHUB_TOKEN }}

    - name: Set up specific Python version
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'
        cache: 'pip' # caching pip dependencies

    # NOTE(review): TEMP_DIR, used by the log steps below, is not defined in
    # this workflow; presumably this action exports it - confirm.
    - name: Setup and start KinD cluster
      uses: ./common/github-actions/kind

    # The step id is referenced by the log/export steps, which only run when
    # the deployment itself succeeded.
    - name: Deploy CodeFlare stack
      id: deploy
      run: |
        cd codeflare-operator
        echo Setting up CodeFlare stack
        make setup-e2e
        echo Deploying CodeFlare operator
        make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
        kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
        cd ..

    - name: Setup Additional demo notebooks execution
      run: |
        echo "Installing papermill and dependencies..."
        pip install poetry papermill ipython ipykernel
        # Disable virtualenv due to problems using packaged in virtualenv in papermill
        poetry config virtualenvs.create false

        echo "Installing SDK..."
        poetry install --with test,docs

    - name: Run local_interactive.ipynb
      run: |
        set -euo pipefail

        # Remove login/logout cells, as KinD doesn't support authentication using token
        jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
        jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
        # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
        JOB_WAIT=$(jq -r '.' "${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json")
        jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
        # Set explicit namespace as SDK need it (currently) to resolve local queues
        sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1, namespace='default',/" local_interactive.ipynb
        # Run notebook; the executed copy is named after this notebook
        poetry run papermill local_interactive.ipynb local_interactive_out.ipynb --log-output --execution-timeout 1200
      working-directory: demo-notebooks/additional-demos

    # The remaining steps use always() so that logs are collected even when
    # the notebook run failed, as long as the stack was deployed.
    - name: Print CodeFlare operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing CodeFlare operator logs"
        kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee "${TEMP_DIR}/codeflare-operator.log"

    - name: Print Kueue operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing Kueue operator logs"
        KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
        kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee "${TEMP_DIR}/kueue.log"

    - name: Print KubeRay operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing KubeRay operator logs"
        kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee "${TEMP_DIR}/kuberay.log"

    - name: Export all KinD pod logs
      uses: ./common/github-actions/kind-export-logs
      if: always() && steps.deploy.outcome == 'success'
      with:
        output-directory: ${TEMP_DIR}

    - name: Upload logs
      uses: actions/upload-artifact@v4
      if: always() && steps.deploy.outcome == 'success'
      with:
        name: logs-local_interactive
        retention-days: 10
        path: |
          ${{ env.TEMP_DIR }}/**/*.log
247+
248+
verify-ray_job_client:
  # Executes demo-notebooks/additional-demos/ray_job_client.ipynb with
  # papermill against a KinD cluster running the CodeFlare stack.
  # Runs only when the label just added to the PR is 'test-additional-notebooks'.
  if: ${{ github.event.label.name == 'test-additional-notebooks' }}
  runs-on: ubuntu-20.04-4core

  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive

    # Checked out into ./common; the local actions under
    # ./common/github-actions/ are used by later steps.
    - name: Checkout common repo code
      uses: actions/checkout@v4
      with:
        repository: 'project-codeflare/codeflare-common'
        ref: 'main'
        path: 'common'

    # Checked out into ./codeflare-operator; its Makefile targets are used by
    # the deploy step and its go.mod selects the Go version.
    - name: Checkout CodeFlare operator repository
      uses: actions/checkout@v4
      with:
        repository: project-codeflare/codeflare-operator
        path: codeflare-operator

    - name: Set Go
      uses: actions/setup-go@v5
      with:
        go-version-file: './codeflare-operator/go.mod'
        cache-dependency-path: "./codeflare-operator/go.sum"

    - name: Set up gotestfmt
      uses: gotesttools/gotestfmt-action@v2
      with:
        token: ${{ secrets.GITHUB_TOKEN }}

    - name: Set up specific Python version
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'
        cache: 'pip' # caching pip dependencies

    # NOTE(review): TEMP_DIR, used by the log steps below, is not defined in
    # this workflow; presumably this action exports it - confirm.
    - name: Setup and start KinD cluster
      uses: ./common/github-actions/kind

    # The step id is referenced by the log/export steps, which only run when
    # the deployment itself succeeded.
    - name: Deploy CodeFlare stack
      id: deploy
      run: |
        cd codeflare-operator
        echo Setting up CodeFlare stack
        make setup-e2e
        echo Deploying CodeFlare operator
        make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
        kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
        cd ..

    - name: Setup Additional demo notebooks execution
      run: |
        echo "Installing papermill and dependencies..."
        pip install poetry papermill ipython ipykernel
        # Disable virtualenv due to problems using packaged in virtualenv in papermill
        poetry config virtualenvs.create false

        echo "Installing SDK..."
        poetry install --with test,docs

    - name: Run ray_job_client.ipynb
      run: |
        set -euo pipefail

        # Remove login/logout cells, as KinD doesn't support authentication using token
        jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
        jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
        # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
        JOB_WAIT=$(jq -r '.' "${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json")
        jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
        # Set explicit namespace as SDK need it (currently) to resolve local queues
        sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1, namespace='default',/" ray_job_client.ipynb
        # Run notebook; the executed copy is named after this notebook
        poetry run papermill ray_job_client.ipynb ray_job_client_out.ipynb --log-output --execution-timeout 1200
      working-directory: demo-notebooks/additional-demos

    # The remaining steps use always() so that logs are collected even when
    # the notebook run failed, as long as the stack was deployed.
    - name: Print CodeFlare operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing CodeFlare operator logs"
        kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee "${TEMP_DIR}/codeflare-operator.log"

    - name: Print Kueue operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing Kueue operator logs"
        KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
        kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee "${TEMP_DIR}/kueue.log"

    - name: Print KubeRay operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing KubeRay operator logs"
        kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee "${TEMP_DIR}/kuberay.log"

    - name: Export all KinD pod logs
      uses: ./common/github-actions/kind-export-logs
      if: always() && steps.deploy.outcome == 'success'
      with:
        output-directory: ${TEMP_DIR}

    - name: Upload logs
      uses: actions/upload-artifact@v4
      if: always() && steps.deploy.outcome == 'success'
      with:
        name: logs-ray_job_client
        retention-days: 10
        path: |
          ${{ env.TEMP_DIR }}/**/*.log

0 commit comments

Comments
 (0)