Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
dchourasia committed Sep 23, 2023
2 parents bad490c + 483257f commit 9ae1c33
Show file tree
Hide file tree
Showing 9 changed files with 34 additions and 148 deletions.
118 changes: 0 additions & 118 deletions .github/workflows/sync.yml

This file was deleted.

8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ Integration of this stack into the Open Data Hub is owned by the Distributed Wor

| Component | Version |
|------------------------------|---------|
| CodeFlare Operator | v0.2.3 |
| Multi-Cluster App Dispatcher | v1.34.1 |
| CodeFlare-SDK | v0.7.1 |
| InstaScale | v0.0.8 |
| CodeFlare Operator | v1.0.0-rc.1 |
| Multi-Cluster App Dispatcher | v1.35.0 |
| CodeFlare-SDK | v0.8.0 |
| InstaScale | v0.0.9 |
| KubeRay | v0.6.0 |
<!-- Compatibility Matrix end -->

Expand Down
6 changes: 3 additions & 3 deletions codeflare-stack/base/codeflare-notebook-imagestream.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ spec:
tags:
- annotations:
openshift.io/imported-from: quay.io/project-codeflare/notebook
name: v0.7.1
name: v0.8.0
from:
kind: DockerImage
name: quay.io/project-codeflare/notebook:v0.7.1
name: quay.io/project-codeflare/notebook:v0.8.0
importPolicy:
scheduled: true
scheduled: true
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ metadata:
name: mcad-controller-ray-clusterrolebinding
subjects:
- kind: ServiceAccount
name: mcad-controller-mcad
namespace: $(namespace)
name: codeflare-operator-controller-manager
namespace: openshift-operators
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
Expand Down
32 changes: 17 additions & 15 deletions tests/basictests/distributed-workloads.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ function test_mcad_torchx_functionality() {

########### Clean Cluster should be free of these resources ############
# Get appwrapper name
AW=$(oc get appwrapper -n ${ODHPROJECT} | grep mnistjob | cut -d ' ' -f 1) || true
AW=$(oc get appwrapper.workload.codeflare.dev -n ${ODHPROJECT} | grep mnistjob | cut -d ' ' -f 1) || true
# Clean up resources
if [[ -n $AW ]]; then
os::cmd::expect_success "oc delete appwrapper $AW -n ${ODHPROJECT} || true"
os::cmd::expect_success "oc delete appwrapper.workload.codeflare.dev $AW -n ${ODHPROJECT} || true"
fi
os::cmd::expect_success "oc delete notebook jupyter-nb-kube-3aadmin -n ${ODHPROJECT} || true"
os::cmd::expect_success "oc delete cm notebooks-mcad -n ${ODHPROJECT} || true"
Expand All @@ -50,7 +50,9 @@ function test_mcad_torchx_functionality() {
os::cmd::expect_success "oc create configmap notebooks-mcad -n ${ODHPROJECT} --from-file=${RESOURCEDIR}/mnist_mcad_mini.ipynb"

# Get Token
local TESTUSER_BEARER_TOKEN="$(curl -skiL -u $TEST_USER:$TEST_PASS -H 'X-CSRF-Token: xxx' "$OPENSHIFT_OAUTH_ENDPOINT/oauth/authorize?response_type=token&client_id=openshift-challenging-client" | grep -oE 'access_token=[^&]*'| sed 's/access_token=//')"
local TESTUSER_BEARER_TOKEN="$(curl -skiL -u $TEST_USER:$TEST_PASS -H 'X-CSRF-Token: xxx' "$OPENSHIFT_OAUTH_ENDPOINT/oauth/authorize?response_type=token&client_id=openshift-challenging-client" | grep -oP 'access_token=\K[^&]*')"

# Spawn notebook-server using the codeflare custom nb image
os::cmd::expect_success "cat ${RESOURCEDIR}/custom-nb-small.yaml \
| sed s/%INGRESS%/$(oc get ingresses.config/cluster -o jsonpath={.spec.domain})/g \
| sed s/%OCPSERVER%/$(oc whoami --show-server=true|cut -f3 -d "/")/g \
Expand All @@ -62,16 +64,16 @@ function test_mcad_torchx_functionality() {
os::cmd::try_until_text "oc get pod -n ${ODHPROJECT} | grep "jupyter-nb-kube-3aadmin" | awk '{print \$2}'" "2/2" $odhdefaulttimeout $odhdefaultinterval

# Wait for appwrapper to exist
os::cmd::try_until_text "oc get appwrapper -n ${ODHPROJECT} | grep mnistjob" "mnistjob-*" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get appwrapper.workload.codeflare.dev -n ${ODHPROJECT} | grep mnistjob" "mnistjob-*" $odhdefaulttimeout $odhdefaultinterval

# Get appwrapper name
AW=$(oc get appwrapper -n ${ODHPROJECT} | grep mnistjob | cut -d ' ' -f 1)
AW=$(oc get appwrapper.workload.codeflare.dev -n ${ODHPROJECT} | grep mnistjob | cut -d ' ' -f 1)

# Wait for the mnistjob appwrapper ($AW) state to become running
os::cmd::try_until_text "oc get appwrapper $AW -n ${ODHPROJECT} -ojsonpath='{.status.state}'" "Running" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get appwrapper.workload.codeflare.dev $AW -n ${ODHPROJECT} -ojsonpath='{.status.state}'" "Running" $odhdefaulttimeout $odhdefaultinterval

# Wait for workload to succeed and clean up
os::cmd::try_until_text "oc get appwrapper $AW -n ${ODHPROJECT}" ".*NotFound.*" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get appwrapper.workload.codeflare.dev $AW -n ${ODHPROJECT}" "*NotFound*" $odhdefaulttimeout $odhdefaultinterval

# Test clean up resources
os::cmd::expect_success "oc delete notebook jupyter-nb-kube-3aadmin -n ${ODHPROJECT}"
Expand All @@ -80,8 +82,8 @@ function test_mcad_torchx_functionality() {
os::cmd::expect_success "oc delete cm notebooks-mcad -n ${ODHPROJECT} || true"
os::cmd::expect_failure "oc get cm notebooks-mcad -n ${ODHPROJECT}"

os::cmd::expect_success "oc delete appwrapper $AW -n ${ODHPROJECT} || true"
os::cmd::expect_failure "oc get appwrapper $AW -n ${ODHPROJECT}"
os::cmd::expect_success "oc delete appwrapper.workload.codeflare.dev $AW -n ${ODHPROJECT} || true"
os::cmd::expect_failure "oc get appwrapper.workload.codeflare.dev $AW -n ${ODHPROJECT}"

os::cmd::expect_success "oc delete pvc jupyterhub-nb-kube-3aadmin-pvc -n ${ODHPROJECT} || true"
os::cmd::expect_failure "oc get pvc jupyterhub-nb-kube-3aadmin-pvc -n ${ODHPROJECT}"
Expand All @@ -94,7 +96,7 @@ function test_mcad_ray_functionality() {
# Clean up resources
os::cmd::expect_success "oc delete notebook jupyter-nb-kube-3aadmin -n ${ODHPROJECT} || true"
os::cmd::expect_success "oc delete cm notebooks-ray -n ${ODHPROJECT} || true"
os::cmd::expect_success "oc delete appwrapper mnisttest -n ${ODHPROJECT} || true"
os::cmd::expect_success "oc delete appwrapper.workload.codeflare.dev mnisttest -n ${ODHPROJECT} || true"
os::cmd::expect_success "oc delete raycluster mnisttest -n ${ODHPROJECT} || true"
os::cmd::expect_success "oc delete pvc jupyterhub-nb-kube-3aadmin-pvc -n ${ODHPROJECT} || true"
##############################################################################
Expand All @@ -106,7 +108,7 @@ function test_mcad_ray_functionality() {
os::cmd::expect_success "oc create configmap notebooks-ray -n ${ODHPROJECT} --from-file=${RESOURCEDIR}/mnist_ray_mini.ipynb --from-file=${RESOURCEDIR}/mnist.py --from-file=${RESOURCEDIR}/requirements.txt"

# Get Token
local TESTUSER_BEARER_TOKEN="$(curl -skiL -u $TEST_USER:$TEST_PASS -H 'X-CSRF-Token: xxx' "$OPENSHIFT_OAUTH_ENDPOINT/oauth/authorize?response_type=token&client_id=openshift-challenging-client" | grep -oE 'access_token=[^&]*'| sed 's/access_token=//')"
local TESTUSER_BEARER_TOKEN="$(curl -skiL -u $TEST_USER:$TEST_PASS -H 'X-CSRF-Token: xxx' "$OPENSHIFT_OAUTH_ENDPOINT/oauth/authorize?response_type=token&client_id=openshift-challenging-client" | grep -oP 'access_token=\K[^&]*')"

# Spawn notebook-server using the codeflare custom nb image
os::cmd::expect_success "cat ${RESOURCEDIR}/custom-nb-small.yaml \
Expand All @@ -120,13 +122,13 @@ function test_mcad_ray_functionality() {
os::cmd::try_until_text "oc get pod -n ${ODHPROJECT} | grep "jupyter-nb-kube-3aadmin" | awk '{print \$2}'" "2/2" $odhdefaulttimeout $odhdefaultinterval

# Wait for the mnisttest appwrapper state to become running
os::cmd::try_until_text "oc get appwrapper mnisttest -n ${ODHPROJECT} -ojsonpath='{.status.state}'" "Running" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get appwrapper.workload.codeflare.dev mnisttest -n ${ODHPROJECT} -ojsonpath='{.status.state}'" "Running" $odhdefaulttimeout $odhdefaultinterval

# Wait for Raycluster to be ready
os::cmd::try_until_text "oc get raycluster -n ${ODHPROJECT} mnisttest -ojsonpath='{.status.state}'" "ready" $odhdefaulttimeout $odhdefaultinterval

# Wait for job to be completed and cleaned up
os::cmd::try_until_text "oc get appwrapper mnisttest -n ${ODHPROJECT}" ".*NotFound.*" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get appwrapper.workload.codeflare.dev mnisttest -n ${ODHPROJECT}" "*NotFound*" $odhdefaulttimeout $odhdefaultinterval
os::cmd::expect_failure "oc get raycluster mnisttest -n ${ODHPROJECT}"

# Test clean up resources
Expand All @@ -136,8 +138,8 @@ function test_mcad_ray_functionality() {
os::cmd::expect_success "oc delete cm notebooks-ray -n ${ODHPROJECT} || true"
os::cmd::expect_failure "oc get cm notebooks-ray -n ${ODHPROJECT}"

os::cmd::expect_success "oc delete appwrapper mnisttest -n ${ODHPROJECT} || true"
os::cmd::expect_failure "oc get appwrapper mnisttest -n ${ODHPROJECT}"
os::cmd::expect_success "oc delete appwrapper.workload.codeflare.dev mnisttest -n ${ODHPROJECT} || true"
os::cmd::expect_failure "oc get appwrapper.workload.codeflare.dev mnisttest -n ${ODHPROJECT}"

os::cmd::expect_success "oc delete raycluster mnisttest -n ${ODHPROJECT} || true"
os::cmd::expect_failure "oc get raycluster mnisttest -n ${ODHPROJECT}"
Expand Down
3 changes: 2 additions & 1 deletion tests/basictests/ray.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ function start_test_ray_cluster(){
os::cmd::expect_success "oc project ${ODHPROJECT}"
os::cmd::expect_success "oc apply -f ${RESOURCEDIR}/ray/ray-test-cluster-test.yaml"
os::cmd::try_until_text "oc get RayCluster kuberay-cluster-test" "kuberay-cluster-test" $odhdefaulttimeout $odhdefaultinterval
sleep 15
os::cmd::try_until_text "oc get pods -l ray.io/identifier=kuberay-cluster-test-head -o jsonpath='{$.items[*].status.phase}'" "Running" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get pods -l ray.io/identifier=kuberay-cluster-test-worker -o jsonpath='{$.items[*].status.phase}'" "Running" $odhdefaulttimeout $odhdefaultinterval
}

function check_functionality(){
Expand Down
2 changes: 1 addition & 1 deletion tests/resources/codeflare-subscription.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ spec:
source: community-operators
sourceNamespace: openshift-marketplace
installPlanApproval: Manual
startingCSV: codeflare-operator.v0.1.0
startingCSV: codeflare-operator.v0.2.3
6 changes: 3 additions & 3 deletions tests/resources/custom-nb-small.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ kind: Notebook
metadata:
annotations:
notebooks.opendatahub.io/inject-oauth: "true"
notebooks.opendatahub.io/last-image-selection: codeflare-notebook:latest
notebooks.opendatahub.io/last-image-selection: codeflare-notebook:v0.7.1
notebooks.opendatahub.io/last-size-selection: Small
notebooks.opendatahub.io/oauth-logout-url: https://odh-dashboard-%NAMESPACE%.%INGRESS%/notebookController/kube-3aadmin/home
opendatahub.io/link: https://jupyter-nb-kube-3aadmin-%NAMESPACE%.%INGRESS%/notebook/%NAMESPACE%/jupyter-nb-kube-3aadmin
Expand Down Expand Up @@ -47,14 +47,14 @@ spec:
--ServerApp.quit_button=False
--ServerApp.tornado_settings={"user":"kube-3aadmin","hub_host":"https://odh-dashboard-%NAMESPACE%.%INGRESS%","hub_prefix":"/notebookController/kube-3aadmin"}
- name: JUPYTER_IMAGE
value: image-registry.openshift-image-registry.svc:5000/%NAMESPACE%/codeflare-notebook:latest
value: image-registry.openshift-image-registry.svc:5000/%NAMESPACE%/codeflare-notebook:v0.7.1
- name: JUPYTER_NOTEBOOK_PORT
value: "8888"
- name: OCP_SERVER
value: https://%OCPSERVER%
- name: OCP_TOKEN
value: %OCPTOKEN%
image: image-registry.openshift-image-registry.svc:5000/%NAMESPACE%/codeflare-notebook:latest
image: image-registry.openshift-image-registry.svc:5000/%NAMESPACE%/codeflare-notebook:v0.7.1
command: ["/bin/sh", "-c", "pip install papermill && oc login --token=${OCP_TOKEN} --server=${OCP_SERVER} --insecure-skip-tls-verify=true && papermill /opt/app-root/notebooks-%JOBTYPE%/mnist_%JOBTYPE%_mini.ipynb /opt/app-root/src/mcad-out.ipynb && sleep infinity"]
# args: ["pip install papermill && oc login --token=${OCP_TOKEN} --server=${OCP_SERVER} --insecure-skip-tls-verify=true && papermill /opt/app-root/notebooks/mcad.ipynb /opt/app-root/src/mcad-out.ipynb" ]
imagePullPolicy: Always
Expand Down
3 changes: 2 additions & 1 deletion tests/resources/mnist_ray_mini.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
"outputs": [],
"source": [
"# Bring up the cluster\n",
"cluster.up()"
"cluster.up()\n",
"sleep(10)"
]
},
{
Expand Down

0 comments on commit 9ae1c33

Please sign in to comment.