diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 00000000..a5c35dbf
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,30 @@
+name: Daily Building Script Execution
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Runs at 01:00 UTC every day (GitHub evaluates cron schedules in UTC)
+    - cron: '0 1 * * *'
+
+jobs:
+  run-shell-script:
+    runs-on: self-hosted
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+
+    - name: Run shell script
+      # Pack the checked-out tree into a dated tarball and copy it to the bucket.
+      # Secrets are passed as environment variables so their values are never
+      # spliced into the script text by the workflow template engine.
+      env:
+        OSS_AK_ID: ${{ secrets.OSS_AK_ID }}
+        OSS_AK_SECRET: ${{ secrets.OSS_AK_SECRET }}
+        OSS_ENDPOINT: ${{ secrets.OSS_ENDPOINT }}
+        OSS_BUCKET: ${{ secrets.OSS_BUCKET }}
+      run: |
+        tar_name="chatlearn-$(date +%F).tar.gz"
+        tar czvf "/tmp/${tar_name}" .
+        ossutil64 -i "$OSS_AK_ID" -k "$OSS_AK_SECRET" -e "$OSS_ENDPOINT" cp -r "/tmp/${tar_name}" "${OSS_BUCKET}/regression/chatlearn/src/"
+
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 0715d21f..4ccfecc3 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.10"]
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml
new file mode 100644
index 00000000..4fae9ed1
--- /dev/null
+++ b/.github/workflows/unit_test.yml
@@ -0,0 +1,29 @@
+name: Unit Tests
+
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches:
+      - main
+    tags:
+      # Version tags such as v1.2.3
+      - v[0-9]+.[0-9]+.[0-9]+
+
+
+jobs:
+  run-shell-script:
+    runs-on: self-hosted
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+
+    - name: Run unit test
+      # Runs `make test` inside the UT image with the workspace mounted at the
+      # same path; the container shares the host network/IPC and sees all GPUs.
+      run: |
+        docker pull "$UT_IMAGE"
+        docker run -v "$PWD:$PWD" -w "$PWD" --net host --ipc host --shm-size 80G -t --rm --gpus all "$UT_IMAGE" bash -c 'make test'
+      env:
+        UT_IMAGE: ${{ secrets.UT_IMAGE }}
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index 302fcafe..0a76fb44 100644
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -1,12 +1,8 @@
 #!/bin/bash
-set -exo pipefail
 export PYTHONPATH=$(cd ../ && pwd):${PWD}:${PYTHONPATH}
 CDIR="$(cd "$(dirname "$0")" ; pwd -P)"
 LOGFILE=/tmp/pytorch_py_test.log
 rm -rf core*
-MAX_GRAPH_SIZE=500
-GRAPH_CHECK_FREQUENCY=100
-VERBOSITY=2
 
 [ -z "$MASTER_ADDR" ] && export MASTER_ADDR=localhost
 [ -z "$WORLD_SIZE" ] && export WORLD_SIZE=1
@@ -34,12 +30,6 @@ do
     L)
       LOGFILE=
       ;;
-    M)
-      MAX_GRAPH_SIZE=$OPTARG
-      ;;
-    C)
-      GRAPH_CHECK_FREQUENCY=$OPTARG
-      ;;
     V)
       VERBOSITY=$OPTARG
       ;;
@@ -49,11 +39,28 @@ shift $(($OPTIND - 1))
 
 
 function run_test {
+  # Run the given command, retrying until it succeeds or 3 attempts have
+  # failed; after the third failure the whole script exits with status 1.
+  attempts=0
+  while [[ $attempts -lt 3 ]]; do
+    # Remove core files and run `ray stop` before every attempt.
+    rm -rf core*
+    ray stop
+    "$@"
+    if [[ $? -eq 0 ]]; then
+      echo "$* success"
+      break
+    fi
+
+    attempts=$((attempts + 1))
+    if [[ $attempts -lt 3 ]]; then
+      echo "$* fail, retry ($attempts/3)..."
+    else
+      echo "$* fail, exit ..."
+      exit 1
+    fi
+  done
   ray stop
-  "$@"
-  exit_code=$? ; echo "Exit code: $exit_code"
-  ray stop
-  echo $@
 }
 
 
diff --git a/tests/test_flat_tensors.py b/tests/test_flat_tensors.py
index ebe6c1e8..a15ebcdd 100644
--- a/tests/test_flat_tensors.py
+++ b/tests/test_flat_tensors.py
@@ -20,10 +20,10 @@ def run_flat_tensors_test_with_constructor(self, constructor):
         measure1 = torch.cuda.memory_allocated()
 
         # Randomly generate some tensors.
-        n = 64
+        n = 4
         n_dims = [random.randint(1, 4) for _ in range(n)]
         shapes = [
-            [random.randint(0, 1 << 8) for _ in range(dim)]
+            [random.randint(0, 8) for _ in range(dim)]
             for dim in n_dims
         ]
 