From 300624201b3a87e6876a9756b83c896ae08ddd04 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 12 Aug 2024 23:48:01 +0800 Subject: [PATCH 01/20] fix start_server error --- python/xoscar/backends/communication/socket.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/xoscar/backends/communication/socket.py b/python/xoscar/backends/communication/socket.py index 246956e3..4390657d 100644 --- a/python/xoscar/backends/communication/socket.py +++ b/python/xoscar/backends/communication/socket.py @@ -201,10 +201,6 @@ def client_type(self) -> Type["Client"]: def channel_type(self) -> int: return ChannelType.remote - @classmethod - def parse_config(cls, config: dict) -> dict: - return config - @staticmethod @implements(Server.create) async def create(config: Dict) -> "Server": From 379e78d6bda0df7b943af3d03bc49217381c5b91 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 12 Aug 2024 23:49:13 +0800 Subject: [PATCH 02/20] fix start_server error --- .github/workflows/python.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index bfefb871..12e7dab6 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -65,7 +65,6 @@ jobs: find . -name "CMakeLists.txt" -not -path "*third_party/*" | xargs cmake-format -c .cmake-format.yaml --check build_test_job: - if: github.repository == 'xorbitsai/xoscar' runs-on: ${{ matrix.os }} needs: lint env: From 784f50bb0d02d16fb725bcc7a0289b9f80da96b4 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Tue, 13 Aug 2024 00:10:40 +0800 Subject: [PATCH 03/20] fix start_server error --- .github/workflows/python.yaml | 5 +++-- python/xoscar/backends/communication/socket.py | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 12e7dab6..b64a2317 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -4,8 +4,8 @@ on: push: branches: - '*' - pull_request: - types: ['opened', 'reopened', 'synchronize'] + # pull_request: + # types: ['opened', 'reopened', 'synchronize'] concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -65,6 +65,7 @@ jobs: find . -name "CMakeLists.txt" -not -path "*third_party/*" | xargs cmake-format -c .cmake-format.yaml --check build_test_job: + # if: github.repository == 'xorbitsai/xoscar' runs-on: ${{ matrix.os }} needs: lint env: diff --git a/python/xoscar/backends/communication/socket.py b/python/xoscar/backends/communication/socket.py index 4390657d..246956e3 100644 --- a/python/xoscar/backends/communication/socket.py +++ b/python/xoscar/backends/communication/socket.py @@ -201,6 +201,10 @@ def client_type(self) -> Type["Client"]: def channel_type(self) -> int: return ChannelType.remote + @classmethod + def parse_config(cls, config: dict) -> dict: + return config + @staticmethod @implements(Server.create) async def create(config: Dict) -> "Server": From c97ea310d3ec2e6646cca68aa99a42086fbbe98e Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Tue, 13 Aug 2024 06:49:15 +0800 Subject: [PATCH 04/20] sockets don't need ucx config --- python/xoscar/backends/communication/socket.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/xoscar/backends/communication/socket.py b/python/xoscar/backends/communication/socket.py index 246956e3..d80aeefb 100644 --- a/python/xoscar/backends/communication/socket.py +++ b/python/xoscar/backends/communication/socket.py @@ -209,6 +209,8 @@ def parse_config(cls, config: dict) -> dict: @implements(Server.create) async def create(config: Dict) -> "Server": config = config.copy() + if "ucx" in config: + config.pop("ucx") if "address" in config: address = config.pop("address") host, port = address.rsplit(":", 1) From 3dd0802bdfa73ccee7a700bae73b7ad3170fb38d Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Tue, 13 Aug 2024 09:15:14 +0800 Subject: [PATCH 05/20] sockets don't need ucx config --- .github/workflows/python.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index b64a2317..bfefb871 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -4,8 +4,8 @@ on: push: branches: - '*' - # pull_request: - # types: ['opened', 'reopened', 'synchronize'] + pull_request: + types: ['opened', 'reopened', 'synchronize'] concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -65,7 +65,7 @@ jobs: find . -name "CMakeLists.txt" -not -path "*third_party/*" | xargs cmake-format -c .cmake-format.yaml --check build_test_job: - # if: github.repository == 'xorbitsai/xoscar' + if: github.repository == 'xorbitsai/xoscar' runs-on: ${{ matrix.os }} needs: lint env: From c1a6c14d6b01c30d610849861bf9404990de08fc Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Tue, 13 Aug 2024 12:37:22 +0800 Subject: [PATCH 06/20] filter config --- python/xoscar/backends/communication/socket.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/xoscar/backends/communication/socket.py b/python/xoscar/backends/communication/socket.py index d80aeefb..37ff103a 100644 --- a/python/xoscar/backends/communication/socket.py +++ b/python/xoscar/backends/communication/socket.py @@ -203,14 +203,16 @@ def channel_type(self) -> int: @classmethod def parse_config(cls, config: dict) -> dict: - return config + # we only need the following config + keys_of_interest = ['listen_elastic_ip', 'address', 'host', 'port'] + parsed_config = {key: config[key] for key in keys_of_interest if key in config} + + return parsed_config @staticmethod @implements(Server.create) async def create(config: Dict) -> "Server": config = config.copy() - if "ucx" in config: - config.pop("ucx") if "address" in config: address = config.pop("address") host, port = address.rsplit(":", 1) From b27fccff2108092e87d006990fa12f6015741ee1 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Tue, 13 Aug 2024 12:38:14 +0800 Subject: [PATCH 07/20] filter config --- python/xoscar/backends/communication/socket.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/xoscar/backends/communication/socket.py b/python/xoscar/backends/communication/socket.py index 37ff103a..c20e04cf 100644 --- a/python/xoscar/backends/communication/socket.py +++ b/python/xoscar/backends/communication/socket.py @@ -204,9 +204,9 @@ def channel_type(self) -> int: @classmethod def parse_config(cls, config: dict) -> dict: # we only need the following config - keys_of_interest = ['listen_elastic_ip', 'address', 'host', 'port'] + keys_of_interest = ["listen_elastic_ip", "address", "host", "port"] parsed_config = {key: config[key] for key in keys_of_interest if key in config} - + return parsed_config @staticmethod From f5d3e7ce5381729ac30901370af8ef5a34fadf96 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Tue, 13 Aug 2024 17:52:35 +0800 Subject: [PATCH 08/20] filter config --- python/xoscar/backends/communication/socket.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/xoscar/backends/communication/socket.py b/python/xoscar/backends/communication/socket.py index c20e04cf..816859f2 100644 --- a/python/xoscar/backends/communication/socket.py +++ b/python/xoscar/backends/communication/socket.py @@ -203,9 +203,11 @@ def channel_type(self) -> int: @classmethod def parse_config(cls, config: dict) -> dict: + if config is None or not config: + return dict() # we only need the following config - keys_of_interest = ["listen_elastic_ip", "address", "host", "port"] - parsed_config = {key: config[key] for key in keys_of_interest if key in config} + keys = ["listen_elastic_ip"] + parsed_config = {key: config[key] for key in keys if key in config} return parsed_config From a93e1e51656a212fad198f9ec026749db4f019da Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Tue, 13 Aug 2024 17:55:13 +0800 Subject: [PATCH 09/20] filter config --- .../backends/indigen/tests/test_pool.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/python/xoscar/backends/indigen/tests/test_pool.py b/python/xoscar/backends/indigen/tests/test_pool.py index 04d5c513..6b60481d 100644 --- a/python/xoscar/backends/indigen/tests/test_pool.py +++ b/python/xoscar/backends/indigen/tests/test_pool.py @@ -542,6 +542,104 @@ async def test_create_actor_pool(): assert len(global_router._mapping) == 0 +@pytest.mark.asyncio +async def test_create_actor_pool_extra_config(): + start_method = ( + os.environ.get("POOL_START_METHOD", "forkserver") + if sys.platform != "win32" + else None + ) + # create a actor pool based on socket rather than ucx + # pass `extra_conf` to check if we can filter out ucx config + pool = await create_actor_pool( + "127.0.0.1", + pool_cls=MainActorPool, + n_process=2, + subprocess_start_method=start_method, + extra_conf={ + "ucx": { + "tcp": None, + "nvlink": None, + "infiniband": None, + "rdmacm": None, + "cuda-copy": None, + "create-cuda-contex": None, + } + }, + ) + + async with pool: + # test global router + global_router = Router.get_instance() + # global router should not be the identical one with pool's router + assert global_router is not pool.router + assert pool.external_address in global_router._curr_external_addresses + assert pool.external_address in global_router._mapping + + ctx = get_context() + + # actor on main pool + actor_ref = await ctx.create_actor( + TestActor, uid="test-1", address=pool.external_address + ) + assert await actor_ref.add(3) == 3 + assert await actor_ref.add(1) == 4 + assert (await ctx.has_actor(actor_ref)) is True + assert (await ctx.actor_ref(actor_ref)) == actor_ref + # test cancel + task = asyncio.create_task(actor_ref.sleep(20)) + await asyncio.sleep(0) + task.cancel() + assert await task == 5 + await ctx.destroy_actor(actor_ref) + assert (await ctx.has_actor(actor_ref)) is False + for f in actor_ref.add, ctx.actor_ref, ctx.destroy_actor: + with pytest.raises(ActorNotExist): + await f(actor_ref) + + # actor on sub pool + actor_ref1 = await ctx.create_actor( + TestActor, uid="test-main", address=pool.external_address + ) + actor_ref2 = await ctx.create_actor( + TestActor, + uid="test-2", + address=pool.external_address, + allocate_strategy=RandomSubPool(), + ) + assert ( + await ctx.actor_ref(uid="test-2", address=actor_ref2.address) + ) == actor_ref2 + main_ref = await ctx.actor_ref(uid="test-main", address=actor_ref2.address) + assert main_ref.address == pool.external_address + main_ref = await ctx.actor_ref(actor_ref1) + assert main_ref.address == pool.external_address + assert actor_ref2.address != actor_ref.address + assert await actor_ref2.add(3) == 3 + assert await actor_ref2.add(1) == 4 + with pytest.raises(RuntimeError): + await actor_ref2.return_cannot_unpickle() + with pytest.raises(SendMessageFailed): + await actor_ref2.raise_cannot_pickle() + assert (await ctx.has_actor(actor_ref2)) is True + assert (await ctx.actor_ref(actor_ref2)) == actor_ref2 + # test cancel + task = asyncio.create_task(actor_ref2.sleep(20)) + start = time.time() + await asyncio.sleep(0) + task.cancel() + assert await task == 5 + assert time.time() - start < 3 + await ctx.destroy_actor(actor_ref2) + assert (await ctx.has_actor(actor_ref2)) is False + + assert pool.stopped + # after pool shutdown, global router must has been cleaned + global_router = Router.get_instance() + assert len(global_router._curr_external_addresses) == 0 + assert len(global_router._mapping) == 0 + + @pytest.mark.asyncio @require_unix async def test_create_actor_pool_elastic_ip(): From f022437da1beed0eb2adb0149da0d40b2fe003ff Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 16:54:42 +0800 Subject: [PATCH 10/20] CI GPU --- .github/workflows/python.yaml | 5 +++++ python/xoscar/serialization/tests/test_serial.py | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index bfefb871..dc4acd2c 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -70,6 +70,8 @@ jobs: needs: lint env: CONDA_ENV: xoscar-test + SELF_HOST_PYTHON: /root/miniconda3/envs/xoscar-test/bin/python + SELF_HOST_CONDA: /root/miniconda3/condabin/conda defaults: run: shell: bash -l {0} @@ -131,6 +133,9 @@ jobs: - name: Install on GPU if: ${{ matrix.module == 'gpu' }} run: | + ${{ env.SELF_HOST_PYTHON }} -m pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 + ${{ env.SELF_HOST_PYTHON }} -m pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging + ${{ env.SELF_HOST_PYTHON }} pip install -e ".[dev,extra]" python setup.py build_ext -i working-directory: ./python diff --git a/python/xoscar/serialization/tests/test_serial.py b/python/xoscar/serialization/tests/test_serial.py index cc293640..236fb395 100644 --- a/python/xoscar/serialization/tests/test_serial.py +++ b/python/xoscar/serialization/tests/test_serial.py @@ -231,8 +231,9 @@ def test_cudf(): test_df = cudf.DataFrame(raw_df) cudf.testing.assert_frame_equal(test_df, deserialize(*serialize(test_df))) - raw_df.columns = pd.MultiIndex.from_tuples([("a", "a"), ("a", "b"), ("b", "c")]) - test_df = cudf.DataFrame(raw_df) + multi_index = pd.MultiIndex.from_tuples([("a", "a"), ("a", "b"), ("b", "c")]) + raw_df.columns = multi_index + test_df = cudf.DataFrame(raw_df, columns=multi_index) cudf.testing.assert_frame_equal(test_df, deserialize(*serialize(test_df))) From 3aabcd1bb6ae61f1b4a7598b74a53c893be67d53 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 17:05:09 +0800 Subject: [PATCH 11/20] CI GPU --- .github/workflows/python.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index dc4acd2c..4952c9c6 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -134,9 +134,9 @@ jobs: if: ${{ matrix.module == 'gpu' }} run: | ${{ env.SELF_HOST_PYTHON }} -m pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 - ${{ env.SELF_HOST_PYTHON }} -m pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging - ${{ env.SELF_HOST_PYTHON }} pip install -e ".[dev,extra]" - python setup.py build_ext -i + ${{ env.SELF_HOST_PYTHON }} -m pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging "numpy<2.0.0" scipy cython coverage flaky + ${{ env.SELF_HOST_PYTHON }} -m pip install -e ".[dev,extra]" + ${{ env.SELF_HOST_PYTHON }} setup.py build_ext -i working-directory: ./python - name: Test with pytest From dc51dacdd76d9919274c8233c2d7b3ac913ab76d Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 17:35:34 +0800 Subject: [PATCH 12/20] CI GPU --- .github/workflows/python.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 4952c9c6..3cf2ce25 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -148,7 +148,7 @@ jobs: -W ignore::PendingDeprecationWarning \ --cov-config=setup.cfg --cov-report=xml --cov=xoscar xoscar --capture=no else - pytest -m cuda --cov-config=setup.cfg --cov-report=xml --cov=xoscar --capture=no + ${{ env.SELF_HOST_PYTHON }} -m pytest -m cuda --cov-config=setup.cfg --cov-report=xml --cov=xoscar --capture=no fi working-directory: ./python From 6798e99f4f3b12e4949f02d165f64ca383f4eda8 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 18:58:57 +0800 Subject: [PATCH 13/20] CI GPU --- .github/workflows/python.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 3cf2ce25..96770145 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -87,7 +87,7 @@ jobs: - { os: windows-latest, python-version: 3.9} - { os: windows-latest, python-version: 3.10} include: - - { os: self-hosted, module: gpu, python-version: 3.9} + - { os: self-hosted, module: gpu, python-version: 3.11} steps: - name: Check out code @@ -135,6 +135,7 @@ jobs: run: | ${{ env.SELF_HOST_PYTHON }} -m pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 ${{ env.SELF_HOST_PYTHON }} -m pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging "numpy<2.0.0" scipy cython coverage flaky + ${{ env.SELF_HOST_PYTHON }} setup.py clean --all ${{ env.SELF_HOST_PYTHON }} -m pip install -e ".[dev,extra]" ${{ env.SELF_HOST_PYTHON }} setup.py build_ext -i working-directory: ./python From 3e995bac70d38b81d4a4bd60ee82ba9a3ffb0574 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 19:25:27 +0800 Subject: [PATCH 14/20] CI GPU --- .github/workflows/python.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 96770145..39331ee9 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -135,9 +135,9 @@ jobs: run: | ${{ env.SELF_HOST_PYTHON }} -m pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 ${{ env.SELF_HOST_PYTHON }} -m pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging "numpy<2.0.0" scipy cython coverage flaky + ${{ env.SELF_HOST_PYTHON }} -m pip uninstall xoscar ${{ env.SELF_HOST_PYTHON }} setup.py clean --all - ${{ env.SELF_HOST_PYTHON }} -m pip install -e ".[dev,extra]" - ${{ env.SELF_HOST_PYTHON }} setup.py build_ext -i + ${{ env.SELF_HOST_PYTHON }} -m pip install -e ./ working-directory: ./python - name: Test with pytest From a94f7d995f6cfae5c1459f9aa697d4dffdfff870 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 19:49:07 +0800 Subject: [PATCH 15/20] CI GPU --- .github/workflows/python.yaml | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 39331ee9..d7254923 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -141,18 +141,22 @@ jobs: working-directory: ./python - name: Test with pytest - env: - MODULE: ${{ matrix.module }} + if: ${{ matrix.module != 'gpu' }} run: | - if [[ "$MODULE" == "xoscar" ]]; then - pytest --timeout=1500 \ - -W ignore::PendingDeprecationWarning \ - --cov-config=setup.cfg --cov-report=xml --cov=xoscar xoscar --capture=no - else - ${{ env.SELF_HOST_PYTHON }} -m pytest -m cuda --cov-config=setup.cfg --cov-report=xml --cov=xoscar --capture=no - fi + pytest --timeout=1500 \ + -W ignore::PendingDeprecationWarning \ + --cov-config=setup.cfg --cov-report=xml \ + --cov=xoscar xoscar --capture=no working-directory: ./python + - name: Test with pytest GPU + if: ${{ matrix.module == 'gpu' }} + run: | + ${{ env.SELF_HOST_PYTHON }} -m pytest -m cuda \ + --cov-config=setup.cfg --cov-report=xml \ + --cov=xoscar --capture=no + working-directory: ./python + - name: Report coverage data uses: codecov/codecov-action@v4 with: From 215c558f3d70a384be1948b746024abdb370bea1 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 20:12:23 +0800 Subject: [PATCH 16/20] CI GPU --- .github/workflows/python.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index d7254923..9e94a3e5 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -133,11 +133,12 @@ jobs: - name: Install on GPU if: ${{ matrix.module == 'gpu' }} run: | - ${{ env.SELF_HOST_PYTHON }} -m pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 - ${{ env.SELF_HOST_PYTHON }} -m pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging "numpy<2.0.0" scipy cython coverage flaky - ${{ env.SELF_HOST_PYTHON }} -m pip uninstall xoscar - ${{ env.SELF_HOST_PYTHON }} setup.py clean --all - ${{ env.SELF_HOST_PYTHON }} -m pip install -e ./ + source activate xoscar-test + pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 + pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging "numpy<2.0.0" scipy cython coverage flaky + # pip uninstall xoscar + # python setup.py clean --all + # pip install -e ./ working-directory: ./python - name: Test with pytest @@ -152,7 +153,8 @@ jobs: - name: Test with pytest GPU if: ${{ matrix.module == 'gpu' }} run: | - ${{ env.SELF_HOST_PYTHON }} -m pytest -m cuda \ + source activate xoscar-test + pytest -m cuda \ --cov-config=setup.cfg --cov-report=xml \ --cov=xoscar --capture=no working-directory: ./python From 9b0e3dbf41f88a0591302d85460bf0d1d8f227db Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 20:22:45 +0800 Subject: [PATCH 17/20] CI GPU --- .github/workflows/python.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 9e94a3e5..ea687d59 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -136,9 +136,8 @@ jobs: source activate xoscar-test pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging "numpy<2.0.0" scipy cython coverage flaky - # pip uninstall xoscar - # python setup.py clean --all - # pip install -e ./ + python setup.py clean --all + pip install -e ./ working-directory: ./python - name: Test with pytest From fabdbe5166694fd8b862ad3fbf8a3134fdc5d9b0 Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 21:27:38 +0800 Subject: [PATCH 18/20] CI GPU --- .github/workflows/python.yaml | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index ea687d59..09bc2350 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -133,20 +133,27 @@ jobs: - name: Install on GPU if: ${{ matrix.module == 'gpu' }} run: | - source activate xoscar-test + source activate ${{ env.CONDA_ENV }} + which python + conda install -y conda-forge::nccl pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging "numpy<2.0.0" scipy cython coverage flaky - python setup.py clean --all - pip install -e ./ + # python setup.py clean --all + # pip install -e ./ working-directory: ./python - name: Test with pytest - if: ${{ matrix.module != 'gpu' }} + env: + MODULE: ${{ matrix.module }} run: | - pytest --timeout=1500 \ - -W ignore::PendingDeprecationWarning \ - --cov-config=setup.cfg --cov-report=xml \ - --cov=xoscar xoscar --capture=no + if [[ "$MODULE" == "xoscar" ]]; then + pytest --timeout=1500 \ + -W ignore::PendingDeprecationWarning \ + --cov-config=setup.cfg --cov-report=xml --cov=xoscar xoscar --capture=no + else + source activate ${{ env.CONDA_ENV }} + pytest -m cuda --cov-config=setup.cfg --cov-report=xml --cov=xoscar --capture=no + fi working-directory: ./python - name: Test with pytest GPU From 9f88d52b01b1cf96262e373e97f0f7c0a7aeb1ca Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 21:30:23 +0800 Subject: [PATCH 19/20] CI GPU --- .github/workflows/python.yaml | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 09bc2350..fd1304c7 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -138,8 +138,8 @@ jobs: conda install -y conda-forge::nccl pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging "numpy<2.0.0" scipy cython coverage flaky - # python setup.py clean --all - # pip install -e ./ + python setup.py clean --all + pip install -e ./ working-directory: ./python - name: Test with pytest @@ -155,15 +155,6 @@ jobs: pytest -m cuda --cov-config=setup.cfg --cov-report=xml --cov=xoscar --capture=no fi working-directory: ./python - - - name: Test with pytest GPU - if: ${{ matrix.module == 'gpu' }} - run: | - source activate xoscar-test - pytest -m cuda \ - --cov-config=setup.cfg --cov-report=xml \ - --cov=xoscar --capture=no - working-directory: ./python - name: Report coverage data uses: codecov/codecov-action@v4 From 9bb4c9cacd10de284b238905b676c0abcde6d22e Mon Sep 17 00:00:00 2001 From: Lu Weizheng Date: Mon, 19 Aug 2024 22:08:21 +0800 Subject: [PATCH 20/20] CI GPU --- .github/workflows/python.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index fd1304c7..77bb4bae 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -134,8 +134,7 @@ jobs: if: ${{ matrix.module == 'gpu' }} run: | source activate ${{ env.CONDA_ENV }} - which python - conda install -y conda-forge::nccl + conda install -y conda-forge::nccl=2.22.3 pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 pip install ucx-py-cu12 cloudpickle psutil tblib uvloop packaging "numpy<2.0.0" scipy cython coverage flaky python setup.py clean --all @@ -155,7 +154,7 @@ jobs: pytest -m cuda --cov-config=setup.cfg --cov-report=xml --cov=xoscar --capture=no fi working-directory: ./python - + - name: Report coverage data uses: codecov/codecov-action@v4 with: