-
Notifications
You must be signed in to change notification settings - Fork 114
Open
Description
The following test fails with an assertion in the latest UCC release, 1.4.4:
[----------] 24 tests from test_asymmetric_memory
[ RUN ] test_asymmetric_memory.single/0
[arch-nspawn-2479619:19824:0:19824] ucp_worker.c:3023 Assertion `ucs_async_check_owner_thread(&(worker)->async)' failed
==== backtrace (tid: 19824) ====
0 0x000000000005c379 ucp_worker_progress() ???:0
1 0x000000000000b047 ucc_context_progress() /usr/src/debug/openucc/ucc-1.4.4/src/core/ucc_context.c:993
2 0x00000000004c0cd7 UccTeam::init_team() :0
3 0x00000000004c1bf4 UccJob::create_team() :0
4 0x00000000004c1bf4 std::__shared_ptr<UccTeam, (__gnu_cxx::_Lock_policy)2>::__shared_ptr<std::allocator<void>, std::vector<std::shared_ptr<UccProcess>, std::allocator<std::shared_ptr<UccProcess> > >&, bool&, bool&, bool&>() /usr/include/c++/15.1.1/bits/shared_ptr_base.h:1719
5 0x00000000004c1bf4 std::shared_ptr<UccTeam>::shared_ptr<std::allocator<void>, std::vector<std::shared_ptr<UccProcess>, std::allocator<std::shared_ptr<UccProcess> > >&, bool&, bool&, bool&>() /usr/include/c++/15.1.1/bits/shared_ptr.h:463
6 0x00000000004c1bf4 std::make_shared<UccTeam, std::vector<std::shared_ptr<UccProcess>, std::allocator<std::shared_ptr<UccProcess> > >&, bool&, bool&, bool&>() /usr/include/c++/15.1.1/bits/shared_ptr.h:1008
7 0x00000000004c1bf4 UccJob::create_team() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/test_ucc.cc:565
8 0x0000000001149e64 test_asymmetric_memory_single_Test::TestBody() :0
9 0x00000000004a29cf testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:3925
10 0x00000000004af34a testing::Test::Run() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:4000
11 0x00000000004af34a testing::Test::Run() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:4007
12 0x00000000004af34a testing::Test::Run() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:3990
13 0x00000000004af34a testing::TestInfo::Run() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:4176
14 0x00000000004af34a testing::TestInfo::Run() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:4149
15 0x00000000004af34a testing::TestSuite::Run() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:4308
16 0x00000000004af34a testing::TestSuite::Run() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:4287
17 0x00000000004af34a testing::internal::UnitTestImpl::RunAllTests() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:6833
18 0x000000000025980b testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest-all.cc:3925
19 0x000000000025980b RUN_ALL_TESTS() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/gtest.h:14812
20 0x000000000025980b main() /usr/src/debug/openucc/ucc-1.4.4/test/gtest/common/main.cc:27
21 0x00000000000276b5 __libc_init_first() ???:0
22 0x0000000000027769 __libc_start_main() ???:0
23 0x000000000048cd25 _start() ???:0
=================================
This failure occurs when building the openucc package on Arch Linux.
The configure flags are:
#define UCC_CONFIGURE_FLAGS "--prefix=/usr --with-ucx=/usr --with-cuda=/opt/cuda --with-rocm=/opt/rocm --with-rocm-arch=all-arch-no-native --enable-gtest CFLAGS=-march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=3 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -g -ffile-prefix-map=/build/openucc/src=/usr/src/debug/openucc -flto=auto CXXFLAGS=-march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=3 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Wp,-D_GLIBCXX_ASSERTIONS -g -ffile-prefix-map=/build/openucc/src=/usr/src/debug/openucc -flto=auto LDFLAGS=-Wl,-O1 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,-z,pack-relative-relocs -flto=auto NVCC_CFLAGS=--threads 0 HIPCC=/opt/rocm/lib/llvm/bin/amdclang"
The reported build configuration:
configure:33378: =========================================================
configure:33388: UCC build configuration:
configure:33390: Build prefix: /usr
configure:33392: Preprocessor flags: -I/usr/include/ -DCPU_FLAGS="" -I${abs_top_srcdir}/src -I${abs_top_builddir} -I${abs_top_builddir}/src
configure:33394: C compiler: gcc -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=3 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -g -ffile-prefix-map=/build/openucc/src=/usr/src/debug/openucc -flto=auto -O3 -g -DNDEBUG -std=gnu11 -Wall -Werror -funwind-tables -Wno-missing-field-initializers -Wno-unused-parameter -Wno-long-long -Wno-endif-labels -Wno-sign-compare -Wno-multichar -Wno-deprecated-declarations -Winvalid-pch -Wno-pointer-sign -Werror-implicit-function-declaration -Wnested-externs -Wshadow -Wenum-conversion
configure:33396: C++ compiler: g++ -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=3 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -g -ffile-prefix-map=/build/openucc/src=/usr/src/debug/openucc -flto=auto -O3 -g -DNDEBUG -Wall -Werror -funwind-tables -Wno-missing-field-initializers -Wno-unused-parameter -Wno-long-long -Wno-endif-labels -Wno-sign-compare -Wno-multichar -Wno-deprecated-declarations -Winvalid-pch
configure:33401: NVCC gencodes: -gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_61,code=compute_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=sm_90 -gencode=arch=compute_90,code=compute_90
configure:33405: DOCA UROM enabled: no
configure:33410: ROCM architectures: --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx940 --offload-arch=gfx941 --offload-arch=gfx942 --offload-arch=gfx1030 --offload-arch=gfx1100 --offload-arch=gfx1101 --offload-arch=gfx1102
configure:33414: Perftest: disabled
configure:33416: Gtest: enabled
configure:33418: MC modules: < cpu cuda rocm >
configure:33420: TL modules: < cuda nccl rccl self ucp >
configure:33422: TLCP modules: < >
configure:33434: =========================================================
Metadata
Assignees
Labels
No labels