Skip to content

[BUG] Basalt VIO race condition #1510

@nhahn

Description

@nhahn

Currently this line in the Basalt VIO node:

passthrough.send(leftImg);

can cause a segfault due to a race condition: the shared_ptr reference for leftImg is updated from both the pipeline thread and the host node thread (at least, I think that's what is happening; see the stack trace below).

Either the passthrough should be removed, or access to leftImg should be synchronized between the two threads.

oakd-1        | [component_container-1] Stack trace (most recent call last) in thread 30649:
oakd-1        | [component_container-1] #6    Object "/usr/lib/aarch64-linux-gnu/ld-linux-aarch64.so.1", at 0xffffffffffffffff, in 
oakd-1        | [component_container-1] #5    Object "/usr/lib/aarch64-linux-gnu/libc.so.6", at 0xffffb6f09e9b, in 
oakd-1        | [component_container-1] #4    Object "/usr/lib/aarch64-linux-gnu/libc.so.6", at 0xffffb6ea0397, in 
oakd-1        | [component_container-1] #3    Object "/usr/lib/aarch64-linux-gnu/libstdc++.so.6.0.30", at 0xffffb70d29cb, in 
oakd-1        | [component_container-1] #2  | Source "/usr/include/c++/11/bits/std_thread.h", line 211, in operator()
oakd-1        | [component_container-1]     |   210: 	void
oakd-1        | [component_container-1]     | > 211: 	_M_run() { _M_func(); }
oakd-1        | [component_container-1]     |   212:       };
oakd-1        | [component_container-1]     | Source "/usr/include/c++/11/bits/std_thread.h", line 266, in _M_invoke<0>
oakd-1        | [component_container-1]     |   264: 	  using _Indices
oakd-1        | [component_container-1]     |   265: 	    = typename _Build_index_tuple<tuple_size<_Tuple>::value>::__type;
oakd-1        | [component_container-1]     | > 266: 	  return _M_invoke(_Indices());
oakd-1        | [component_container-1]     |   267: 	}
oakd-1        | [component_container-1]     |   268:       };
oakd-1        | [component_container-1]     | Source "/usr/include/c++/11/bits/std_thread.h", line 259, in __invoke<dai::ThreadedNode::start()::<lambda()> >
oakd-1        | [component_container-1]     |   257: 	  typename __result<_Tuple>::type
oakd-1        | [component_container-1]     |   258: 	  _M_invoke(_Index_tuple<_Ind...>)
oakd-1        | [component_container-1]     | > 259: 	  { return std::__invoke(std::get<_Ind>(std::move(_M_t))...); }
oakd-1        | [component_container-1]     |   260: 
oakd-1        | [component_container-1]     |   261: 	typename __result<_Tuple>::type
oakd-1        | [component_container-1]     | Source "/usr/include/c++/11/bits/invoke.h", line 96, in __invoke_impl<void, dai::ThreadedNode::start()::<lambda()> >
oakd-1        | [component_container-1]     |    94:       using __type = typename __result::type;
oakd-1        | [component_container-1]     |    95:       using __tag = typename __result::__invoke_type;
oakd-1        | [component_container-1]     | >  96:       return std::__invoke_impl<__type>(__tag{}, std::forward<_Callable>(__fn),
oakd-1        | [component_container-1]     |    97: 					std::forward<_Args>(__args)...);
oakd-1        | [component_container-1]     |    98:     }
oakd-1        | [component_container-1]     | Source "/usr/include/c++/11/bits/invoke.h", line 61, in operator()
oakd-1        | [component_container-1]     |    59:     constexpr _Res
oakd-1        | [component_container-1]     |    60:     __invoke_impl(__invoke_other, _Fn&& __f, _Args&&... __args)
oakd-1        | [component_container-1]     | >  61:     { return std::forward<_Fn>(__f)(std::forward<_Args>(__args)...); }
oakd-1        | [component_container-1]     |    62: 
oakd-1        | [component_container-1]     |    63:   template<typename _Res, typename _MemFun, typename _Tp, typename... _Args>
oakd-1        | [component_container-1]       Source "/opt/ros_ws/src/depthai-core/src/pipeline/ThreadedNode.cpp", line 35, in _M_run [0xffff7d7c24ab]
oakd-1        | [component_container-1]          32:     running = true;
oakd-1        | [component_container-1]          33:     thread = std::thread([this]() {
oakd-1        | [component_container-1]          34:         try {
oakd-1        | [component_container-1]       >  35:             run();
oakd-1        | [component_container-1]          36:         } catch(const MessageQueue::QueueException& ex) {
oakd-1        | [component_container-1]          37:             // catch the exception and stop the node
oakd-1        | [component_container-1]          38:             auto expStr = fmt::format("Node stopped with a queue exception: {}", ex.what());
oakd-1        | [component_container-1] #1  | Source "/opt/ros_ws/src/depthai-core/src/basalt/BasaltVIO.cpp", line 85, in ~shared_ptr
oakd-1        | [component_container-1]     |    83:         auto out = std::make_shared<TransformData>(trans.x(), trans.y(), trans.z(), rot.x(), rot.y(), rot.z(), rot.w());
oakd-1        | [component_container-1]     |    84:         transform.send(out);
oakd-1        | [component_container-1]     | >  85:         passthrough.send(leftImg);
oakd-1        | [component_container-1]     |    86:     }
oakd-1        | [component_container-1]     |    87: }
oakd-1        | [component_container-1]     | Source "/usr/include/c++/11/bits/shared_ptr.h", line 122, in ~__shared_ptr
oakd-1        | [component_container-1]     |   120:   */
oakd-1        | [component_container-1]     |   121:   template<typename _Tp>
oakd-1        | [component_container-1]     | > 122:     class shared_ptr : public __shared_ptr<_Tp>
oakd-1        | [component_container-1]     |   123:     {
oakd-1        | [component_container-1]     |   124:       template<typename... _Args>
oakd-1        | [component_container-1]     | Source "/usr/include/c++/11/bits/shared_ptr_base.h", line 1154, in ~__shared_count
oakd-1        | [component_container-1]     |  1152:       __shared_ptr(const __shared_ptr&) noexcept = default;
oakd-1        | [component_container-1]     |  1153:       __shared_ptr& operator=(const __shared_ptr&) noexcept = default;
oakd-1        | [component_container-1]     | >1154:       ~__shared_ptr() = default;
oakd-1        | [component_container-1]     |  1155: 
oakd-1        | [component_container-1]     |  1156:       template<typename _Yp, typename = _Compatible<_Yp>>
oakd-1        | [component_container-1]       Source "/usr/include/c++/11/bits/shared_ptr_base.h", line 705, in run [0xffff7d9e59c7]
oakd-1        | [component_container-1]         702:       ~__shared_count() noexcept
oakd-1        | [component_container-1]         703:       {
oakd-1        | [component_container-1]         704: 	if (_M_pi != nullptr)
oakd-1        | [component_container-1]       > 705: 	  _M_pi->_M_release();
oakd-1        | [component_container-1]         706:       }
oakd-1        | [component_container-1]         707: 
oakd-1        | [component_container-1]         708:       __shared_count(const __shared_count& __r) noexcept
oakd-1        | [component_container-1] #0    Source "/usr/include/c++/11/bits/shared_ptr_base.h", line 168, in __stack_chk_fail [0xaaaaba7b27c8]
oakd-1        | [component_container-1]         165: 	if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
oakd-1        | [component_container-1]         166: 	  {
oakd-1        | [component_container-1]         167:             _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_use_count);
oakd-1        | [component_container-1]       > 168: 	    _M_dispose();
oakd-1        | [component_container-1]         169: 	    // There must be a memory barrier between dispose() and destroy()
oakd-1        | [component_container-1]         170: 	    // to ensure that the effects of dispose() are observed in the
oakd-1        | [component_container-1]         171: 	    // thread that runs destroy().
oakd-1        | [component_container-1] Segmentation fault (Address not mapped to object [0xffff2808f])

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions