@@ -931,11 +931,12 @@ uct_test::entity::entity(const resource& resource, uct_md_config_t *md_config,
931931
932932void uct_test::entity::mem_alloc (size_t length, unsigned mem_flags,
933933 uct_allocated_memory_t *mem,
934- ucs_memory_type_t mem_type) const
934+ ucs_memory_type_t mem_type,
935+ unsigned num_retries) const
935936{
936- void *address = NULL ;
937- uct_md_h uct_md = md ();
938- ucs_status_t status;
937+ void *address = NULL ;
938+ uct_md_h uct_md = md ();
939+ ucs_status_t status = UCS_OK ;
939940 uct_mem_alloc_params_t params;
940941
941942 params.field_mask = UCT_MEM_ALLOC_PARAM_FIELD_FLAGS |
@@ -947,22 +948,38 @@ void uct_test::entity::mem_alloc(size_t length, unsigned mem_flags,
947948 params.mem_type = mem_type;
948949 params.address = address;
949950
950- if ((md_attr ().flags & (UCT_MD_FLAG_ALLOC | UCT_MD_FLAG_REG)) &&
951- (mem_type == UCS_MEMORY_TYPE_HOST)) {
952- status = uct_iface_mem_alloc (m_iface, length, mem_flags, " uct_test" ,
953- mem);
954- ASSERT_UCS_OK (status);
955- } else {
956- uct_alloc_method_t alloc_methods[] = {UCT_ALLOC_METHOD_MMAP,
957- UCT_ALLOC_METHOD_MD};
958- params.field_mask |= UCT_MEM_ALLOC_PARAM_FIELD_MDS;
959- params.mds .mds = &uct_md;
960- params.mds .count = 1 ;
961- status = uct_mem_alloc (length, alloc_methods,
962- ucs_static_array_size (alloc_methods), &params,
963- mem);
964- ASSERT_UCS_OK (status);
951+ for (unsigned i = 0 ; i <= num_retries; ++i) {
952+ scoped_log_handler slh (wrap_errors_logger);
953+ if ((md_attr ().flags & (UCT_MD_FLAG_ALLOC | UCT_MD_FLAG_REG)) &&
954+ (mem_type == UCS_MEMORY_TYPE_HOST)) {
955+ status = uct_iface_mem_alloc (m_iface, length, mem_flags, " uct_test" ,
956+ mem);
957+ } else {
958+ uct_alloc_method_t alloc_methods[] = {UCT_ALLOC_METHOD_MMAP,
959+ UCT_ALLOC_METHOD_MD};
960+ params.field_mask |= UCT_MEM_ALLOC_PARAM_FIELD_MDS;
961+ params.mds .mds = &uct_md;
962+ params.mds .count = 1 ;
963+ status = uct_mem_alloc (length, alloc_methods,
964+ ucs_static_array_size (alloc_methods),
965+ &params, mem);
966+ }
967+
968+ if (status != UCS_ERR_NO_MEMORY) {
969+ break ;
970+ }
971+
972+ if (i < num_retries) {
973+ UCS_TEST_MESSAGE << " Retry " << (i + 1 ) << " /" << num_retries
974+ << " : Allocation failed - "
975+ << ucs_status_string (status);
976+ /* Sleep only if there are more retries remaining */
977+ usleep (ucs::rand () % 10000 );
978+ }
965979 }
980+
981+ ASSERT_UCS_OK (status);
982+
966983 ucs_assert (mem->mem_type == mem_type);
967984}
968985
@@ -1414,16 +1431,16 @@ void uct_test::mapped_buffer::reset()
14141431uct_test::mapped_buffer::mapped_buffer (size_t size, uint64_t seed,
14151432 const entity &entity, size_t offset,
14161433 ucs_memory_type_t mem_type,
1417- unsigned mem_flags) :
1418- mapped_buffer(size, entity, offset, mem_type, mem_flags)
1434+ unsigned mem_flags, unsigned num_retries ) :
1435+ mapped_buffer(size, entity, offset, mem_type, mem_flags, num_retries )
14191436{
14201437 pattern_fill (seed);
14211438}
14221439
1423- uct_test::mapped_buffer::mapped_buffer (size_t size,
1440+ uct_test::mapped_buffer::mapped_buffer (size_t size,
14241441 const entity &entity, size_t offset,
14251442 ucs_memory_type_t mem_type,
1426- unsigned mem_flags) :
1443+ unsigned mem_flags, unsigned num_retries ) :
14271444 m_entity(entity)
14281445{
14291446 if (size == 0 ) {
@@ -1433,7 +1450,7 @@ uct_test::mapped_buffer::mapped_buffer(size_t size,
14331450
14341451 size_t alloc_size = size + offset;
14351452 if ((mem_type == UCS_MEMORY_TYPE_HOST) || (mem_type == UCS_MEMORY_TYPE_RDMA)) {
1436- m_entity.mem_alloc (alloc_size, mem_flags, &m_mem, mem_type);
1453+ m_entity.mem_alloc (alloc_size, mem_flags, &m_mem, mem_type, num_retries );
14371454 } else {
14381455 m_mem.method = UCT_ALLOC_METHOD_LAST;
14391456 m_mem.address = mem_buffer::allocate (alloc_size, mem_type);
0 commit comments