-
Notifications
You must be signed in to change notification settings - Fork 497
TOOLS/DEVICE: support channel id in perftest #10993
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -110,6 +110,7 @@ private: | |
| struct ucp_perf_cuda_params { | ||
| ucp_device_mem_list_handle_h mem_list; | ||
| size_t length; | ||
| unsigned num_channels; | ||
| unsigned *indices; | ||
| size_t *local_offsets; | ||
| size_t *remote_offsets; | ||
|
|
@@ -122,6 +123,7 @@ class ucp_perf_cuda_params_handler { | |
| public: | ||
| ucp_perf_cuda_params_handler(const ucx_perf_context_t &perf) | ||
| { | ||
| m_params.num_channels = perf.params.device_ep_channel_count; | ||
| init_mem_list(perf); | ||
| init_elements(perf); | ||
| init_counters(perf); | ||
|
|
@@ -243,15 +245,18 @@ ucp_perf_cuda_send_async(const ucp_perf_cuda_params &params, | |
| ucx_perf_counter_t idx, ucp_device_request_t *req, | ||
| ucp_device_flags_t flags = UCP_DEVICE_FLAG_NODELAY) | ||
| { | ||
| const unsigned channel_id = threadIdx.x % params.num_channels; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IMO the channel id should be some random value (or at least there should be a "mode" for the channel in which it is generated randomly) |
||
|
|
||
| switch (cmd) { | ||
| case UCX_PERF_CMD_PUT_SINGLE: | ||
| *params.counter_send = idx + 1; | ||
| return ucp_device_put_single<level>(params.mem_list, params.indices[0], | ||
| 0, 0, | ||
| params.length + ONESIDED_SIGNAL_SIZE, | ||
| 0, flags, req); | ||
| channel_id, flags, req); | ||
| case UCX_PERF_CMD_PUT_MULTI: | ||
| return ucp_device_put_multi<level>(params.mem_list, 1, 0, flags, req); | ||
| return ucp_device_put_multi<level>(params.mem_list, 1, channel_id, | ||
| flags, req); | ||
| case UCX_PERF_CMD_PUT_PARTIAL: { | ||
| unsigned counter_index = params.mem_list->mem_list_length - 1; | ||
| return ucp_device_put_multi_partial<level>(params.mem_list, | ||
|
|
@@ -260,8 +265,8 @@ ucp_perf_cuda_send_async(const ucp_perf_cuda_params &params, | |
| params.local_offsets, | ||
| params.remote_offsets, | ||
| params.lengths, | ||
| counter_index, 1, 0, 0, | ||
| flags, req); | ||
| counter_index, 1, 0, | ||
| channel_id, flags, req); | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -192,41 +192,42 @@ ucs_status_t init_test_params(perftest_params_t *params) | |
| {UCS_MEMORY_TYPE_LAST, UCX_PERF_MEM_DEV_DEFAULT}; | ||
|
|
||
| memset(params, 0, sizeof(*params)); | ||
| params->super.api = UCX_PERF_API_LAST; | ||
| params->super.command = UCX_PERF_CMD_LAST; | ||
| params->super.test_type = UCX_PERF_TEST_TYPE_LAST; | ||
| params->super.thread_mode = UCS_THREAD_MODE_SERIALIZED; | ||
| params->super.thread_count = 1; | ||
| params->super.async_mode = UCS_ASYNC_THREAD_LOCK_TYPE; | ||
| params->super.wait_mode = UCX_PERF_WAIT_MODE_LAST; | ||
| params->super.max_outstanding = 0; | ||
| params->super.warmup_iter = 10000; | ||
| params->super.warmup_time = 100e-3; | ||
| params->super.alignment = ucs_get_page_size(); | ||
| params->super.max_iter = 1000000l; | ||
| params->super.max_time = 0.0; | ||
| params->super.report_interval = 1.0; | ||
| params->super.percentile_rank = 50.0; | ||
| params->super.flags = UCX_PERF_TEST_FLAG_VERBOSE; | ||
| params->super.uct.fc_window = UCT_PERF_TEST_MAX_FC_WINDOW; | ||
| params->super.uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT; | ||
| params->super.uct.am_hdr_size = 8; | ||
| params->super.send_mem_type = UCS_MEMORY_TYPE_HOST; | ||
| params->super.recv_mem_type = UCS_MEMORY_TYPE_HOST; | ||
| params->super.send_device = default_dev; | ||
| params->super.recv_device = default_dev; | ||
| params->super.device_level = UCS_DEVICE_LEVEL_THREAD; | ||
| params->super.msg_size_cnt = 1; | ||
| params->super.iov_stride = 0; | ||
| params->super.ucp.send_datatype = UCP_PERF_DATATYPE_CONTIG; | ||
| params->super.ucp.recv_datatype = UCP_PERF_DATATYPE_CONTIG; | ||
| params->super.ucp.am_hdr_size = 0; | ||
| params->super.device_thread_count = 1; | ||
| params->super.device_block_count = 1; | ||
| params->super.device_fc_window = UCP_PERF_FC_WINDOW_DEFAULT; | ||
| params->super.ucp.is_daemon_mode = 0; | ||
| params->super.ucp.dmn_local_addr = empty_addr; | ||
| params->super.ucp.dmn_remote_addr = empty_addr; | ||
| params->super.api = UCX_PERF_API_LAST; | ||
| params->super.command = UCX_PERF_CMD_LAST; | ||
| params->super.test_type = UCX_PERF_TEST_TYPE_LAST; | ||
| params->super.thread_mode = UCS_THREAD_MODE_SERIALIZED; | ||
| params->super.thread_count = 1; | ||
| params->super.async_mode = UCS_ASYNC_THREAD_LOCK_TYPE; | ||
| params->super.wait_mode = UCX_PERF_WAIT_MODE_LAST; | ||
| params->super.max_outstanding = 0; | ||
| params->super.warmup_iter = 10000; | ||
| params->super.warmup_time = 100e-3; | ||
| params->super.alignment = ucs_get_page_size(); | ||
| params->super.max_iter = 1000000l; | ||
| params->super.max_time = 0.0; | ||
| params->super.report_interval = 1.0; | ||
| params->super.percentile_rank = 50.0; | ||
| params->super.flags = UCX_PERF_TEST_FLAG_VERBOSE; | ||
| params->super.uct.fc_window = UCT_PERF_TEST_MAX_FC_WINDOW; | ||
| params->super.uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT; | ||
| params->super.uct.am_hdr_size = 8; | ||
| params->super.send_mem_type = UCS_MEMORY_TYPE_HOST; | ||
| params->super.recv_mem_type = UCS_MEMORY_TYPE_HOST; | ||
| params->super.send_device = default_dev; | ||
| params->super.recv_device = default_dev; | ||
| params->super.device_level = UCS_DEVICE_LEVEL_THREAD; | ||
| params->super.msg_size_cnt = 1; | ||
| params->super.iov_stride = 0; | ||
| params->super.ucp.send_datatype = UCP_PERF_DATATYPE_CONTIG; | ||
| params->super.ucp.recv_datatype = UCP_PERF_DATATYPE_CONTIG; | ||
| params->super.ucp.am_hdr_size = 0; | ||
| params->super.device_ep_channel_count = 1; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IMO the default should be some large value like UINT_MAX, so that by default each thread would use a different channel, since this is also controlled by the NUM_CHANNELS configuration of the GDAKI transport |
||
| params->super.device_thread_count = 1; | ||
| params->super.device_block_count = 1; | ||
| params->super.device_fc_window = UCP_PERF_FC_WINDOW_DEFAULT; | ||
| params->super.ucp.is_daemon_mode = 0; | ||
| params->super.ucp.dmn_local_addr = empty_addr; | ||
| params->super.ucp.dmn_remote_addr = empty_addr; | ||
| strcpy(params->super.uct.dev_name, TL_RESOURCE_NAME_NONE); | ||
| strcpy(params->super.uct.tl_name, TL_RESOURCE_NAME_NONE); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
device_num_channels