ve2: add support for create/destroy hw context#560
Conversation
-Support hw context create/destroy ioctls -Add new ve2 resource solver drivers to manage aie columns resources among hw contexts. -Add new ve2 management driver for cert firmware Signed-off-by: Raj Kumar Rampelli <[email protected]>
|
Can one of the admins verify this patch? |
|
Internal review in progress, so marked as draft. |
| @@ -0,0 +1,219 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | |||
There was a problem hiding this comment.
The handshake region has been updated in CERT. Could you please check and update this accordingly?
src/driver/amdxdna/amdxdna_of_drv.c
Outdated
| xrs_cfg.ddev = &xdna->ddev; | ||
| xrs_cfg.total_col = XRS_MAX_COL; | ||
|
|
||
| if (xdna->dev_handle) |
There was a problem hiding this comment.
xdna->dev_handle
This one checks twice.
| if (xdna->dev_handle) | |
| if (xdna->dev_handle) { | |
| xdna->dev_handle->xrs_hdl = xrsm_init(&xrs_cfg); | |
| if (!xdna->dev_handle->xrs_hdl) { | |
| XDNA_ERR(xdna, "Initialize resolver failed"); | |
| drm_dev_put(&xdna->ddev); | |
| return -EINVAL; | |
| } | |
| } | |
src/driver/amdxdna/ve2_mgmt.c
Outdated
|
|
||
| total_col = xrs_get_total_cols(xdna->dev_handle->xrs_hdl); | ||
| if (total_col < 0) | ||
| return -EINVAL; |
There was a problem hiding this comment.
Before returning, we should free the xrs_req memory and also release the lock.
src/driver/amdxdna/ve2_mgmt.c
Outdated
| ret = cert_setup_partition(aie_dev, col, start_col, num_col, hsa_addr); | ||
| if (ret < 0) { | ||
| XDNA_ERR(xdna, "cert_setup_partition() err %d for col %d", ret, start_col); | ||
| return ret; |
There was a problem hiding this comment.
Since we are returning from here, please tear down and release the AIE partition.
src/driver/amdxdna/ve2_mgmt.c
Outdated
| for (u32 col = start_col; col < start_col + num_col; col++) { | ||
| rel_col = col - start_col; | ||
| ret = cert_clear_partition(xdna, priv->aie_dev, rel_col); | ||
| if (ret < 0) | ||
| XDNA_ERR(xdna, "cert_clear_partition() err %d for col %d", ret, rel_col); | ||
| } |
There was a problem hiding this comment.
Please update the loop.
| for (u32 col = start_col; col < start_col + num_col; col++) { | |
| rel_col = col - start_col; | |
| ret = cert_clear_partition(xdna, priv->aie_dev, rel_col); | |
| if (ret < 0) | |
| XDNA_ERR(xdna, "cert_clear_partition() err %d for col %d", ret, rel_col); | |
| } | |
| for (u32 col = 0; col < num_col; col++) { | |
| ret = cert_clear_partition(xdna, priv->aie_dev, col); | |
| if (ret < 0) | |
| XDNA_ERR(xdna, "cert_clear_partition() err %d for col %d", ret, col); | |
| } |
src/driver/amdxdna/ve2_mgmt.c
Outdated
| aie_dev = aie_partition_request(&request); | ||
| if (IS_ERR(aie_dev)) { | ||
| XDNA_ERR(xdna, "aie parition request failed, error %ld", PTR_ERR(aie_dev)); | ||
| return -ENODEV; |
There was a problem hiding this comment.
Please release the resource (xrs_release_resource) in the failure case.
Same subsequently
Signed-off-by: Raj Kumar Rampelli <[email protected]>
|
ok to test |
Signed-off-by: Raj Kumar Rampelli <[email protected]>
Signed-off-by: Raj Kumar Rampelli <[email protected]>
src/driver/amdxdna/amdxdna_ctx.c
Outdated
| if (!xdna->dev_info->ops->ctx_init) | ||
| return -EOPNOTSUPP; | ||
|
|
There was a problem hiding this comment.
Why add this check? I don't think we need it. I don't see any device that doesn't support the ctx_init() callback.
Please let me know what the justification for this is. Otherwise, please remove.
src/driver/amdxdna/amdxdna_of_drv.c
Outdated
| #include <drm/drm_managed.h> | ||
|
|
||
| #include "amdxdna_of_drv.h" | ||
| #include "ve2_res_solver.h" |
There was a problem hiding this comment.
NEVER include aie2_, aie4_ and ve2_ header in amdxdna_ files. Please remove this line and make necessary changes.
src/driver/amdxdna/ve2_host_queue.h
Outdated
| u16 minor; | ||
| } | ||
| version; | ||
| //Queue capacity, must be a power of two. |
There was a problem hiding this comment.
Use WARN_ON() instead of comment. Delete this comment pls.
src/driver/amdxdna/ve2_host_queue.h
Outdated
| union { | ||
| struct { | ||
| u16 type: 8; | ||
| u16 barrier: 1; | ||
| u16 acquire_fence_scope: 2; | ||
| u16 release_fence_scope: 2; | ||
| }; | ||
| u16 header; | ||
| }; |
There was a problem hiding this comment.
This is not the suggested coding style. Ref aie2_msg_priv.h
If this triggers many code changes, we can merge this for open source. But this needs to be refined when upstreaming.
src/driver/amdxdna/ve2_host_queue.h
Outdated
| u32 size; //5c | ||
| } | ||
| dbg_buf; | ||
| volatile struct { |
There was a problem hiding this comment.
Can you put a comment to explain why 'volatile' is needed for this but not for above fields?
src/driver/amdxdna/ve2_host_queue.h
Outdated
| // number of checks whether there are jobs ready | ||
| u32 c_job_readiness_checked; | ||
| // number of opcode run | ||
| u32 c_opcode; | ||
| u32 c_job_launched; | ||
| u32 c_job_finished; | ||
| // number of hsa pkt handled | ||
| u32 c_hsa_pkt; | ||
| // number of pages loaded | ||
| u32 c_page; | ||
| // number of hsa doorbell ring | ||
| u32 c_doorbell; | ||
| // number of uc memory(PM) scrub | ||
| u32 c_uc_scrub; | ||
| // number of tct requested | ||
| u32 c_tct_requested; | ||
| // number of tct received | ||
| u32 c_tct_received; | ||
| // run out of wait handle UC_DMA_WRITE_DES opcode | ||
| u16 c_preemption_ucdma; | ||
| // run out of wait handle UC_DMA_WRITE_DES_SYNC opcode | ||
| u16 c_preemption_ucdma_sync; | ||
| // POLL_32 opcode retry times | ||
| u16 c_preemption_poll; | ||
| // MASK_POLL_32 opcode retry times | ||
| u16 c_preemption_mask_poll; | ||
| // run out of physical barrier REMOTE_BARRIER opcode | ||
| u16 c_preemption_remote_barrier; | ||
| // actor entry overflow or run out of wait handle WAIT_TCTS opcode | ||
| u16 c_preemption_wait_tct; | ||
| // block UC_DMA_WRITE_DES opcode | ||
| u16 c_block_ucdma; | ||
| // block UC_DMA_WRITE_DES_SYNC opcode | ||
| u16 c_block_ucdma_sync; | ||
| // block local_barrier opcode | ||
| u16 c_block_local_barrier; | ||
| // block REMOTE_BARRIER opcode | ||
| u16 c_block_remote_barrier; | ||
| // block WAIT_TCTS opcode | ||
| u16 c_block_wait_tct; | ||
| // number of slow actor entry lookup | ||
| u16 c_actor_hash_conflict; |
There was a problem hiding this comment.
Instead of comments, using readable field names is preferred.
src/driver/amdxdna/ve2_hwctx.c
Outdated
| free_host_queue: | ||
| ve2_free_hsa_queue(xdna, &hwctx->priv->hwctx_hsa_queue); |
There was a problem hiding this comment.
Are host_queue and hsa_queue the same? Even if they are the same, please use consistent terminology to avoid confusion.
src/driver/amdxdna/ve2_mgmt.c
Outdated
| static void ve2_irq_handler(u32 partition_id, void *cb_arg) | ||
| { | ||
| struct amdxdna_ctx *hwctx = (struct amdxdna_ctx *)cb_arg; | ||
| struct amdxdna_dev *xdna = hwctx->client->xdna; | ||
|
|
||
| XDNA_DBG(xdna, "Created partition for start_col %d, num_col %d with partition_id %d\n", | ||
| hwctx->start_col, hwctx->num_col, partition_id); | ||
| } |
There was a problem hiding this comment.
This does nothing but only print a debug message. Is this expected?
This function doesn't look like a Linux IRQ handler. If my understanding is correct, please rename this to something else. This is more like a callback function.
Signed-off-by: Raj Kumar Rampelli <[email protected]>
Signed-off-by: Raj Kumar Rampelli <[email protected]>
Signed-off-by: Raj Kumar Rampelli <[email protected]>
|
The checkpatch step failed in the CI. Please fix that. |
Signed-off-by: Raj Kumar Rampelli <[email protected]>
mamin506
left a comment
There was a problem hiding this comment.
Overall, this looks good to me. Just some minor changes and suggestions. Please take a look. The upper_32_bits() macro is a must-have change.
src/driver/amdxdna/ve2_of.h
Outdated
| #define ADDR64_HIGH(x) (((x) >> 32) & 0xFFFFFFFF) | ||
| #define ADDR64_LOW(x) ((x) & 0xFFFFFFFF) | ||
|
|
There was a problem hiding this comment.
Linux has predefined macros for the same purpose. Please use those macros.
See https://elixir.bootlin.com/linux/v6.15/source/include/linux/wordpart.h#L14
src/driver/amdxdna/ve2_mgmt.c
Outdated
| /* save aie_dev into priv */ | ||
| priv->aie_dev = aie_dev; |
src/driver/amdxdna/ve2_mgmt.c
Outdated
| if (!xrs) | ||
| return -EINVAL; | ||
|
|
There was a problem hiding this comment.
The xrs_hdl is assigned when probing the device. It is OK to remove this check.
Unless xrs can be dynamically loaded/unloaded, and maybe the caller of this function isn't aware of it. Then you can check such a fundamental struct. :)
Signed-off-by: Raj Kumar Rampelli <[email protected]>
|
retest this please |
-Support hw context create/destroy ioctls
-Add new ve2 resource solver drivers to manage aie columns
resources among hw contexts.
-Add new ve2 management driver for cert firmware