From 2b636711ccc59de30c8367902f2dcc0098af5b8b Mon Sep 17 00:00:00 2001 From: Min Ma Date: Fri, 10 Jan 2025 15:09:44 -0800 Subject: [PATCH] refactor hwctx fini (#332) 1. Add a function to wait until all jobs are freed. This is needed because drm_sched_fini() is not able to handle pending jobs in some cases in the current Linux driver code. 2. Additional refactoring required by 1). 3. Add aie2_fwctx.c for hardware/firmware-related operations, such as creating a firmware context. 4. Fix lock dependency issues in the mailbox. --------- Signed-off-by: Min Ma --- src/driver/amdxdna/Kbuild | 3 +- src/driver/amdxdna/aie2_ctx.c | 407 +++++++++------------------ src/driver/amdxdna/aie2_fwctx.c | 200 +++++++++++++ src/driver/amdxdna/aie2_message.c | 79 ++---- src/driver/amdxdna/aie2_pci.c | 78 +++-- src/driver/amdxdna/aie2_pci.h | 43 ++- src/driver/amdxdna/amdxdna_ctx.c | 45 +-- src/driver/amdxdna/amdxdna_ctx.h | 32 +-- src/driver/amdxdna/amdxdna_devel.c | 9 +- src/driver/amdxdna/amdxdna_devel.h | 4 +- src/driver/amdxdna/amdxdna_drm.c | 4 +- src/driver/amdxdna/amdxdna_drm.h | 10 +- src/driver/amdxdna/amdxdna_mailbox.c | 108 ++++--- src/driver/amdxdna/amdxdna_mailbox.h | 57 +++- src/driver/amdxdna/amdxdna_pci_drv.c | 12 +- src/driver/amdxdna/amdxdna_tdr.c | 14 +- 16 files changed, 585 insertions(+), 520 deletions(-) create mode 100644 src/driver/amdxdna/aie2_fwctx.c diff --git a/src/driver/amdxdna/Kbuild b/src/driver/amdxdna/Kbuild index 31e4237..47dd220 100644 --- a/src/driver/amdxdna/Kbuild +++ b/src/driver/amdxdna/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. +# Copyright (C) 2022-2025, Advanced Micro Devices, Inc. # ROOT := $(src) @@ -27,6 +27,7 @@ amdxdna-y := \ aie2_smu.o \ aie2_psp.o \ aie2_ctx.o \ + aie2_fwctx.o \ aie2_error.o \ aie2_debugfs.o \ aie2_message.o \ diff --git a/src/driver/amdxdna/aie2_ctx.c b/src/driver/amdxdna/aie2_ctx.c index bcba323..b47da51 100644 --- a/src/driver/amdxdna/aie2_ctx.c +++ b/src/driver/amdxdna/aie2_ctx.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2024, Advanced Micro Devices, Inc. + * Copyright (C) 2024-2025, Advanced Micro Devices, Inc. 
*/ #include @@ -10,13 +10,8 @@ #include "amdxdna_gem.h" #include "amdxdna_trace.h" #include "aie2_pci.h" -#include "aie2_solver.h" #include "aie2_msg_priv.h" -#ifdef AMDXDNA_DEVEL -#include "amdxdna_devel.h" -#endif - bool force_cmdlist; module_param(force_cmdlist, bool, 0600); MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)"); @@ -29,7 +24,6 @@ static void aie2_job_release(struct kref *ref) amdxdna_sched_job_cleanup(job); if (job->out_fence) dma_fence_put(job->out_fence); - wake_up(&job->hwctx->priv->status_wq); kfree(job); } @@ -38,52 +32,34 @@ static void aie2_job_put(struct amdxdna_sched_job *job) kref_put(&job->refcnt, aie2_job_release); } -static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, - struct drm_sched_job *bad_job) +static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) { - if (hwctx->status == HWCTX_STATE_STOP) { - XDNA_DBG(xdna, "%s was stopped, skip", hwctx->name); - return; - } - - drm_sched_stop(&hwctx->priv->sched, bad_job); - aie2_destroy_context(xdna->dev_handle, hwctx); - hwctx->status = HWCTX_STATE_STOP; + hwctx->status &= ~HWCTX_STATE_READY; + aie2_fwctx_stop(hwctx); XDNA_DBG(xdna, "Stopped %s", hwctx->name); + amdxdna_hwctx_wait_jobs(hwctx, MAX_SCHEDULE_TIMEOUT); + aie2_fwctx_free(hwctx); } static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) { - struct amdxdna_gem_obj *heap = hwctx->priv->heap; int ret; - WARN_ONCE(hwctx->status != HWCTX_STATE_STOP, "hwctx should be in stop state"); - - ret = aie2_create_context(xdna->dev_handle, hwctx); + ret = aie2_fwctx_start(hwctx); if (ret) { - XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret); - goto out; - } - - ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, - heap->mem.userptr, heap->mem.size); - if (ret) { - XDNA_ERR(xdna, "Map host buf failed, ret %d", ret); - goto out; + XDNA_ERR(xdna, "Failed to start %s, ret %d", hwctx->name, ret); + return ret; } - if (!hwctx->cus) { - XDNA_DBG(xdna, "%s restart to init state", hwctx->name); - hwctx->status = HWCTX_STATE_INIT; + if (!hwctx->cus) goto out; - } #ifdef AMDXDNA_DEVEL if (priv_load) { ret = aie2_legacy_config_cu(hwctx); if (ret) { XDNA_ERR(xdna, "Legacy config cu failed, ret %d", ret); - goto out; + goto failed; } goto skip_config_cu; } @@ -91,25 +67,19 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw ret = aie2_config_cu(hwctx); if (ret) { XDNA_ERR(xdna, "Config cu failed, ret %d", ret); - goto out; + goto failed; } #ifdef AMDXDNA_DEVEL skip_config_cu: #endif + hwctx->status |= FIELD_PREP(HWCTX_STATE_READY, 1); out: - /* - * Even above commands might failed, we still needs to restart DRM - * scheduler, to signal those commands in the pending list. 
- */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 12, 0) - drm_sched_start(&hwctx->priv->sched, true); -#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 13, 0) - drm_sched_start(&hwctx->priv->sched); -#else - drm_sched_start(&hwctx->priv->sched, 0); -#endif - hwctx->status = HWCTX_STATE_READY; - XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); + XDNA_DBG(xdna, "%s restarted, status 0x%x", hwctx->name, hwctx->status); + return 0; + +failed: + aie2_fwctx_stop(hwctx); + XDNA_DBG(xdna, "%s restarted failed, ret %d", hwctx->name, ret); return ret; } @@ -151,10 +121,10 @@ void aie2_dump_ctx(struct amdxdna_client *client) unsigned long hwctx_id; drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); - mutex_lock(&client->hwctx_lock); - amdxdna_for_each_hwctx(client, hwctx_id, hwctx) + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { + hwctx->status |= FIELD_PREP(HWCTX_STATE_DEAD, 1); aie2_hwctx_dump(xdna, hwctx); - mutex_unlock(&client->hwctx_lock); + } } void aie2_stop_ctx(struct amdxdna_client *client) @@ -164,14 +134,13 @@ void aie2_stop_ctx(struct amdxdna_client *client) unsigned long hwctx_id; drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); - mutex_lock(&client->hwctx_lock); amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { - if (hwctx->status == HWCTX_STATE_INIT) + if (!FIELD_GET(HWCTX_STATE_CONNECTED, hwctx->status)) continue; - aie2_hwctx_stop(xdna, hwctx, NULL); + hwctx->status |= FIELD_PREP(HWCTX_STATE_DEAD, 1); + aie2_hwctx_stop(xdna, hwctx); } - mutex_unlock(&client->hwctx_lock); } void aie2_restart_ctx(struct amdxdna_client *client) @@ -182,32 +151,20 @@ void aie2_restart_ctx(struct amdxdna_client *client) int err; drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); - mutex_lock(&client->hwctx_lock); amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { - if (hwctx->status != HWCTX_STATE_STOP) + if (!FIELD_GET(HWCTX_STATE_DEAD, hwctx->status)) continue; XDNA_DBG(xdna, "Resetting %s", hwctx->name); err = aie2_hwctx_restart(xdna, hwctx); - if (!err) + if (!err) { + hwctx->status &= ~HWCTX_STATE_DEAD; continue; + } - XDNA_WARN(xdna, "Failed to restart %s status %d err %d", + XDNA_WARN(xdna, "Failed to restart %s status 0x%x err %d", hwctx->name, hwctx->status, err); } - mutex_unlock(&client->hwctx_lock); -} - -static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx) -{ - struct dma_fence *fence; - - fence = aie2_cmd_get_out_fence(hwctx, hwctx->submitted - 1); - if (!fence) - return; - - dma_fence_wait(fence, false); - dma_fence_put(fence); } void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx) @@ -220,8 +177,7 @@ void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx) * and abort all commands. 
*/ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); - aie2_hwctx_wait_for_idle(hwctx); - aie2_hwctx_stop(xdna, hwctx, NULL); + aie2_hwctx_stop(xdna, hwctx); } void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx) @@ -237,7 +193,7 @@ void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx) drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); err = aie2_hwctx_restart(xdna, hwctx); if (err) - XDNA_WARN(xdna, "Failed to resume %s status %d err %d", + XDNA_WARN(xdna, "Failed to resume %s status 0x%x err %d", hwctx->name, hwctx->status, err); } @@ -255,10 +211,7 @@ aie2_sched_notify(struct amdxdna_sched_job *job) trace_xdna_job(&job->base, hwctx->name, "signaling fence", job->seq, job->opcode); dma_fence_signal(fence); idx = get_job_idx(job->seq); - mutex_lock(&hwctx->priv->io_lock); hwctx->priv->pending[idx] = NULL; - mutex_unlock(&hwctx->priv->io_lock); - up(&job->hwctx->priv->job_sem); job->job_done = true; dma_fence_put(fence); @@ -385,9 +338,6 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) if (!mmget_not_zero(job->mm)) return ERR_PTR(-ESRCH); - if (!hwctx->priv->mbox_chann) - return ERR_PTR(-ENODEV); - kref_get(&job->refcnt); fence = dma_fence_get(job->fence); @@ -433,8 +383,14 @@ static void aie2_sched_job_free(struct drm_sched_job *sched_job) struct amdxdna_hwctx *hwctx = job->hwctx; trace_xdna_job(sched_job, hwctx->name, "job free", job->seq, job->opcode); - if (!job->job_done) + if (!job->job_done) { + int idx; + + idx = get_job_idx(job->seq); + /* No contention with submit, no lock */ + hwctx->priv->pending[idx] = NULL; up(&hwctx->priv->job_sem); + } drm_sched_job_cleanup(sched_job); aie2_job_put(job); @@ -445,6 +401,45 @@ const struct drm_sched_backend_ops sched_ops = { .free_job = aie2_sched_job_free, }; +static int aie2_hwctx_syncobj_create(struct amdxdna_hwctx *hwctx) +{ + struct drm_syncobj *syncobj; + struct amdxdna_dev *xdna; + struct drm_file *filp; + u32 hdl; + int ret; + + xdna = hwctx->client->xdna; + filp = hwctx->client->filp; + hwctx->priv->syncobj = NULL; + hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; + + ret = drm_syncobj_create(&syncobj, 0, NULL); + if (ret) { + XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); + return ret; + } + ret = drm_syncobj_get_handle(filp, syncobj, &hdl); + if (ret) { + drm_syncobj_put(syncobj); + XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); + return ret; + } + hwctx->priv->syncobj = syncobj; + hwctx->syncobj_hdl = hdl; + + return 0; +} + +static void aie2_hwctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) +{ + /* + * The syncobj_hdl is owned by user space and will be cleaned up + * separately. 
+ */ + drm_syncobj_put(hwctx->priv->syncobj); +} + static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) { struct amdxdna_dev *xdna = hwctx->client->xdna; @@ -527,93 +522,12 @@ static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) return 0; } -static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx) -{ - struct amdxdna_dev *xdna = hwctx->client->xdna; - struct alloc_requests *xrs_req; - int ret; - - xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL); - if (!xrs_req) - return -ENOMEM; - - xrs_req->cdo.start_cols = hwctx->col_list; - xrs_req->cdo.cols_len = hwctx->col_list_len; - xrs_req->cdo.ncols = hwctx->num_col; - xrs_req->cdo.qos_cap.opc = hwctx->max_opc; - - xrs_req->rqos.gops = hwctx->qos.gops; - xrs_req->rqos.fps = hwctx->qos.fps; - xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth; - xrs_req->rqos.latency = hwctx->qos.latency; - xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time; - xrs_req->rqos.priority = hwctx->qos.priority; - - xrs_req->rid = (uintptr_t)hwctx; - - ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx); - if (ret) - XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret); - - kfree(xrs_req); - return ret; -} - -static void aie2_release_resource(struct amdxdna_hwctx *hwctx) -{ - struct amdxdna_dev *xdna = hwctx->client->xdna; - int ret; - - ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); - if (ret) - XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); -} - -static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) -{ - struct amdxdna_dev *xdna = hwctx->client->xdna; - struct drm_file *filp = hwctx->client->filp; - struct drm_syncobj *syncobj; - u32 hdl; - int ret; - - hwctx->priv->syncobj = NULL; - hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; - - ret = drm_syncobj_create(&syncobj, 0, NULL); - if (ret) { - XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); - return ret; - } - ret = drm_syncobj_get_handle(filp, syncobj, &hdl); - if (ret) { - drm_syncobj_put(syncobj); - XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); - return ret; - } - hwctx->priv->syncobj = syncobj; - hwctx->syncobj_hdl = hdl; - - return 0; -} - -static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) -{ - /* - * The syncobj_hdl is owned by user space and will be cleaned up - * separately. 
- */ - drm_syncobj_put(hwctx->priv->syncobj); -} - int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) { struct amdxdna_client *client = hwctx->client; struct amdxdna_dev *xdna = client->xdna; - struct drm_gpu_scheduler *sched; struct amdxdna_hwctx_priv *priv; struct amdxdna_gem_obj *heap; - struct amdxdna_dev_hdl *ndev; unsigned int wq_flags; int i, ret; @@ -622,13 +536,19 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) return -ENOMEM; hwctx->priv = priv; + ret = aie2_hwctx_col_list(hwctx); + if (ret) { + XDNA_ERR(xdna, "Create col list failed, ret %d", ret); + goto free_priv; + } + mutex_lock(&client->mm_lock); heap = client->dev_heap; if (!heap) { XDNA_ERR(xdna, "The client dev heap object not exist"); mutex_unlock(&client->mm_lock); ret = -ENOENT; - goto free_priv; + goto free_col_list; } drm_gem_object_get(to_gobj(heap)); mutex_unlock(&client->mm_lock); @@ -661,7 +581,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) priv->cmd_buf[i] = abo; } - sched = &priv->sched; mutex_init(&priv->io_lock); fs_reclaim_acquire(GFP_KERNEL); @@ -676,72 +595,26 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) XDNA_ERR(xdna, "Failed to alloc submit wq"); goto free_cmd_bufs; } - ret = drm_sched_init(sched, &sched_ops, priv->submit_wq, DRM_SCHED_PRIORITY_COUNT, - HWCTX_MAX_CMDS, 0, MAX_SCHEDULE_TIMEOUT, - NULL, NULL, hwctx->name, xdna->ddev.dev); - if (ret) { - XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); - goto free_wq; - } - - ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (ret) { - XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret); - goto free_sched; - } - ret = aie2_hwctx_col_list(hwctx); + ret = aie2_hwctx_syncobj_create(hwctx); if (ret) { - XDNA_ERR(xdna, "Create col list failed, ret %d", ret); - goto free_entity; + XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); + goto free_wq; } - ret = aie2_alloc_resource(hwctx); + ret = aie2_fwctx_start(hwctx); if (ret) { - XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret); - goto free_col_list; + XDNA_ERR(xdna, "Failed to start fw context, ret %d", ret); + goto syncobj_destroy; } -#ifdef AMDXDNA_DEVEL - if (iommu_mode == AMDXDNA_IOMMU_NO_PASID) { - ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, - heap->mem.dma_addr, heap->mem.size); - goto skip; - } -#endif - ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, - heap->mem.userptr, heap->mem.size); -#ifdef AMDXDNA_DEVEL -skip: -#endif - if (ret) { - XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); - goto release_resource; - } - - ret = aie2_ctx_syncobj_create(hwctx); - if (ret) { - XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); - goto release_resource; - } - hwctx->status = HWCTX_STATE_INIT; - ndev = xdna->dev_handle; - ndev->hwctx_num++; - init_waitqueue_head(&priv->status_wq); + xdna->dev_handle->hwctx_num++; XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); - return 0; -release_resource: - aie2_release_resource(hwctx); -free_col_list: - kfree(hwctx->col_list); -free_entity: - drm_sched_entity_destroy(&priv->entity); -free_sched: - drm_sched_fini(&priv->sched); +syncobj_destroy: + aie2_hwctx_syncobj_destroy(hwctx); free_wq: destroy_workqueue(priv->submit_wq); free_cmd_bufs: @@ -753,6 +626,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) amdxdna_gem_unpin(heap); put_heap: drm_gem_object_put(to_gobj(heap)); +free_col_list: + kfree(hwctx->col_list); free_priv: kfree(priv); return ret; @@ -760,34 +635,19 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) void 
aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) { - struct amdxdna_dev_hdl *ndev; - struct amdxdna_dev *xdna; - int idx; - - xdna = hwctx->client->xdna; - ndev = xdna->dev_handle; - ndev->hwctx_num--; - drm_sched_wqueue_stop(&hwctx->priv->sched); + struct amdxdna_dev *xdna = hwctx->client->xdna; - /* Now, scheduler will not send command to device. */ - aie2_release_resource(hwctx); + xdna->dev_handle->hwctx_num--; + aie2_fwctx_stop(hwctx); +} - /* - * All submitted commands are aborted. - * Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run() - * will return NODEV if it is called. - */ - drm_sched_wqueue_start(&hwctx->priv->sched); +void aie2_hwctx_free(struct amdxdna_hwctx *hwctx) +{ + int idx; - wait_event(hwctx->priv->status_wq, - atomic_read(&hwctx->job_submit_cnt) == atomic_read(&hwctx->job_free_cnt)); - drm_sched_entity_destroy(&hwctx->priv->entity); - drm_sched_fini(&hwctx->priv->sched); + aie2_fwctx_free(hwctx); destroy_workqueue(hwctx->priv->submit_wq); - aie2_ctx_syncobj_destroy(hwctx); - - XDNA_DBG(xdna, "%s total completed jobs %lld", hwctx->name, hwctx->completed); - + aie2_hwctx_syncobj_destroy(hwctx); for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx])); amdxdna_gem_unpin(hwctx->priv->heap); @@ -797,6 +657,8 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) aie2_unregister_pdis(hwctx); #endif + XDNA_DBG(hwctx->client->xdna, "%s total completed jobs %lld", + hwctx->name, hwctx->completed); mutex_destroy(&hwctx->priv->io_lock); kfree(hwctx->col_list); kfree(hwctx->priv); @@ -811,7 +673,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size int ret; XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name); - if (hwctx->status != HWCTX_STATE_INIT) { + if (hwctx->cus) { XDNA_ERR(xdna, "Not support re-config CU"); return -EINVAL; } @@ -859,7 +721,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size skip_config_cu: #endif wmb(); /* To avoid locking in command submit when check status */ - hwctx->status = HWCTX_STATE_READY; + hwctx->status |= FIELD_PREP(HWCTX_STATE_READY, 1); return 0; @@ -1055,6 +917,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, struct amdxdna_dev *xdna = hwctx->client->xdna; struct ww_acquire_ctx acquire_ctx; struct dma_fence_chain *chain; + struct amdxdna_dev_hdl *ndev; struct amdxdna_gem_obj *abo; unsigned long timeout = 0; int ret, i; @@ -1072,6 +935,19 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, goto up_sem; } + ndev = xdna->dev_handle; + down_read(&ndev->recover_lock); + if (!FIELD_GET(HWCTX_STATE_READY, hwctx->status)) { + XDNA_ERR(xdna, "HW Context is not ready"); + ret = -EINVAL; + goto unlock_recover; + } + + if (FIELD_GET(HWCTX_STATE_DEAD, hwctx->status)) { + ret = -ENODEV; + goto unlock_recover; + } + ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx); if (ret) { XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); @@ -1127,6 +1003,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, for (i = 0; i < job->bo_cnt; i++) dma_resv_add_fence(job->bos[i].obj->resv, job->out_fence, DMA_RESV_USAGE_WRITE); job->seq = hwctx->submitted++; + hwctx->priv->pending[get_job_idx(job->seq)] = job; kref_get(&job->refcnt); drm_sched_entity_push_job(&job->base); @@ -1136,6 +1013,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, up_read(&xdna->notifier_lock); 
amdxdna_unlock_objects(job, &acquire_ctx); + up_read(&ndev->recover_lock); aie2_job_put(job); @@ -1145,6 +1023,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, drm_sched_job_cleanup(&job->base); free_chain: dma_fence_chain_free(chain); +unlock_recover: + up_read(&ndev->recover_lock); up_sem: up(&hwctx->priv->job_sem); job->job_done = true; @@ -1188,8 +1068,7 @@ int aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout) return ret; } -void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, - unsigned long cur_seq) +void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq) { struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); struct drm_gem_object *gobj = to_gobj(abo); @@ -1200,33 +1079,3 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, if (!ret || ret == -ERESTARTSYS) XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); } - -int aie2_xrs_load_hwctx(struct amdxdna_hwctx *hwctx, struct xrs_action_load *action) -{ - struct amdxdna_dev *xdna; - int ret; - - xdna = hwctx->client->xdna; - - hwctx->start_col = action->part.start_col; - hwctx->num_col = action->part.ncols; - ret = aie2_create_context(xdna->dev_handle, hwctx); - if (ret) - XDNA_ERR(xdna, "create context failed, ret %d", ret); - - return ret; -} - -int aie2_xrs_unload_hwctx(struct amdxdna_hwctx *hwctx) -{ - struct amdxdna_dev *xdna; - int ret; - - xdna = hwctx->client->xdna; - - ret = aie2_destroy_context(xdna->dev_handle, hwctx); - if (ret) - XDNA_ERR(xdna, "destroy context failed, ret %d", ret); - - return ret; -} diff --git a/src/driver/amdxdna/aie2_fwctx.c b/src/driver/amdxdna/aie2_fwctx.c new file mode 100644 index 0000000..215698b --- /dev/null +++ b/src/driver/amdxdna/aie2_fwctx.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024-2025, Advanced Micro Devices, Inc. 
*/ + +#include "amdxdna_ctx.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_trace.h" +#include "aie2_solver.h" +#include "aie2_pci.h" + +#ifdef AMDXDNA_DEVEL +#include "amdxdna_devel.h" +#endif + +extern const struct drm_sched_backend_ops sched_ops; + +static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx) +{ + struct alloc_requests *xrs_req; + struct amdxdna_dev *xdna; + int ret; + + xdna = hwctx->client->xdna; + xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL); + if (!xrs_req) + return -ENOMEM; + + xrs_req->cdo.start_cols = hwctx->col_list; + xrs_req->cdo.cols_len = hwctx->col_list_len; + xrs_req->cdo.ncols = hwctx->num_col; + xrs_req->cdo.qos_cap.opc = hwctx->max_opc; + + xrs_req->rqos.gops = hwctx->qos.gops; + xrs_req->rqos.fps = hwctx->qos.fps; + xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth; + xrs_req->rqos.latency = hwctx->qos.latency; + xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time; + xrs_req->rqos.priority = hwctx->qos.priority; + + xrs_req->rid = (uintptr_t)hwctx; + + ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx); + if (ret) + XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret); + + kfree(xrs_req); + return ret; +} + +static void aie2_release_resource(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna; + int ret; + + xdna = hwctx->client->xdna; + ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); + if (ret) + XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); +} + +int aie2_fwctx_start(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct drm_gpu_scheduler *sched; + struct amdxdna_gem_obj *heap; + int ret; + + sched = &hwctx->priv->sched; + heap = hwctx->priv->heap; + + ret = drm_sched_init(sched, &sched_ops, hwctx->priv->submit_wq, + DRM_SCHED_PRIORITY_COUNT, + HWCTX_MAX_CMDS, 0, MAX_SCHEDULE_TIMEOUT, + NULL, NULL, hwctx->name, xdna->ddev.dev); + if (ret) { + XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); + return ret; + } + + ret = drm_sched_entity_init(&hwctx->priv->entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (ret) { + XDNA_ERR(xdna, "Failed to initialize sched entity. 
ret %d", ret); + goto fini_sched; + } + + ret = aie2_alloc_resource(hwctx); + if (ret) { + XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret); + goto destroy_entity; + } + +#ifdef AMDXDNA_DEVEL + if (iommu_mode == AMDXDNA_IOMMU_NO_PASID) { + ret = aie2_map_host_buf(xdna->dev_handle, hwctx->priv->id, + heap->mem.dma_addr, heap->mem.size); + goto skip; + } +#endif + ret = aie2_map_host_buf(xdna->dev_handle, hwctx->priv->id, + heap->mem.userptr, heap->mem.size); +#ifdef AMDXDNA_DEVEL +skip: +#endif + if (ret) { + XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); + goto release_resource; + } + + hwctx->status |= FIELD_PREP(HWCTX_STATE_CONNECTED, 1); + return 0; + +release_resource: + aie2_release_resource(hwctx); +destroy_entity: + drm_sched_entity_destroy(&hwctx->priv->entity); +fini_sched: + drm_sched_fini(&hwctx->priv->sched); + return ret; +} + +void aie2_fwctx_stop(struct amdxdna_hwctx *hwctx) +{ + if (!FIELD_GET(HWCTX_STATE_CONNECTED, hwctx->status)) { + XDNA_DBG(hwctx->client->xdna, "%s was stopped, skip", hwctx->name); + return; + } + + aie2_release_resource(hwctx); + hwctx->status &= ~HWCTX_STATE_CONNECTED; +} + +void aie2_fwctx_free(struct amdxdna_hwctx *hwctx) +{ + drm_sched_entity_destroy(&hwctx->priv->entity); + drm_sched_fini(&hwctx->priv->sched); +} + +int aie2_xrs_load_fwctx(struct amdxdna_hwctx *hwctx, struct xrs_action_load *action) +{ + enum xdna_mailbox_channel_type type; + struct xdna_mailbox_chann_info info; + struct amdxdna_dev_hdl *ndev; + struct amdxdna_dev *xdna; + void *mbox_chann; + int ret; + + hwctx->start_col = action->part.start_col; + hwctx->num_col = action->part.ncols; + + xdna = hwctx->client->xdna; + ndev = xdna->dev_handle; + + ret = aie2_create_context(ndev, hwctx, &info); + if (ret) { + XDNA_ERR(xdna, "create context failed, ret %d", ret); + return ret; + } + + if (aie2_pm_is_turbo(ndev)) + type = MB_CHANNEL_USER_POLL; + else + type = MB_CHANNEL_USER_NORMAL; + mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &info, type); + if (!mbox_chann) { + XDNA_ERR(xdna, "not able to create channel"); + goto failed; + } + + trace_amdxdna_debug_point(hwctx->name, ret, "channel created"); + XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d", + hwctx->name, ret, info.msix_id); + + hwctx->priv->mbox_chann = mbox_chann; + return 0; + +failed: + aie2_destroy_context(xdna->dev_handle, hwctx); + return ret; +} + +int aie2_xrs_unload_fwctx(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna; + int ret; + + xdna = hwctx->client->xdna; + xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); + ret = aie2_destroy_context(xdna->dev_handle, hwctx); + if (ret) + XDNA_ERR(xdna, "destroy context failed, ret %d", ret); + + /* + * The DRM scheduler thread might still running. + * Call xdna_mailbox_free_channel() when hwctx is destroyed. + */ + xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); + return ret; +} diff --git a/src/driver/amdxdna/aie2_message.c b/src/driver/amdxdna/aie2_message.c index 89a62f6..9cd425d 100644 --- a/src/driver/amdxdna/aie2_message.c +++ b/src/driver/amdxdna/aie2_message.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + * Copyright (C) 2023-2025, Advanced Micro Devices, Inc. 
*/ #include @@ -255,15 +255,13 @@ int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, return 0; } -int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx, + struct xdna_mailbox_chann_info *info) { DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT); struct amdxdna_dev *xdna = ndev->xdna; - enum xdna_mailbox_channel_type type; - struct xdna_mailbox_chann_res x2i; - struct xdna_mailbox_chann_res i2x; struct cq_pair *cq_pair; - u32 intr_reg, priority; + u32 priority; int ret; ret = map_app_priority_to_fw(hwctx->qos.priority, &priority); @@ -283,57 +281,33 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct if (ret) return ret; - hwctx->fw_ctx_id = resp.context_id; - WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id"); + hwctx->priv->id = resp.context_id; + WARN_ONCE(hwctx->priv->id == -1, "Unexpected context id"); if (ndev->force_preempt_enabled) { ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPTION, - &hwctx->fw_ctx_id); + &hwctx->priv->id); if (ret) XDNA_WARN(ndev->xdna, "Failed to config force preemption"); } + info->msix_id = resp.msix_id; cq_pair = &resp.cq_pair[0]; - x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr); - x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr); - x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr); - x2i.rb_size = cq_pair->x2i_q.buf_size; - - i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr); - i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr); - i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr); - i2x.rb_size = cq_pair->i2x_q.buf_size; - - ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id); - if (ret == -EINVAL) { - XDNA_ERR(xdna, "not able to create channel"); - goto out_destroy_context; - } + info->x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr); + info->x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr); + info->x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr); + info->x2i.rb_size = cq_pair->x2i_q.buf_size; - intr_reg = i2x.mb_head_ptr_reg + 4; - if (aie2_pm_is_turbo(ndev)) - type = MB_CHANNEL_USER_POLL; - else - type = MB_CHANNEL_USER_NORMAL; - hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x, - intr_reg, ret, type); - if (!hwctx->priv->mbox_chann) { - XDNA_ERR(xdna, "not able to create channel"); - ret = -EINVAL; - goto out_destroy_context; - } + info->i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr); + info->i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr); + info->i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr); + info->i2x.rb_size = cq_pair->i2x_q.buf_size; - trace_amdxdna_debug_point(hwctx->name, ret, "channel created"); - XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d", - hwctx->name, ret, resp.msix_id); - XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name, - hwctx->fw_ctx_id, hwctx->client->pasid); + aie2_calc_intr_reg(info); + XDNA_DBG(xdna, "%s created fw ctx %d pasid %d priority %d", hwctx->name, + hwctx->priv->id, hwctx->client->pasid, priority); return 0; - -out_destroy_context: - aie2_destroy_context(ndev, hwctx); - return ret; } int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) @@ -342,22 +316,17 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc struct 
amdxdna_dev *xdna = ndev->xdna; int ret; - if (hwctx->fw_ctx_id == -1) + if (hwctx->priv->id == -1) return 0; - xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); - - req.context_id = hwctx->fw_ctx_id; + req.context_id = hwctx->priv->id; ret = aie2_send_mgmt_msg_wait(ndev, &msg); if (ret) XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret); - xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); trace_amdxdna_debug_point(hwctx->name, 0, "channel destroyed"); - XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name, - hwctx->fw_ctx_id); - hwctx->priv->mbox_chann = NULL; - hwctx->fw_ctx_id = -1; + XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name, hwctx->priv->id); + hwctx->priv->id = -1; return ret; } @@ -1067,8 +1036,8 @@ int aie2_unregister_pdis(struct amdxdna_hwctx *hwctx) int aie2_legacy_config_cu(struct amdxdna_hwctx *hwctx) { - DECLARE_AIE2_MSG(legacy_config_cu, MSG_OP_LEGACY_CONFIG_CU); struct mailbox_channel *chann = hwctx->priv->mbox_chann; + DECLARE_AIE2_MSG(legacy_config_cu, MSG_OP_LEGACY_CONFIG_CU); struct amdxdna_dev *xdna = hwctx->client->xdna; int ret, i; diff --git a/src/driver/amdxdna/aie2_pci.c b/src/driver/amdxdna/aie2_pci.c index 39f6b86..2940f5e 100644 --- a/src/driver/amdxdna/aie2_pci.c +++ b/src/driver/amdxdna/aie2_pci.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + * Copyright (C) 2023-2025, Advanced Micro Devices, Inc. */ #include @@ -104,15 +104,15 @@ static inline void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev) { struct amdxdna_dev *xdna = ndev->xdna; - XDNA_DBG(xdna, "i2x tail 0x%x", ndev->mgmt_i2x.mb_tail_ptr_reg); - XDNA_DBG(xdna, "i2x head 0x%x", ndev->mgmt_i2x.mb_head_ptr_reg); - XDNA_DBG(xdna, "i2x ringbuf 0x%x", ndev->mgmt_i2x.rb_start_addr); - XDNA_DBG(xdna, "i2x rsize 0x%x", ndev->mgmt_i2x.rb_size); - XDNA_DBG(xdna, "x2i tail 0x%x", ndev->mgmt_x2i.mb_tail_ptr_reg); - XDNA_DBG(xdna, "x2i head 0x%x", ndev->mgmt_x2i.mb_head_ptr_reg); - XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr); - XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_x2i.rb_size); - XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx); + XDNA_DBG(xdna, "i2x tail 0x%x", ndev->mgmt_info.i2x.mb_tail_ptr_reg); + XDNA_DBG(xdna, "i2x head 0x%x", ndev->mgmt_info.i2x.mb_head_ptr_reg); + XDNA_DBG(xdna, "i2x ringbuf 0x%x", ndev->mgmt_info.i2x.rb_start_addr); + XDNA_DBG(xdna, "i2x rsize 0x%x", ndev->mgmt_info.i2x.rb_size); + XDNA_DBG(xdna, "x2i tail 0x%x", ndev->mgmt_info.x2i.mb_tail_ptr_reg); + XDNA_DBG(xdna, "x2i head 0x%x", ndev->mgmt_info.x2i.mb_head_ptr_reg); + XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_info.x2i.rb_start_addr); + XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_info.x2i.rb_size); + XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_info.msix_id); if (!ndev->mgmt_prot_major) return; @@ -148,8 +148,8 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev) for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++) reg[i] = readl(ndev->sram_base + off + i * sizeof(u32)); - i2x = &ndev->mgmt_i2x; - x2i = &ndev->mgmt_x2i; + i2x = &ndev->mgmt_info.i2x; + x2i = &ndev->mgmt_info.x2i; i2x->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_head); i2x->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_tail); @@ -162,17 +162,18 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev) x2i->rb_size = info_regs.x2i_buf_sz; if (info_regs.magic != MGMT_MBOX_MAGIC) { - ndev->mgmt_chan_idx = CHANN_INDEX(ndev, x2i->rb_start_addr); + ndev->mgmt_info.msix_id = 
CHANN_INDEX(ndev, x2i->rb_start_addr); goto done; } - ndev->mgmt_chan_idx = info_regs.msi_id; + ndev->mgmt_info.msix_id = info_regs.msi_id; ndev->mgmt_prot_major = info_regs.prot_major; ndev->mgmt_prot_minor = info_regs.prot_minor; if (aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor)) ret = -EINVAL; done: + aie2_calc_intr_reg(&ndev->mgmt_info); aie2_dump_chann_info_debug(ndev); /* Must clear address at FW_ALIVE_OFF */ @@ -334,8 +335,8 @@ static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level) } static struct xrs_action_ops aie2_xrs_actions = { - .load_hwctx = aie2_xrs_load_hwctx, - .unload_hwctx = aie2_xrs_unload_hwctx, + .load_hwctx = aie2_xrs_load_fwctx, + .unload_hwctx = aie2_xrs_unload_fwctx, .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level, }; @@ -370,8 +371,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); struct amdxdna_dev_hdl *ndev = xdna->dev_handle; struct xdna_mailbox_res mbox_res; - u32 xdna_mailbox_intr_reg; - int mgmt_mb_irq, ret; + int ret; if (ndev->dev_status >= AIE2_DEV_START) { XDNA_INFO(xdna, "device is already started"); @@ -415,19 +415,8 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) goto stop_psp; } - mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx); - if (mgmt_mb_irq < 0) { - ret = mgmt_mb_irq; - XDNA_ERR(xdna, "failed to alloc irq vector, ret %d", ret); - goto destroy_mbox; - } - - xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4; - ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox, - &ndev->mgmt_x2i, - &ndev->mgmt_i2x, - xdna_mailbox_intr_reg, - mgmt_mb_irq, MB_CHANNEL_MGMT); + ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox, &ndev->mgmt_info, + MB_CHANNEL_MGMT); if (!ndev->mgmt_chann) { XDNA_ERR(xdna, "failed to create management mailbox channel"); ret = -EINVAL; @@ -485,6 +474,7 @@ static int aie2_init(struct amdxdna_dev *xdna) ndev->priv = xdna->dev_info->dev_priv; ndev->xdna = xdna; + init_rwsem(&ndev->recover_lock); ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev); if (ret) { @@ -659,20 +649,27 @@ static void aie2_fini(struct amdxdna_dev *xdna) static void aie2_recover(struct amdxdna_dev *xdna, bool dump_only) { + struct amdxdna_dev_hdl *ndev = xdna->dev_handle; struct amdxdna_client *client; - mutex_lock(&xdna->dev_lock); if (dump_only) { + mutex_lock(&xdna->dev_lock); list_for_each_entry(client, &xdna->client_list, node) aie2_dump_ctx(client); - } else { - list_for_each_entry(client, &xdna->client_list, node) - aie2_stop_ctx(client); - /* The AIE will reset after all hardware contexts are destroyed */ - list_for_each_entry(client, &xdna->client_list, node) - aie2_restart_ctx(client); + mutex_unlock(&xdna->dev_lock); + return; } + + down_write(&ndev->recover_lock); + mutex_lock(&xdna->dev_lock); + list_for_each_entry(client, &xdna->client_list, node) + aie2_stop_ctx(client); + + /* The AIE will reset after all hardware contexts are destroyed */ + list_for_each_entry(client, &xdna->client_list, node) + aie2_restart_ctx(client); mutex_unlock(&xdna->dev_lock); + up_write(&ndev->recover_lock); } static int aie2_get_aie_status(struct amdxdna_client *client, @@ -969,7 +966,7 @@ static int aie2_get_telemetry(struct amdxdna_client *client, } static int aie2_get_force_preempt_state(struct amdxdna_client *client, - struct amdxdna_drm_get_info *args) + struct amdxdna_drm_get_info *args) { struct amdxdna_drm_get_force_preempt_state force = {}; struct amdxdna_dev *xdna = client->xdna; @@ -1068,7 +1065,7 @@ static int 
aie2_set_power_mode(struct amdxdna_client *client, struct amdxdna_drm } static int aie2_set_force_preempt_state(struct amdxdna_client *client, - struct amdxdna_drm_set_state *args) + struct amdxdna_drm_set_state *args) { struct amdxdna_drm_set_force_preempt_state force; struct amdxdna_dev *xdna = client->xdna; @@ -1134,6 +1131,7 @@ const struct amdxdna_dev_ops aie2_ops = { .set_aie_state = aie2_set_state, .hwctx_init = aie2_hwctx_init, .hwctx_fini = aie2_hwctx_fini, + .hwctx_free = aie2_hwctx_free, .hwctx_config = aie2_hwctx_config, .hwctx_suspend = aie2_hwctx_suspend, .hwctx_resume = aie2_hwctx_resume, diff --git a/src/driver/amdxdna/aie2_pci.h b/src/driver/amdxdna/aie2_pci.h index 164ed23..4a73b00 100644 --- a/src/driver/amdxdna/aie2_pci.h +++ b/src/driver/amdxdna/aie2_pci.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + * Copyright (C) 2023-2025, Advanced Micro Devices, Inc. */ #ifndef _AIE2_PCI_H_ @@ -65,6 +65,7 @@ #define SMU_DPM_TABLE_ENTRY(ndev, level) \ (&(ndev)->smu.dpm_table[level]) +struct amdxdna_hwctx_priv; struct xrs_action_load; enum aie2_smu_reg_idx { @@ -174,6 +175,7 @@ struct hwctx_pdi { dma_addr_t dma_addr; }; #endif + /* * Define the maximum number of pending commands in a hardware context. * Must be power of 2! @@ -182,25 +184,26 @@ struct hwctx_pdi { #define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1)) struct amdxdna_hwctx_priv { struct amdxdna_gem_obj *heap; - void *mbox_chann; #ifdef AMDXDNA_DEVEL struct hwctx_pdi *pdi_infos; #endif - struct drm_gpu_scheduler sched; - struct drm_sched_entity entity; + struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS]; struct mutex io_lock; /* protect seq and cmd order */ - struct wait_queue_head job_free_wq; +#ifdef AMDXDNA_DEVEL struct amdxdna_sched_job *pending[HWCTX_MAX_CMDS]; - u32 num_pending; +#endif struct semaphore job_sem; - struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS]; struct workqueue_struct *submit_wq; struct drm_syncobj *syncobj; - wait_queue_head_t status_wq; + /* Firmware context related in below */ + u32 id; + void *mbox_chann; + struct drm_gpu_scheduler sched; + struct drm_sched_entity entity; }; enum aie2_dev_status { @@ -224,11 +227,11 @@ struct amdxdna_dev_hdl { void __iomem *mbox_base; struct psp_device *psp_hdl; - struct xdna_mailbox_chann_res mgmt_x2i; - struct xdna_mailbox_chann_res mgmt_i2x; - u32 mgmt_chan_idx; + struct xdna_mailbox_chann_info mgmt_info; u32 mgmt_prot_major; u32 mgmt_prot_minor; + /* for recover and IO code path exclusion */ + struct rw_semaphore recover_lock; u32 total_col; struct aie_version version; @@ -291,6 +294,11 @@ struct amdxdna_dev_priv { extern const struct amdxdna_dev_ops aie2_ops; +static inline void aie2_calc_intr_reg(struct xdna_mailbox_chann_info *info) +{ + info->intr_reg = info->i2x.mb_head_ptr_reg + 4; +} + int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, enum rt_config_category category, u32 *val); @@ -355,7 +363,8 @@ int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *ver int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata); int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, struct amdxdna_fw_ver *fw_ver); -int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx, + struct xdna_mailbox_chann_info *info); int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); int aie2_map_host_buf(struct 
amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); int aie2_query_status(struct amdxdna_dev_hdl *ndev, char *buf, u32 size, u32 *cols_filled); @@ -385,6 +394,7 @@ int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job * /* aie2_hwctx.c */ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); +void aie2_hwctx_free(struct amdxdna_hwctx *hwctx); int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx); void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx); @@ -396,7 +406,12 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq); void aie2_stop_ctx(struct amdxdna_client *client); void aie2_dump_ctx(struct amdxdna_client *client); void aie2_restart_ctx(struct amdxdna_client *client); -int aie2_xrs_load_hwctx(struct amdxdna_hwctx *hwctx, struct xrs_action_load *action); -int aie2_xrs_unload_hwctx(struct amdxdna_hwctx *hwctx); + +/* aie2_fwctx.c */ +int aie2_fwctx_start(struct amdxdna_hwctx *hwctx); +void aie2_fwctx_stop(struct amdxdna_hwctx *hwctx); +void aie2_fwctx_free(struct amdxdna_hwctx *hwctx); +int aie2_xrs_load_fwctx(struct amdxdna_hwctx *hwctx, struct xrs_action_load *action); +int aie2_xrs_unload_fwctx(struct amdxdna_hwctx *hwctx); #endif /* _AIE2_PCI_H_ */ diff --git a/src/driver/amdxdna/amdxdna_ctx.c b/src/driver/amdxdna/amdxdna_ctx.c index 9ecc836..03c73cc 100644 --- a/src/driver/amdxdna/amdxdna_ctx.c +++ b/src/driver/amdxdna/amdxdna_ctx.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + * Copyright (C) 2022-2025, Advanced Micro Devices, Inc. */ #include @@ -56,6 +56,18 @@ static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx) return &fence->base; } +#define WAIT_JOB_COND \ + (atomic_read(&hwctx->job_submit_cnt) == atomic_read(&hwctx->job_free_cnt)) +void amdxdna_hwctx_wait_jobs(struct amdxdna_hwctx *hwctx, long timeout) +{ + if (timeout == MAX_SCHEDULE_TIMEOUT) { + wait_event(hwctx->status_wq, WAIT_JOB_COND); + return; + } + + wait_event_timeout(hwctx->status_wq, WAIT_JOB_COND, timeout); +} + void amdxdna_hwctx_suspend(struct amdxdna_client *client) { struct amdxdna_dev *xdna = client->xdna; @@ -63,10 +75,10 @@ void amdxdna_hwctx_suspend(struct amdxdna_client *client) unsigned long hwctx_id; drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); - mutex_lock(&client->hwctx_lock); - amdxdna_for_each_hwctx(client, hwctx_id, hwctx) + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { + amdxdna_hwctx_wait_jobs(hwctx, msecs_to_jiffies(2000)); xdna->dev_info->ops->hwctx_suspend(hwctx); - mutex_unlock(&client->hwctx_lock); + } } void amdxdna_hwctx_resume(struct amdxdna_client *client) @@ -76,10 +88,8 @@ void amdxdna_hwctx_resume(struct amdxdna_client *client) unsigned long hwctx_id; drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); - mutex_lock(&client->hwctx_lock); amdxdna_for_each_hwctx(client, hwctx_id, hwctx) xdna->dev_info->ops->hwctx_resume(hwctx); - mutex_unlock(&client->hwctx_lock); } static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx, @@ -89,17 +99,23 @@ static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx, synchronize_srcu(ss); - /* At this point, user is not able to submit new commands */ + /* + * At this point, user is not able to submit new commands. 
+ */ + amdxdna_hwctx_wait_jobs(hwctx, msecs_to_jiffies(2000)); mutex_lock(&xdna->dev_lock); xdna->dev_info->ops->hwctx_fini(hwctx); mutex_unlock(&xdna->dev_lock); + amdxdna_hwctx_wait_jobs(hwctx, MAX_SCHEDULE_TIMEOUT); + if (xdna->dev_info->ops->hwctx_free) + xdna->dev_info->ops->hwctx_free(hwctx); kfree(hwctx->name); kfree(hwctx); } /* - * This should be called in close() and remove(). DO NOT call in other syscalls. + * This should be called in flush() and remove(). DO NOT call in other syscalls. * This guarantee that when hwctx and resources will be released, if user * doesn't call amdxdna_drm_destroy_hwctx_ioctl. */ @@ -108,16 +124,12 @@ void amdxdna_hwctx_remove_all(struct amdxdna_client *client) struct amdxdna_hwctx *hwctx; unsigned long hwctx_id; - mutex_lock(&client->hwctx_lock); amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { XDNA_DBG(client->xdna, "PID %d close HW context %d", client->pid, hwctx->id); xa_erase(&client->hwctx_xa, hwctx->id); - mutex_unlock(&client->hwctx_lock); amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); - mutex_lock(&client->hwctx_lock); } - mutex_unlock(&client->hwctx_lock); } int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) @@ -147,7 +159,6 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr } hwctx->client = client; - hwctx->fw_ctx_id = -1; hwctx->tdr_last_completed = -1; hwctx->num_tiles = args->num_tiles; hwctx->mem_size = args->mem_size; @@ -182,6 +193,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr atomic_set(&hwctx->job_submit_cnt, 0); atomic_set(&hwctx->job_free_cnt, 0); + init_waitqueue_head(&hwctx->status_wq); XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret); drm_dev_exit(idx); @@ -358,6 +370,7 @@ void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) amdxdna_gem_put_obj(job->cmd_bo); atomic_inc(&job->hwctx->job_free_cnt); + wake_up(&job->hwctx->status_wq); } int amdxdna_lock_objects(struct amdxdna_sched_job *job, struct ww_acquire_ctx *ctx) @@ -479,12 +492,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, u32 opcode, goto unlock_srcu; } - if (hwctx->status == HWCTX_STATE_INIT) { - XDNA_ERR(xdna, "HW Context is not ready"); - ret = -EINVAL; - goto unlock_srcu; - } - job->hwctx = hwctx; job->mm = current->mm; job->opcode = opcode; diff --git a/src/driver/amdxdna/amdxdna_ctx.h b/src/driver/amdxdna/amdxdna_ctx.h index aa0e0a2..1068696 100644 --- a/src/driver/amdxdna/amdxdna_ctx.h +++ b/src/driver/amdxdna/amdxdna_ctx.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + * Copyright (C) 2022-2025, Advanced Micro Devices, Inc. */ #ifndef _AMDXDNA_CTX_H_ @@ -98,7 +98,6 @@ struct amdxdna_hwctx { u32 max_opc; u32 num_tiles; u32 mem_size; - u32 fw_ctx_id; u32 col_list_len; u32 *col_list; u32 start_col; @@ -107,28 +106,19 @@ struct amdxdna_hwctx { u32 log_buf_bo; u32 doorbell_offset; /* - * HWCTX_STATE_INIT indicated that hardware context is initialized. - * But in this state, user is not allow to submit commands. + * Set HWCTX_STATE_CONNECTED bit means hardware context is associated + * with firmware context */ -#define HWCTX_STATE_INIT 0 +#define HWCTX_STATE_CONNECTED BIT(0) /* - * HWCTX_STATE_READY indicated that hardware context is ready to accept - * commands. State bigger than HWCTX_STATE_READY implied that submit command - * is ready. 
+ * Set HWCTX_STATE_READY bit means hardware/firmware context is ready + * to accept commands */ -#define HWCTX_STATE_READY 1 +#define HWCTX_STATE_READY BIT(1) /* - * HWCTX_STATE_STOP indicated that hardware context scheduler is stopped. - * Submit command is still allowed but commands will NOT be scheduled until - * scheduler is restarted. + * Set HWCTX_STATE_DEAD bit means hardware context marked as dead by TDR. */ -#define HWCTX_STATE_STOP 2 -/* - * HWCTX_STATE_DEAD indicated that hardware context marked as dead by TDR. - * Submit command is still allowed but commands will NOT be scheduled until - * context is fully recovered. - */ -#define HWCTX_STATE_DEAD 3 +#define HWCTX_STATE_DEAD BIT(2) u32 status; struct amdxdna_qos_info qos; @@ -143,7 +133,8 @@ struct amdxdna_hwctx { u32 syncobj_hdl; atomic_t job_submit_cnt; - atomic_t job_free_cnt; + atomic_t job_free_cnt ____cacheline_aligned_in_smp; + wait_queue_head_t status_wq; }; #define drm_job_to_xdna_job(j) \ @@ -254,6 +245,7 @@ static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx) hwctx->start_col); } +void amdxdna_hwctx_wait_jobs(struct amdxdna_hwctx *hwctx, long timeout); void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); void amdxdna_hwctx_remove_all(struct amdxdna_client *client); void amdxdna_hwctx_suspend(struct amdxdna_client *client); diff --git a/src/driver/amdxdna/amdxdna_devel.c b/src/driver/amdxdna/amdxdna_devel.c index 8e9832e..5ece5e3 100644 --- a/src/driver/amdxdna/amdxdna_devel.c +++ b/src/driver/amdxdna/amdxdna_devel.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + * Copyright (C) 2023-2025, Advanced Micro Devices, Inc. */ #include @@ -190,3 +190,10 @@ void amdxdna_bo_dma_unmap(struct amdxdna_gem_obj *abo) { } #endif /* AMDXDNA_SHMEM */ + +void amdxdna_gem_dump_mm(struct amdxdna_dev *xdna) +{ + struct drm_printer p = drm_dbg_printer(&xdna->ddev, DRM_UT_DRIVER, NULL); + + drm_mm_print(&xdna->ddev.vma_offset_manager->vm_addr_space_mm, &p); +} diff --git a/src/driver/amdxdna/amdxdna_devel.h b/src/driver/amdxdna/amdxdna_devel.h index 05a3dfc..9c649a2 100644 --- a/src/driver/amdxdna/amdxdna_devel.h +++ b/src/driver/amdxdna/amdxdna_devel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2024, Advanced Micro Devices, Inc. + * Copyright (C) 2024-2025, Advanced Micro Devices, Inc. */ #ifndef _AMDXDNA_DEVEL_ @@ -28,4 +28,6 @@ void amdxdna_mem_unmap(struct amdxdna_dev *xdna, struct amdxdna_mem *mem); int amdxdna_bo_dma_map(struct amdxdna_gem_obj *abo); void amdxdna_bo_dma_unmap(struct amdxdna_gem_obj *abo); + +void amdxdna_gem_dump_mm(struct amdxdna_dev *xdna); #endif /* _AMDXDNA_DEVEL_ */ diff --git a/src/driver/amdxdna/amdxdna_drm.c b/src/driver/amdxdna/amdxdna_drm.c index e1291c1..13f0eef 100644 --- a/src/driver/amdxdna/amdxdna_drm.c +++ b/src/driver/amdxdna/amdxdna_drm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + * Copyright (C) 2022-2025, Advanced Micro Devices, Inc. 
*/ #include @@ -57,7 +57,6 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) #ifdef AMDXDNA_DEVEL skip_sva_bind: #endif - mutex_init(&client->hwctx_lock); init_srcu_struct(&client->hwctx_srcu); xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC); mutex_init(&client->mm_lock); @@ -97,7 +96,6 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) xa_destroy(&client->hwctx_xa); cleanup_srcu_struct(&client->hwctx_srcu); - mutex_destroy(&client->hwctx_lock); mutex_destroy(&client->mm_lock); if (client->dev_heap) drm_gem_object_put(to_gobj(client->dev_heap)); diff --git a/src/driver/amdxdna/amdxdna_drm.h b/src/driver/amdxdna/amdxdna_drm.h index 4b635ca..76d3a85 100644 --- a/src/driver/amdxdna/amdxdna_drm.h +++ b/src/driver/amdxdna/amdxdna_drm.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + * Copyright (C) 2022-2025, Advanced Micro Devices, Inc. */ #ifndef _AMDXDNA_DRM_H_ @@ -57,6 +57,7 @@ struct amdxdna_dev_ops { /* Below device ops are called by IOCTL */ int (*hwctx_init)(struct amdxdna_hwctx *hwctx); void (*hwctx_fini)(struct amdxdna_hwctx *hwctx); + void (*hwctx_free)(struct amdxdna_hwctx *hwctx); int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq); void (*hwctx_suspend)(struct amdxdna_hwctx *hwctx); @@ -123,7 +124,7 @@ struct amdxdna_dev { #ifdef AMDXDNA_DEVEL struct ida pdi_ida; #endif - struct rw_semaphore notifier_lock; /* for mmu notifier*/ + struct rw_semaphore notifier_lock; /* for mmu notifier */ struct workqueue_struct *notifier_wq; }; @@ -140,7 +141,6 @@ struct amdxdna_stats { * * @node: entry node in clients list * @pid: PID of current client - * @hwctx_lock: HW context lock for protect IDR * @hwctx_srcu: Per client SRCU for synchronizing hwctx destroy with other ioctls. * @hwctx_xa: HW context xarray * @xdna: XDNA device pointer @@ -154,9 +154,7 @@ struct amdxdna_stats { struct amdxdna_client { struct list_head node; pid_t pid; - /* To protect hwctx stop/restart/destroy etc. */ - struct mutex hwctx_lock; - /* To avoid deadlock, do NOT wait this srcu when hwctx_lock is hold */ + /* To avoid deadlock, do NOT wait this srcu when dev_lock is hold */ struct srcu_struct hwctx_srcu; struct xarray hwctx_xa; u32 next_hwctxid; diff --git a/src/driver/amdxdna/amdxdna_mailbox.c b/src/driver/amdxdna/amdxdna_mailbox.c index 66f9dc5..49449df 100644 --- a/src/driver/amdxdna/amdxdna_mailbox.c +++ b/src/driver/amdxdna/amdxdna_mailbox.c @@ -1,17 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + * Copyright (C) 2022-2025, Advanced Micro Devices, Inc. 
*/ #include #include #include #include -#include -#include +#include +#include #include #include -#include #include #include #include @@ -49,8 +48,6 @@ #define MAGIC_VAL_MASK 0xFF000000 #define MAX_MSG_ID_ENTRIES 256 #define MAILBOX_NAME "xdna_mailbox" -#define CHANN_RX_RETRY 10 -#define CHANN_RX_INTERVAL 200 /* milliseconds */ #define MSG_ID2ENTRY(msg_id) ((msg_id) & ~MAGIC_VAL_MASK) #ifdef AMDXDNA_DEVEL @@ -73,8 +70,7 @@ enum channel_res_type { struct mailbox { struct device *dev; struct xdna_mailbox_res res; - /* protect channel list */ - struct mutex mbox_lock; + spinlock_t mbox_lock; /* protect channel list */ struct list_head chann_list; struct list_head poll_chann_list; struct task_struct *polld; @@ -559,7 +555,7 @@ static bool mailbox_polld_event(struct mailbox *mb) { struct mailbox_channel *mb_chann; - mutex_lock(&mb->mbox_lock); + spin_lock(&mb->mbox_lock); list_for_each_entry(mb_chann, &mb->poll_chann_list, chann_entry) { if (mb_chann->type == MB_CHANNEL_MGMT) break; @@ -570,7 +566,7 @@ static bool mailbox_polld_event(struct mailbox *mb) mb->sent_msg = true; break; } - mutex_unlock(&mb->mbox_lock); + spin_unlock(&mb->mbox_lock); return mb->sent_msg; } @@ -591,7 +587,7 @@ static int mailbox_polld(void *data) if (!mb->sent_msg) continue; - mutex_lock(&mb->mbox_lock); + spin_lock(&mb->mbox_lock); chann_all_empty = true; list_for_each_entry(mb_chann, &mb->poll_chann_list, chann_entry) { if (mb_chann->type == MB_CHANNEL_MGMT) @@ -603,7 +599,7 @@ static int mailbox_polld(void *data) chann_all_empty = false; mailbox_polld_handle_chann(mb_chann); } - mutex_unlock(&mb->mbox_lock); + spin_unlock(&mb->mbox_lock); if (chann_all_empty) mb->sent_msg = false; @@ -706,7 +702,7 @@ xdna_mailbox_get_record(struct mailbox *mb, int mb_irq, struct mailbox_res_record *record; int record_found = 0; - mutex_lock(&mb->mbox_lock); + spin_lock(&mb->mbox_lock); list_for_each_entry(record, &mb->res_records, re_entry) { if (record->re_irq != mb_irq) continue; @@ -714,6 +710,7 @@ xdna_mailbox_get_record(struct mailbox *mb, int mb_irq, record_found = 1; break; } + spin_unlock(&mb->mbox_lock); if (record_found) { record->type = type; @@ -722,16 +719,17 @@ xdna_mailbox_get_record(struct mailbox *mb, int mb_irq, record = kzalloc(sizeof(*record), GFP_KERNEL); if (!record) - goto out; + return record; + + spin_lock(&mb->mbox_lock); list_add_tail(&record->re_entry, &mb->res_records); + spin_unlock(&mb->mbox_lock); record->re_irq = mb_irq; found: record->type = type; memcpy(&record->re_x2i, x2i, sizeof(*x2i)); memcpy(&record->re_i2x, i2x, sizeof(*i2x)); -out: - mutex_unlock(&mb->mbox_lock); return record; } @@ -762,12 +760,13 @@ int xdna_mailbox_info_show(struct mailbox *mb, struct seq_file *m) seq_printf(m, ring_fmt, mbox_irq, #_dir, _act, type, rb_start, rb_size); \ seq_printf(m, mbox_fmt, head_ptr, tail_ptr, head_val, tail_val); \ } - mutex_lock(&mb->mbox_lock); + + spin_lock(&mb->mbox_lock); list_for_each_entry(record, &mb->res_records, re_entry) { xdna_mbox_dump_queue(x2i, record->active); xdna_mbox_dump_queue(i2x, record->active); } - mutex_unlock(&mb->mbox_lock); + spin_unlock(&mb->mbox_lock); return 0; } @@ -790,13 +789,13 @@ int xdna_mailbox_ringbuf_show(struct mailbox *mb, struct seq_file *m) memcpy_fromio(buf, base + record->re_##_dir.rb_start_addr, size); \ seq_hex_dump(m, pfx, DUMP_PREFIX_OFFSET, 16, 4, buf, size, true); \ } while (0) - mutex_lock(&mb->mbox_lock); + spin_lock(&mb->mbox_lock); base = (void *)mb->res.ringbuf_base; list_for_each_entry(record, &mb->res_records, re_entry) { 
		xdna_mbox_dump_ringbuf(x2i);
		xdna_mbox_dump_ringbuf(i2x);
	}
-	mutex_unlock(&mb->mbox_lock);
+	spin_unlock(&mb->mbox_lock);
	vfree(buf);
	return 0;
@@ -805,13 +804,22 @@
 struct mailbox_channel *
 xdna_mailbox_create_channel(struct mailbox *mb,
-			    const struct xdna_mailbox_chann_res *x2i,
-			    const struct xdna_mailbox_chann_res *i2x,
-			    u32 iohub_int_addr, int mb_irq,
+			    struct xdna_mailbox_chann_info *info,
			    enum xdna_mailbox_channel_type type)
 {
+	struct xdna_mailbox_chann_res *x2i = &info->x2i;
+	struct xdna_mailbox_chann_res *i2x = &info->i2x;
+	u32 iohub_int_addr = info->intr_reg;
	struct mailbox_channel *mb_chann;
+	int mb_irq;
	int ret;
+
+	mb_irq = pci_irq_vector(to_pci_dev(mb->dev), info->msix_id);
+	if (mb_irq < 0) {
+		pr_err("failed to get irq vector, ret %d", mb_irq);
+		return NULL;
+	}
+
 #if defined(CONFIG_DEBUG_FS)
	struct mailbox_res_record *record;
	/* Record will be released when mailbox device destroy*/
@@ -874,7 +882,7 @@ xdna_mailbox_create_channel(struct mailbox *mb,
 skip_irq:
 #endif
	mb_chann->bad_state = false;
-	mutex_lock(&mb->mbox_lock);
+	spin_lock(&mb->mbox_lock);
	if (mb_chann->type == MB_CHANNEL_USER_POLL)
		list_add_tail(&mb_chann->chann_entry, &mb->poll_chann_list);
	else
@@ -883,7 +891,7 @@ xdna_mailbox_create_channel(struct mailbox *mb,
	mb_chann->record = record;
	record->active = 1;
 #endif
-	mutex_unlock(&mb->mbox_lock);
+	spin_unlock(&mb->mbox_lock);
	MB_DBG(mb_chann, "Mailbox channel created type %d (irq: %d)",
	       mb_chann->type, mb_chann->msix_irq);
@@ -896,20 +904,20 @@ xdna_mailbox_create_channel(struct mailbox *mb,
	return NULL;
 }
-int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
+void xdna_mailbox_release_channel(struct mailbox_channel *mb_chann)
 {
	struct mailbox_msg *mb_msg;
	unsigned long msg_id;
	if (!mb_chann)
-		return 0;
+		return;
-	mutex_lock(&mb_chann->mb->mbox_lock);
+	spin_lock(&mb_chann->mb->mbox_lock);
	list_del(&mb_chann->chann_entry);
 #if defined(CONFIG_DEBUG_FS)
	mb_chann->record->active = 0;
 #endif
-	mutex_unlock(&mb_chann->mb->mbox_lock);
+	spin_unlock(&mb_chann->mb->mbox_lock);
 #ifdef AMDXDNA_DEVEL
	if (MB_PERIODIC_POLL)
@@ -926,42 +934,27 @@ int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
	xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg)
		mailbox_release_msg(mb_chann, mb_msg);
-	xa_destroy(&mb_chann->chan_xa);
-
-	MB_DBG(mb_chann, "Mailbox channel destroyed type %d irq: %d",
+	MB_DBG(mb_chann, "Mailbox channel released type %d irq: %d",
	       mb_chann->type, mb_chann->msix_irq);
-	kfree(mb_chann);
-	return 0;
 }
-void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
+void xdna_mailbox_free_channel(struct mailbox_channel *mb_chann)
 {
-	int retry;
-
	if (!mb_chann)
		return;
-	if (mb_chann->type != MB_CHANNEL_MGMT) {
-		for (retry = 0; retry < CHANN_RX_RETRY; retry++) {
-			if (mailbox_channel_no_msg(mb_chann))
-				break;
-
-			msleep(CHANN_RX_INTERVAL);
-		}
-
-		if (!mailbox_channel_no_msg(mb_chann)) {
-			MB_WARN_ONCE(mb_chann, "Channel (irq %d) exceeded maximum try",
-				     mb_chann->msix_irq);
-		}
-	}
+	xa_destroy(&mb_chann->chan_xa);
+	kfree(mb_chann);
+}
+void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
+{
 #ifdef AMDXDNA_DEVEL
	if (MB_PERIODIC_POLL) {
		timer_delete_sync(&mb_chann->timer);
		goto skip_irq;
	}
 #endif
-	/* Disable an irq and wait. This might sleep.
	 */
	disable_irq(mb_chann->msix_irq);
@@ -973,6 +966,12 @@ void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
	MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
 }
+void xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann)
+{
+	xdna_mailbox_release_channel(mailbox_chann);
+	xdna_mailbox_free_channel(mailbox_chann);
+}
+
 struct mailbox *xdna_mailbox_create(struct device *dev,
				    const struct xdna_mailbox_res *res)
 {
@@ -986,7 +985,7 @@ struct mailbox *xdna_mailbox_create(struct device *dev,
	/* mailbox and ring buf base and size information */
	memcpy(&mb->res, res, sizeof(*res));
-	mutex_init(&mb->mbox_lock);
+	spin_lock_init(&mb->mbox_lock);
	INIT_LIST_HEAD(&mb->chann_list);
	INIT_LIST_HEAD(&mb->poll_chann_list);
@@ -1029,10 +1028,9 @@ void xdna_mailbox_destroy(struct mailbox *mb)
	dev_dbg(mb->dev, "Stopping polld");
	(void)kthread_stop(mb->polld);
-	mutex_lock(&mb->mbox_lock);
+	spin_lock(&mb->mbox_lock);
	WARN_ONCE(!list_empty(&mb->chann_list), "Channel not destroy");
-	mutex_unlock(&mb->mbox_lock);
+	spin_unlock(&mb->mbox_lock);
-	mutex_destroy(&mb->mbox_lock);
	kfree(mb);
 }
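Splitting the old destroy into release + free lets a caller quiesce the channel, wait for whatever still references it, and only then reclaim its memory. A minimal sketch of the intended ordering, assuming a hypothetical caller-side struct my_ctx and helper my_ctx_wait_idle(); only the xdna_mailbox_*() calls come from this patch:

static void my_ctx_teardown(struct my_ctx *ctx)
{
	/* 1. Stop IRQ/polling so no new responses are processed. */
	xdna_mailbox_stop_channel(ctx->chann);

	/* 2. Release channel resources and drop messages still in flight. */
	xdna_mailbox_release_channel(ctx->chann);

	/* 3. Wait for anything that still references the channel
	 *    (e.g. outstanding scheduler jobs) to drain.
	 */
	my_ctx_wait_idle(ctx);

	/* 4. Only now is it safe to free the channel memory. */
	xdna_mailbox_free_channel(ctx->chann);
	ctx->chann = NULL;
}

Callers that do not need the intermediate wait can keep using xdna_mailbox_destroy_channel(), which the patch reimplements as release followed immediately by free.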
diff --git a/src/driver/amdxdna/amdxdna_mailbox.h b/src/driver/amdxdna/amdxdna_mailbox.h
index 3b68cb9..de3f13f 100644
--- a/src/driver/amdxdna/amdxdna_mailbox.h
+++ b/src/driver/amdxdna/amdxdna_mailbox.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ * Copyright (C) 2022-2025, Advanced Micro Devices, Inc.
  */
 #ifndef _AIE2_MAILBOX_H_
@@ -59,6 +59,21 @@ struct xdna_mailbox_chann_res {
	u32 mb_tail_ptr_reg;
 };
+/*
+ * xdna_mailbox_chann_info - channel information
+ *
+ * @x2i: host to firmware mailbox resources
+ * @i2x: firmware to host mailbox resources
+ * @intr_reg: register addr of MSI-X interrupt
+ * @msix_id: mailbox MSI-X interrupt vector index
+ */
+struct xdna_mailbox_chann_info {
+	struct xdna_mailbox_chann_res	x2i;
+	struct xdna_mailbox_chann_res	i2x;
+	u32				intr_reg;
+	u32				msix_id;
+};
+
 /*
  * xdna_mailbox_create() -- create mailbox subsystem and initialize
  *
@@ -88,36 +103,52 @@ enum xdna_mailbox_channel_type {
  * xdna_mailbox_create_channel() -- Create a mailbox channel instance
  *
  * @mailbox: the handle return from xdna_mailbox_create()
- * @x2i: host to firmware mailbox resources
- * @i2x: firmware to host mailbox resources
- * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt
- * @mb_irq: Linux IRQ number associated with mailbox MSI-X interrupt vector index
- * @type: Type of channel
+ * @info: information to create a channel
+ * @type: type of channel
  *
  * Return: If success, return a handle of mailbox channel. Otherwise, return NULL.
  */
 struct mailbox_channel *
 xdna_mailbox_create_channel(struct mailbox *mailbox,
-			    const struct xdna_mailbox_chann_res *x2i,
-			    const struct xdna_mailbox_chann_res *i2x,
-			    u32 xdna_mailbox_intr_reg,
-			    int mb_irq, enum xdna_mailbox_channel_type type);
+			    struct xdna_mailbox_chann_info *info,
+			    enum xdna_mailbox_channel_type type);
+
+/*
+ * xdna_mailbox_release_channel() -- release mailbox channel
+ *
+ * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
+ *
+ * Release all resources, including messages, list entries, interrupts, etc.
+ * After this call, the channel is no longer functional.
+ * This is added to support more complex synchronization scenarios.
+ */
+void xdna_mailbox_release_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_free_channel() -- free mailbox channel
+ *
+ * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
+ *
+ * Free all resources. This must be called after xdna_mailbox_release_channel().
+ */
+void xdna_mailbox_free_channel(struct mailbox_channel *mailbox_chann);
 /*
  * xdna_mailbox_destroy_channel() -- destroy mailbox channel
  *
  * @mailbox_chann: the handle return from xdna_mailbox_create_channel()
  *
- * Return: if success, return 0. otherwise return error code
+ * Destroy the channel: release all the resources the mailbox channel is
+ * holding, then free them.
  */
-int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann);
+void xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann);
 /*
  * xdna_mailbox_stop_channel() -- stop mailbox channel
  *
  * @mailbox_chann: the handle return from xdna_mailbox_create_channel()
  *
- * Return: if success, return 0. otherwise return error code
+ * Stop receiving responses and sending messages.
  */
 void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann);
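With the new info-based create path, callers pass the MSI-X vector index and the channel resolves the Linux IRQ itself via pci_irq_vector(). A hedged sketch of the call site; the helper name my_open_chann(), the placeholder register values, and the choice of channel type are illustrative, while struct xdna_mailbox_chann_info and the xdna_mailbox_create_channel() signature come from this patch:

static struct mailbox_channel *my_open_chann(struct mailbox *mb)
{
	struct xdna_mailbox_chann_info info = {};

	/*
	 * Fill info.x2i and info.i2x with the device-specific ring buffer
	 * and head/tail pointer registers (omitted here).
	 */
	info.intr_reg = 0x0;	/* placeholder MSI-X interrupt register address */
	info.msix_id = 1;	/* vector index; the Linux IRQ is derived internally */

	return xdna_mailbox_create_channel(mb, &info, MB_CHANNEL_MGMT);
}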
*/ #include "amdxdna_drm.h" @@ -10,8 +10,8 @@ uint timeout_in_sec = 2; module_param(timeout_in_sec, uint, 0644); MODULE_PARM_DESC(timeout_in_sec, "Seconds to timeout and recovery, default 2; 0 - No TDR"); -int tdr_dump_ctx = 0; -module_param(tdr_dump_ctx, int, 0644); +bool tdr_dump_ctx; +module_param(tdr_dump_ctx, bool, 0644); MODULE_PARM_DESC(tdr_dump_ctx, "Instead of resetting, just dump the ctx info for debugging"); #define TDR_TIMEOUT_JIFF msecs_to_jiffies(timeout_in_sec * 1000) @@ -26,14 +26,12 @@ static void amdxdna_tdr_work(struct work_struct *work) bool active = false; int idle_cnt = 0; int ctx_cnt = 0; - int idx; xdna = tdr_to_xdna_dev(tdr); mutex_lock(&xdna->dev_lock); list_for_each_entry(client, &xdna->client_list, node) { - idx = srcu_read_lock(&client->hwctx_srcu); amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { - if (hwctx->status != HWCTX_STATE_READY) + if (!FIELD_GET(HWCTX_STATE_READY, hwctx->status)) continue; u64 completed = hwctx->completed; /* To avoid race */ @@ -52,12 +50,8 @@ static void amdxdna_tdr_work(struct work_struct *work) hwctx->tdr_last_completed = completed; active = true; break; - } else { - // Mark ready ctx to be dead so to ignore it next time - hwctx->status = HWCTX_STATE_DEAD; } } - srcu_read_unlock(&client->hwctx_srcu, idx); if (active) break; }