From 37df9560cd3e7cb23f28ab04514f0a2ab08b3cea Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 20 Aug 2021 14:03:12 -0400 Subject: drm/amd/amdgpu: New debugfs interface for MMIO registers (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new debugfs interface uses an IOCTL interface in order to pass along state information like SRBM and GRBM bank switching. This new interface also allows a full 32-bit MMIO address range which the previous didn't. With this new design we have room to grow the flexibility of the file as need be. (v2): Move read/write to .read/.write, fix style, add comment for IOCTL data structure (v3): C style comments (v4): use u32 in struct and remove offset variable (v5): Drop flag clearing in op function, use 0xFFFFFFFF for broadcast instead of 0x3FF, use mutex for op/ioctl. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 152 ++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 277128846dd1..d256215ab2c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -36,6 +36,7 @@ #include "amdgpu_rap.h" #include "amdgpu_securedisplay.h" #include "amdgpu_fw_attestation.h" +#include "amdgpu_umr.h" int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev) { @@ -279,6 +280,145 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); } +static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file) +{ + struct amdgpu_debugfs_regs2_data *rd; + + rd = kzalloc(sizeof *rd, GFP_KERNEL); + if (!rd) + return -ENOMEM; + rd->adev = file_inode(file)->i_private; + file->private_data = rd; + mutex_init(&rd->lock); + + return 0; +} + +static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file) +{ + struct amdgpu_debugfs_regs2_data *rd = file->private_data; + mutex_destroy(&rd->lock); + kfree(file->private_data); + return 0; +} + +static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 offset, size_t size, int write_en) +{ + struct amdgpu_debugfs_regs2_data *rd = f->private_data; + struct amdgpu_device *adev = rd->adev; + ssize_t result = 0; + int r; + uint32_t value; + + if (size & 0x3 || offset & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + r = amdgpu_virt_enable_access_debugfs(adev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + mutex_lock(&rd->lock); + + if (rd->id.use_grbm) { + if ((rd->id.grbm.sh != 0xFFFFFFFF && rd->id.grbm.sh >= adev->gfx.config.max_sh_per_se) || + (rd->id.grbm.se != 0xFFFFFFFF && rd->id.grbm.se >= adev->gfx.config.max_shader_engines)) { + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + amdgpu_virt_disable_access_debugfs(adev); + mutex_unlock(&rd->lock); + return -EINVAL; + } + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se, + rd->id.grbm.sh, + rd->id.grbm.instance); + } + + if (rd->id.use_srbm) { + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe, + rd->id.srbm.queue, rd->id.srbm.vmid); + } + + if (rd->id.pg_lock) + mutex_lock(&adev->pm.mutex); + + while (size) { + if (!write_en) { + value = RREG32(offset >> 2); + r = put_user(value, (uint32_t *)buf); + } else { + r = get_user(value, (uint32_t *)buf); + if (!r) + amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value); + } + if (r) { + result = r; + goto end; + } + offset += 4; + size -= 4; + result += 4; + buf += 4; + } +end: + if (rd->id.use_grbm) { + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } + + if (rd->id.use_srbm) { + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + + if (rd->id.pg_lock) + mutex_unlock(&adev->pm.mutex); + + mutex_unlock(&rd->lock); + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + amdgpu_virt_disable_access_debugfs(adev); + return result; +} + +static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) +{ + struct amdgpu_debugfs_regs2_data *rd = f->private_data; + int r; + + switch (cmd) { + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: + mutex_lock(&rd->lock); + r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata *)data, sizeof rd->id); + mutex_unlock(&rd->lock); + return r ? -EINVAL : 0; + default: + return -EINVAL; + } + return 0; +} + +static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos) +{ + return amdgpu_debugfs_regs2_op(f, buf, *pos, size, 0); +} + +static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) +{ + return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1); +} + /** * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register @@ -1091,6 +1231,16 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, return result; } +static const struct file_operations amdgpu_debugfs_regs2_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl, + .read = amdgpu_debugfs_regs2_read, + .write = amdgpu_debugfs_regs2_write, + .open = amdgpu_debugfs_regs2_open, + .release = amdgpu_debugfs_regs2_release, + .llseek = default_llseek +}; + static const struct file_operations amdgpu_debugfs_regs_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_regs_read, @@ -1148,6 +1298,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, + &amdgpu_debugfs_regs2_fops, &amdgpu_debugfs_regs_didt_fops, &amdgpu_debugfs_regs_pcie_fops, &amdgpu_debugfs_regs_smc_fops, @@ -1160,6 +1311,7 @@ static const struct file_operations *debugfs_regs[] = { static const char *debugfs_regs_names[] = { "amdgpu_regs", + "amdgpu_regs2", "amdgpu_regs_didt", "amdgpu_regs_pcie", "amdgpu_regs_smc", -- cgit v1.2.3 From 59715cffce19cfd4f7cffcf6d7ecc18478af8c4a Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 2 Sep 2021 13:27:56 +0200 Subject: drm/amdgpu: use IS_ERR for debugfs APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit debugfs APIs returns encoded error so use IS_ERR for checking return value. v2: return PTR_ERR(ent) References: https://gitlab.freedesktop.org/drm/amd/-/issues/1686 Signed-off-by: Nirmoy Das Reviewed-by: Christian König Reviewed-By: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 10 ++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index d256215ab2c7..60f46a4b0144 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1696,20 +1696,18 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) struct dentry *ent; int r, i; - - ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev, &fops_ib_preempt); - if (!ent) { + if (IS_ERR(ent)) { DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n"); - return -EIO; + return PTR_ERR(ent); } ent = debugfs_create_file("amdgpu_force_sclk", 0200, root, adev, &fops_sclk_set); - if (!ent) { + if (IS_ERR(ent)) { DRM_ERROR("unable to create amdgpu_set_sclk debugsfs file\n"); - return -EIO; + return PTR_ERR(ent); } /* Register debugfs entries for amdgpu_ttm */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 7b634a1517f9..0554576d3695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -428,8 +428,8 @@ int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, ent = debugfs_create_file(name, S_IFREG | S_IRUGO, root, ring, &amdgpu_debugfs_ring_fops); - if (!ent) - return -ENOMEM; + if (IS_ERR(ent)) + return PTR_ERR(ent); i_size_write(ent->d_inode, ring->ring_size + 12); ring->ent = ent; -- cgit v1.2.3 From 62d266b2bd4afb216791d6eff8f3d65542fd4d16 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 2 Sep 2021 15:31:07 +0200 Subject: drm/amdgpu: cleanup debugfs for amdgpu rings Use debugfs_create_file_size API for creating ring debugfs, and as its a NULL returning API, change the return type for amdgpu_debugfs_ring_init API as well. Also cleanup surrounding code. Signed-off-by: Nirmoy Das Reviewed-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 18 ++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 10 ++-------- 3 files changed, 9 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 60f46a4b0144..97d88f3e1c4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1734,9 +1734,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) if (!ring) continue; - if (amdgpu_debugfs_ring_init(adev, ring)) { - DRM_ERROR("Failed to register debugfs file for rings !\n"); - } + amdgpu_debugfs_ring_init(adev, ring); } amdgpu_ras_debugfs_create_all(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 0554576d3695..ab2351ba9574 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -415,26 +415,20 @@ static const struct file_operations amdgpu_debugfs_ring_fops = { #endif -int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, - struct amdgpu_ring *ring) +void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, + struct amdgpu_ring *ring) { #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; - struct dentry *ent, *root = minor->debugfs_root; + struct dentry *root = minor->debugfs_root; char name[32]; sprintf(name, "amdgpu_ring_%s", ring->name); + debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring, + &amdgpu_debugfs_ring_fops, + ring->ring_size + 12); - ent = debugfs_create_file(name, - S_IFREG | S_IRUGO, root, - ring, &amdgpu_debugfs_ring_fops); - if (IS_ERR(ent)) - return PTR_ERR(ent); - - i_size_write(ent->d_inode, ring->ring_size + 12); - ring->ent = ent; #endif - return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 88d80eb3fea1..4d380e79752c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -253,10 +253,6 @@ struct amdgpu_ring { bool has_compute_vm_bug; bool no_scheduler; int hw_prio; - -#if defined(CONFIG_DEBUG_FS) - struct dentry *ent; -#endif }; #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) @@ -356,8 +352,6 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, int amdgpu_ring_test_helper(struct amdgpu_ring *ring); -int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, - struct amdgpu_ring *ring); -void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring); - +void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, + struct amdgpu_ring *ring); #endif -- cgit v1.2.3 From 0fcfb30019d3e0b891a201e41262b926648c38b0 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Sat, 11 Sep 2021 09:41:29 +0800 Subject: drm/amdgpu: Fix a race of IB test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Direct IB submission should be exclusive. So use write lock. Signed-off-by: xinhui pan Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 97d88f3e1c4c..074ffcf0dac2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1358,7 +1358,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) } /* Avoid accidently unparking the sched thread during GPU reset */ - r = down_read_killable(&adev->reset_sem); + r = down_write_killable(&adev->reset_sem); if (r) return r; @@ -1387,7 +1387,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) kthread_unpark(ring->sched.thread); } - up_read(&adev->reset_sem); + up_write(&adev->reset_sem); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); -- cgit v1.2.3 From 81d1bf01e4820962d6ea218ff5b9719e81e5812d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Jul 2021 14:53:37 -0400 Subject: drm/amdgpu: add debugfs access to the IP discovery table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Useful for debugging and new asic validation. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d356e329e6f8..cdb963b9bea0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -830,6 +830,7 @@ struct amdgpu_device { struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct debugfs_blob_wrapper debugfs_vbios_blob; + struct debugfs_blob_wrapper debugfs_discovery_blob; struct mutex srbm_mutex; /* GRBM index mutex. Protects concurrent access to GRBM index */ struct mutex grbm_idx_mutex; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 074ffcf0dac2..31e16a42d4e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1757,6 +1757,11 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) debugfs_create_blob("amdgpu_vbios", 0444, root, &adev->debugfs_vbios_blob); + adev->debugfs_discovery_blob.data = adev->mman.discovery_bin; + adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size; + debugfs_create_blob("amdgpu_discovery", 0444, root, + &adev->debugfs_discovery_blob); + return 0; } -- cgit v1.2.3 From c8365dbda056578eebe164bf110816b1a39b4b7f Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 30 Sep 2021 11:22:51 +0200 Subject: drm/amdgpu: revert "Add autodump debugfs node for gpu reset v8" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 728e7e0cd61899208e924472b9e641dbeb0775c4. Further discussion reveals that this feature is severely broken and needs to be reverted ASAP. GPU reset can never be delayed by userspace even for debugging or otherwise we can run into in kernel deadlocks. Signed-off-by: Christian König Acked-by: Alex Deucher Acked-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 80 ----------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 5 -- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 -- 4 files changed, 91 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f4bceb2624fb..eb1c117e82b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1085,8 +1085,6 @@ struct amdgpu_device { char product_name[32]; char serial[20]; - struct amdgpu_autodump autodump; - atomic_t throttling_logging_enabled; struct ratelimit_state throttling_logging_rs; uint32_t ras_hw_enabled; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 31e16a42d4e1..6611b3c7c149 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "amdgpu.h" #include "amdgpu_pm.h" @@ -38,85 +37,7 @@ #include "amdgpu_fw_attestation.h" #include "amdgpu_umr.h" -int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev) -{ #if defined(CONFIG_DEBUG_FS) - unsigned long timeout = 600 * HZ; - int ret; - - wake_up_interruptible(&adev->autodump.gpu_hang); - - ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout); - if (ret == 0) { - pr_err("autodump: timeout, move on to gpu recovery\n"); - return -ETIMEDOUT; - } -#endif - return 0; -} - -#if defined(CONFIG_DEBUG_FS) - -static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file) -{ - struct amdgpu_device *adev = inode->i_private; - int ret; - - file->private_data = adev; - - ret = down_read_killable(&adev->reset_sem); - if (ret) - return ret; - - if (adev->autodump.dumping.done) { - reinit_completion(&adev->autodump.dumping); - ret = 0; - } else { - ret = -EBUSY; - } - - up_read(&adev->reset_sem); - - return ret; -} - -static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file) -{ - struct amdgpu_device *adev = file->private_data; - - complete_all(&adev->autodump.dumping); - return 0; -} - -static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table) -{ - struct amdgpu_device *adev = file->private_data; - - poll_wait(file, &adev->autodump.gpu_hang, poll_table); - - if (amdgpu_in_reset(adev)) - return POLLIN | POLLRDNORM | POLLWRNORM; - - return 0; -} - -static const struct file_operations autodump_debug_fops = { - .owner = THIS_MODULE, - .open = amdgpu_debugfs_autodump_open, - .poll = amdgpu_debugfs_autodump_poll, - .release = amdgpu_debugfs_autodump_release, -}; - -static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev) -{ - init_completion(&adev->autodump.dumping); - complete_all(&adev->autodump.dumping); - init_waitqueue_head(&adev->autodump.gpu_hang); - - debugfs_create_file("amdgpu_autodump", 0600, - adev_to_drm(adev)->primary->debugfs_root, - adev, &autodump_debug_fops); -} /** * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes @@ -1738,7 +1659,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) } amdgpu_ras_debugfs_create_all(adev); - amdgpu_debugfs_autodump_init(adev); amdgpu_rap_debugfs_init(adev); amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 6d4965b2d01e..371a6f0deb29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -25,10 +25,6 @@ /* * Debugfs */ -struct amdgpu_autodump { - struct completion dumping; - struct wait_queue_head gpu_hang; -}; int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); int amdgpu_debugfs_init(struct amdgpu_device *adev); @@ -36,4 +32,3 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev); void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); -int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 221520f375d4..0207b25c2e6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4440,10 +4440,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (reset_context->reset_req_dev == adev) job = reset_context->job; - /* no need to dump if device is not in good state during probe period */ - if (!adev->gmc.xgmi.pending_reset) - amdgpu_debugfs_wait_dump(adev); - if (amdgpu_sriov_vf(adev)) { /* stop the data exchange thread */ amdgpu_virt_fini_data_exchange(adev); -- cgit v1.2.3 From 5b9581df9f17b3e356d67735a07da97ba8e1fdd0 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 6 Oct 2021 11:06:33 +0200 Subject: drm/amdgpu: return early if debugfs is not initialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check first if debugfs is initialized before creating amdgpu debugfs files. References: https://gitlab.freedesktop.org/drm/amd/-/issues/1686 Signed-off-by: Nirmoy Das Reviewed-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 6611b3c7c149..5497e2d31d1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1617,6 +1617,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) struct dentry *ent; int r, i; + if (!debugfs_initialized()) + return 0; + ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev, &fops_ib_preempt); if (IS_ERR(ent)) { -- cgit v1.2.3 From 58144d283712c9e80e528e001af6ac5aeee71af2 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 6 Oct 2021 17:55:00 +0200 Subject: drm/amdgpu: unify BO evicting method in amdgpu_ttm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify BO evicting functionality for possible memory types in amdgpu_ttm.c. Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 8 +++----- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 30 +++++++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 23 ---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 30 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + 6 files changed, 58 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 5497e2d31d1a..164d6a9e9fbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1328,7 +1328,7 @@ static int amdgpu_debugfs_evict_vram(void *data, u64 *val) return r; } - *val = amdgpu_bo_evict_vram(adev); + *val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); @@ -1341,17 +1341,15 @@ static int amdgpu_debugfs_evict_gtt(void *data, u64 *val) { struct amdgpu_device *adev = (struct amdgpu_device *)data; struct drm_device *dev = adev_to_drm(adev); - struct ttm_resource_manager *man; int r; r = pm_runtime_get_sync(dev->dev); if (r < 0) { - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(dev->dev); return r; } - man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); - *val = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + *val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cda04918fd55..5ea36c1951f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3885,6 +3885,25 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) } +/** + * amdgpu_device_evict_resources - evict device resources + * @adev: amdgpu device object + * + * Evicts all ttm device resources(vram BOs, gart table) from the lru list + * of the vram memory type. Mainly used for evicting device resources + * at suspend time. + * + */ +static void amdgpu_device_evict_resources(struct amdgpu_device *adev) +{ + /* No need to evict vram on APUs for suspend to ram */ + if (adev->in_s3 && (adev->flags & AMD_IS_APU)) + return; + + if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM)) + DRM_WARN("evicting device resources failed\n"); + +} /* * Suspend & resume. @@ -3925,17 +3944,16 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); - /* evict vram memory */ - amdgpu_bo_evict_vram(adev); + /* First evict vram memory */ + amdgpu_device_evict_resources(adev); amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); - /* evict remaining vram memory - * This second call to evict vram is to evict the gart page table - * using the CPU. + /* This second call to evict device resources is to evict + * the gart page table using the CPU. */ - amdgpu_bo_evict_vram(adev); + amdgpu_device_evict_resources(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 32f36992291e..aeb92e5677ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1004,29 +1004,6 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) } } -/** - * amdgpu_bo_evict_vram - evict VRAM buffers - * @adev: amdgpu device object - * - * Evicts all VRAM buffers on the lru list of the memory type. - * Mainly used for evicting vram at suspend time. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_evict_vram(struct amdgpu_device *adev) -{ - struct ttm_resource_manager *man; - - if (adev->in_s3 && (adev->flags & AMD_IS_APU)) { - /* No need to evict vram on APUs for suspend to ram */ - return 0; - } - - man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); - return ttm_resource_manager_evict_all(&adev->mman.bdev, man); -} - static const char *amdgpu_vram_names[] = { "UNKNOWN", "GDDR1", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index d2cbabaf2a46..4c9cbdc66995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -304,7 +304,6 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain); int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 min_offset, u64 max_offset); void amdgpu_bo_unpin(struct amdgpu_bo *bo); -int amdgpu_bo_evict_vram(struct amdgpu_device *adev); int amdgpu_bo_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 820fcb24231f..5d5c42ae388b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2036,6 +2036,36 @@ error_free: return r; } +/** + * amdgpu_ttm_evict_resources - evict memory buffers + * @adev: amdgpu device object + * @mem_type: evicted BO's memory type + * + * Evicts all @mem_type buffers on the lru list of the memory type. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) +{ + struct ttm_resource_manager *man; + + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + case AMDGPU_PL_GWS: + case AMDGPU_PL_GDS: + case AMDGPU_PL_OA: + man = ttm_manager_type(&adev->mman.bdev, mem_type); + break; + default: + DRM_ERROR("Trying to evict invalid memory type\n"); + return -EINVAL; + } + + return ttm_resource_manager_evict_all(&adev->mman.bdev, man); +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 3205fd520060..639c7b41e30b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -190,6 +190,7 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_resource *mem); +int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type); void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); -- cgit v1.2.3