summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c178
1 files changed, 97 insertions, 81 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 41c6b3aacd37..90d22a376632 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -125,6 +125,7 @@ const char *amdgpu_asic_name[] = {
"DIMGREY_CAVEFISH",
"BEIGE_GOBY",
"YELLOW_CARP",
+ "IP DISCOVERY",
"LAST",
};
@@ -305,7 +306,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
uint64_t last;
int idx;
- if (!drm_dev_enter(&adev->ddev, &idx))
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
return;
BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
@@ -2126,46 +2127,11 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- case CHIP_ALDEBARAN:
- if (adev->flags & AMD_IS_APU)
- adev->family = AMDGPU_FAMILY_RV;
- else
- adev->family = AMDGPU_FAMILY_AI;
-
- r = soc15_set_ip_blocks(adev);
- if (r)
- return r;
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_VANGOGH:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- if (adev->asic_type == CHIP_VANGOGH)
- adev->family = AMDGPU_FAMILY_VGH;
- else if (adev->asic_type == CHIP_YELLOW_CARP)
- adev->family = AMDGPU_FAMILY_YC;
- else
- adev->family = AMDGPU_FAMILY_NV;
-
- r = nv_set_ip_blocks(adev);
+ default:
+ r = amdgpu_discovery_set_ip_blocks(adev);
if (r)
return r;
break;
- default:
- /* FIXME: not supported yet */
- return -EINVAL;
}
amdgpu_amdkfd_device_probe(adev);
@@ -2741,6 +2707,11 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = false;
}
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_release_full_gpu(adev, false))
+ DRM_ERROR("failed to release exclusive mode on fini\n");
+ }
+
return 0;
}
@@ -2801,10 +2772,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
amdgpu_ras_fini(adev);
- if (amdgpu_sriov_vf(adev))
- if (amdgpu_virt_release_full_gpu(adev, false))
- DRM_ERROR("failed to release exclusive mode on fini\n");
-
return 0;
}
@@ -3148,6 +3115,10 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
int r;
+ r = amdgpu_amdkfd_resume_iommu(adev);
+ if (r)
+ return r;
+
r = amdgpu_device_ip_resume_phase1(adev);
if (r)
return r;
@@ -3196,11 +3167,21 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
switch (asic_type) {
#if defined(CONFIG_DRM_AMD_DC)
-#if defined(CONFIG_DRM_AMD_DC_SI)
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_VERDE:
case CHIP_OLAND:
+ /*
+ * We have systems in the wild with these ASICs that require
+ * LVDS and VGA support which is not supported with DC.
+ *
+ * Fallback to the non-DC driver here by default so as not to
+ * cause regressions.
+ */
+#if defined(CONFIG_DRM_AMD_DC_SI)
+ return amdgpu_dc > 0;
+#else
+ return false;
#endif
case CHIP_BONAIRE:
case CHIP_KAVERI:
@@ -3232,6 +3213,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_RENOIR:
+ case CHIP_CYAN_SKILLFISH:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
case CHIP_DIMGREY_CAVEFISH:
@@ -3239,13 +3221,15 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_VANGOGH:
case CHIP_YELLOW_CARP:
#endif
+ default:
return amdgpu_dc != 0;
-#endif
+#else
default:
if (amdgpu_dc > 0)
DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
"but isn't supported by ASIC, ignoring\n");
return false;
+#endif
}
}
@@ -3346,6 +3330,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
continue;
} else if (timeout < 0) {
timeout = MAX_SCHEDULE_TIMEOUT;
+ dev_warn(adev->dev, "lockup timeout disabled");
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
} else {
timeout = msecs_to_jiffies(timeout);
}
@@ -3523,6 +3509,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->rmmio_size = pci_resource_len(adev->pdev, 2);
}
+ for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
+ atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
+
adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
if (adev->rmmio == NULL) {
return -ENOMEM;
@@ -3530,17 +3519,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
- /* enable PCIE atomic ops */
- r = pci_enable_atomic_ops_to_root(adev->pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64);
- if (r) {
- adev->have_atomics_support = false;
- DRM_INFO("PCIE atomic ops is not supported\n");
- } else {
- adev->have_atomics_support = true;
- }
-
amdgpu_device_get_pcie_info(adev);
if (amdgpu_mcbp)
@@ -3563,6 +3541,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
+ /* enable PCIE atomic ops */
+ if (amdgpu_sriov_vf(adev))
+ adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+ adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
+ (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ else
+ adev->have_atomics_support =
+ !pci_enable_atomic_ops_to_root(adev->pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ if (!adev->have_atomics_support)
+ dev_info(adev->dev, "PCIE atomic ops is not supported\n");
+
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
@@ -3696,8 +3687,6 @@ fence_driver_init:
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
- amdgpu_fbdev_init(adev);
-
r = amdgpu_pm_sysfs_init(adev);
if (r) {
adev->pm_sysfs_en = false;
@@ -3842,7 +3831,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
if (adev->mode_info.mode_config_initialized){
- if (!amdgpu_device_has_dc_support(adev))
+ if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
drm_helper_force_disable_all(adev_to_drm(adev));
else
drm_atomic_helper_shutdown(adev_to_drm(adev));
@@ -3855,11 +3844,11 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
amdgpu_ucode_sysfs_fini(adev);
sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
- amdgpu_fbdev_fini(adev);
+ amdgpu_device_ip_fini_early(adev);
amdgpu_irq_fini_hw(adev);
- amdgpu_device_ip_fini_early(adev);
+ ttm_device_clear_dma_mappings(&adev->mman.bdev);
amdgpu_gart_dummy_page_fini(adev);
@@ -3868,8 +3857,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
- amdgpu_device_ip_fini(adev);
amdgpu_fence_driver_sw_fini(adev);
+ amdgpu_device_ip_fini(adev);
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
adev->accel_working = false;
@@ -3901,6 +3890,25 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
}
+/**
+ * amdgpu_device_evict_resources - evict device resources
+ * @adev: amdgpu device object
+ *
+ * Evicts all ttm device resources(vram BOs, gart table) from the lru list
+ * of the vram memory type. Mainly used for evicting device resources
+ * at suspend time.
+ *
+ */
+static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
+{
+ /* No need to evict vram on APUs for suspend to ram */
+ if (adev->in_s3 && (adev->flags & AMD_IS_APU))
+ return;
+
+ if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
+ DRM_WARN("evicting device resources failed\n");
+
+}
/*
* Suspend & resume.
@@ -3930,7 +3938,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
drm_kms_helper_poll_disable(dev);
if (fbcon)
- amdgpu_fbdev_set_suspend(adev, 1);
+ drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
cancel_delayed_work_sync(&adev->delayed_init_work);
@@ -3941,17 +3949,16 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
if (!adev->in_s0ix)
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
- /* evict vram memory */
- amdgpu_bo_evict_vram(adev);
+ /* First evict vram memory */
+ amdgpu_device_evict_resources(adev);
amdgpu_fence_driver_hw_fini(adev);
amdgpu_device_ip_suspend_phase2(adev);
- /* evict remaining vram memory
- * This second call to evict vram is to evict the gart page table
- * using the CPU.
+ /* This second call to evict device resources is to evict
+ * the gart page table using the CPU.
*/
- amdgpu_bo_evict_vram(adev);
+ amdgpu_device_evict_resources(adev);
return 0;
}
@@ -4008,7 +4015,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
flush_delayed_work(&adev->delayed_init_work);
if (fbcon)
- amdgpu_fbdev_set_suspend(adev, 0);
+ drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
drm_kms_helper_poll_enable(dev);
@@ -4278,6 +4285,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
{
int r;
+ amdgpu_amdkfd_pre_reset(adev);
+
if (from_hypervisor)
r = amdgpu_virt_request_full_gpu(adev, true);
else
@@ -4285,8 +4294,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
- amdgpu_amdkfd_pre_reset(adev);
-
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
@@ -4307,7 +4314,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
- amdgpu_amdkfd_post_reset(adev);
error:
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
@@ -4458,10 +4464,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (reset_context->reset_req_dev == adev)
job = reset_context->job;
- /* no need to dump if device is not in good state during probe period */
- if (!adev->gmc.xgmi.pending_reset)
- amdgpu_debugfs_wait_dump(adev);
-
if (amdgpu_sriov_vf(adev)) {
/* stop the data exchange thread */
amdgpu_virt_fini_data_exchange(adev);
@@ -4601,6 +4603,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
dev_warn(tmp_adev->dev, "asic atom init failed!");
} else {
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
+ r = amdgpu_amdkfd_resume_iommu(tmp_adev);
+ if (r)
+ goto out;
+
r = amdgpu_device_ip_resume_phase1(tmp_adev);
if (r)
goto out;
@@ -4640,7 +4646,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (r)
goto out;
- amdgpu_fbdev_set_suspend(tmp_adev, 0);
+ drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
/*
* The GPU enters bad state once faulty pages
@@ -4848,6 +4854,9 @@ static void amdgpu_device_recheck_guilty_jobs(
/* clear job's guilty and depend the folowing step to decide the real one */
drm_sched_reset_karma(s_job);
+ /* for the real bad job, it will be resubmitted twice, adding a dma_fence_get
+ * to make sure fence is balanced */
+ dma_fence_get(s_job->s_fence->parent);
drm_sched_resubmit_jobs_ext(&ring->sched, 1);
ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
@@ -4883,6 +4892,7 @@ retry:
/* got the hw fence, signal finished fence */
atomic_dec(ring->sched.score);
+ dma_fence_put(s_job->s_fence->parent);
dma_fence_get(&s_job->s_fence->finished);
dma_fence_signal(&s_job->s_fence->finished);
dma_fence_put(&s_job->s_fence->finished);
@@ -5027,7 +5037,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*/
amdgpu_unregister_gpu_instance(tmp_adev);
- amdgpu_fbdev_set_suspend(tmp_adev, 1);
+ drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
/* disable ras on ALL IPs */
if (!need_emergency_restart &&
@@ -5077,7 +5087,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
/* Actual ASIC resets if needed.*/
- /* TODO Implement XGMI hive reset logic for SRIOV */
+ /* Host driver will handle XGMI hive reset for SRIOV */
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_device_reset_sriov(adev, job ? false : true);
if (r)
@@ -5118,7 +5128,7 @@ skip_hw_reset:
drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
}
- if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
+ if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
}
@@ -5139,7 +5149,7 @@ skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
/* unlock kfd: SRIOV would do it separately */
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
- amdgpu_amdkfd_post_reset(tmp_adev);
+ amdgpu_amdkfd_post_reset(tmp_adev);
/* kfd_post_reset will do nothing if kfd device is not initialized,
* need to bring up kfd here if it's not be initialized before
@@ -5387,6 +5397,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
return PCI_ERS_RESULT_DISCONNECT;
}
+ adev->pci_channel_state = state;
+
switch (state) {
case pci_channel_io_normal:
return PCI_ERS_RESULT_CAN_RECOVER;
@@ -5529,6 +5541,10 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
DRM_INFO("PCI error: resume callback!!\n");
+ /* Only continue execution for the case of pci_channel_io_frozen */
+ if (adev->pci_channel_state != pci_channel_io_frozen)
+ return;
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];