diff options
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r-- | drivers/gpu/drm/i915/gvt/cmd_parser.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/gtt.c | 24 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/gtt.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/handlers.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/kvmgt.c | 73 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/mmio_context.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/scheduler.c | 108 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/scheduler.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/trace.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/vgpu.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.c | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 39 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_pmu.c | 32 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_sysfs.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_ddi.c | 16 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_hangcheck.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 21 |
19 files changed, 295 insertions, 87 deletions
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index c8454ac43fae..db6b94dda5df 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -471,6 +471,7 @@ struct parser_exec_state { * used when ret from 2nd level batch buffer */ int saved_buf_addr_type; + bool is_ctx_wa; struct cmd_info *info; @@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s) bb->accessing = true; bb->bb_start_cmd_va = s->ip_va; + if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa)) + bb->bb_offset = s->ip_va - s->rb_va; + else + bb->bb_offset = 0; + /* * ip_va saves the virtual address of the shadow batch buffer, while * ip_gma saves the graphics address of the original batch buffer. @@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload) s.ring_tail = gma_tail; s.rb_va = workload->shadow_ring_buffer_va; s.workload = workload; + s.is_ctx_wa = false; if ((bypass_scan_mask & (1 << workload->ring_id)) || gma_head == gma_tail) @@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) s.ring_tail = gma_tail; s.rb_va = wa_ctx->indirect_ctx.shadow_va; s.workload = workload; + s.is_ctx_wa = true; if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 0a100a288e6d..d29281231507 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2046,7 +2046,7 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) } if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) - gvt_err("vgpu ppgtt mm is not fully destoried\n"); + gvt_err("vgpu ppgtt mm is not fully destroyed\n"); if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) { gvt_err("Why we still has spt not freed?\n"); @@ -2291,6 +2291,28 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt) } /** + * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances + * @vgpu: a vGPU + * + * This function is called when invalidate all PPGTT instances of a vGPU. + * + */ +void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) +{ + struct list_head *pos, *n; + struct intel_vgpu_mm *mm; + + list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); + if (mm->type == INTEL_GVT_MM_PPGTT) { + list_del_init(&mm->ppgtt_mm.lru_list); + if (mm->ppgtt_mm.shadowed) + invalidate_ppgtt_mm(mm); + } + } +} + +/** * intel_vgpu_reset_ggtt - reset the GGTT entry * @vgpu: a vGPU * diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index e831507e17c3..a8b369cd352b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -194,6 +194,7 @@ struct intel_vgpu_gtt { extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu); void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu); +void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu); extern int intel_gvt_init_gtt(struct intel_gvt *gvt); void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 112f2ec7c25f..8c5d5d005854 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1767,6 +1767,10 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(CURBASE(PIPE_B), D_ALL); MMIO_D(CURBASE(PIPE_C), D_ALL); + MMIO_D(CUR_FBC_CTL(PIPE_A), D_ALL); + MMIO_D(CUR_FBC_CTL(PIPE_B), D_ALL); + MMIO_D(CUR_FBC_CTL(PIPE_C), D_ALL); + MMIO_D(_MMIO(0x700ac), D_ALL); MMIO_D(_MMIO(0x710ac), D_ALL); MMIO_D(_MMIO(0x720ac), D_ALL); @@ -2228,6 +2232,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(HSW_AUD_CFG(PIPE_A), D_ALL); MMIO_D(HSW_AUD_PIN_ELD_CP_VLD, D_ALL); + MMIO_D(HSW_AUD_MISC_CTRL(PIPE_A), D_ALL); MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_A), D_ALL, NULL, NULL); MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_B), D_ALL, NULL, NULL); @@ -2559,6 +2564,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(WM_MISC, D_BDW); MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW); + MMIO_D(_MMIO(0x6671c), D_BDW_PLUS); MMIO_D(_MMIO(0x66c00), D_BDW_PLUS); MMIO_D(_MMIO(0x66c04), D_BDW_PLUS); @@ -2787,6 +2793,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_MMIO(0x70380), D_SKL_PLUS); MMIO_D(_MMIO(0x71380), D_SKL_PLUS); MMIO_D(_MMIO(0x72380), D_SKL_PLUS); + MMIO_D(_MMIO(0x7239c), D_SKL_PLUS); MMIO_D(_MMIO(0x7039c), D_SKL_PLUS); MMIO_D(_MMIO(0x8f074), D_SKL | D_KBL); @@ -2801,7 +2808,9 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_D(RPM_CONFIG0, D_SKL_PLUS); MMIO_D(_MMIO(0xd08), D_SKL_PLUS); + MMIO_D(RC6_LOCATION, D_SKL_PLUS); MMIO_DFH(_MMIO(0x20e0), D_SKL_PLUS, F_MODE_MASK, NULL, NULL); MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 8a428678e4b5..c16a492449d7 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -184,7 +184,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) return NULL; } -static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, +static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, dma_addr_t dma_addr) { struct gvt_dma *new, *itr; @@ -192,7 +192,7 @@ static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); if (!new) - return; + return -ENOMEM; new->vgpu = vgpu; new->gfn = gfn; @@ -229,6 +229,7 @@ static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache); vgpu->vdev.nr_cache_entries++; + return 0; } static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, @@ -749,6 +750,25 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, return ret == 0 ? count : ret; } +static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos) +{ + struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + struct intel_gvt *gvt = vgpu->gvt; + int offset; + + /* Only allow MMIO GGTT entry access */ + if (index != PCI_BASE_ADDRESS_0) + return false; + + offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) - + intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0); + + return (offset >= gvt->device_info.gtt_start_offset && + offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ? + true : false; +} + static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, size_t count, loff_t *ppos) { @@ -758,7 +778,21 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, while (count) { size_t filled; - if (count >= 4 && !(*ppos % 4)) { + /* Only support GGTT entry 8 bytes read */ + if (count >= 8 && !(*ppos % 8) && + gtt_entry(mdev, ppos)) { + u64 val; + + ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ppos, false); + if (ret <= 0) + goto read_err; + + if (copy_to_user(buf, &val, sizeof(val))) + goto read_err; + + filled = 8; + } else if (count >= 4 && !(*ppos % 4)) { u32 val; ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), @@ -818,7 +852,21 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, while (count) { size_t filled; - if (count >= 4 && !(*ppos % 4)) { + /* Only support GGTT entry 8 bytes write */ + if (count >= 8 && !(*ppos % 8) && + gtt_entry(mdev, ppos)) { + u64 val; + + if (copy_from_user(&val, buf, sizeof(val))) + goto write_err; + + ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ppos, true); + if (ret <= 0) + goto write_err; + + filled = 8; + } else if (count >= 4 && !(*ppos % 4)) { u32 val; if (copy_from_user(&val, buf, sizeof(val))) @@ -1586,11 +1634,12 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, entry = __gvt_cache_find_gfn(info->vgpu, gfn); if (!entry) { ret = gvt_dma_map_page(vgpu, gfn, dma_addr); - if (ret) { - mutex_unlock(&info->vgpu->vdev.cache_lock); - return ret; - } - __gvt_cache_add(info->vgpu, gfn, *dma_addr); + if (ret) + goto err_unlock; + + ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr); + if (ret) + goto err_unmap; } else { kref_get(&entry->ref); *dma_addr = entry->dma_addr; @@ -1598,6 +1647,12 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, mutex_unlock(&info->vgpu->vdev.cache_lock); return 0; + +err_unmap: + gvt_dma_unmap_page(vgpu, gfn, *dma_addr); +err_unlock: + mutex_unlock(&info->vgpu->vdev.cache_lock); + return ret; } static void __gvt_dma_release(struct kref *ref) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 74a9c7b5516e..a5bac83d53a9 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -120,6 +120,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { {RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ {RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ {RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ + {RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */ {RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */ {RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */ {RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */ @@ -557,9 +558,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, * performace for batch mmio read/write, so we need * handle forcewake mannually. */ + intel_runtime_pm_get(dev_priv); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); switch_mmio(pre, next, ring_id); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + intel_runtime_pm_put(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 9b92b4e25a20..638abe84857c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -52,6 +52,77 @@ static void set_context_pdp_root_pointer( pdp_pair[i].val = pdp[7 - i]; } +static void update_shadow_pdps(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + int ring_id = workload->ring_id; + struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; + struct drm_i915_gem_object *ctx_obj = + shadow_ctx->engine[ring_id].state->obj; + struct execlist_ring_context *shadow_ring_context; + struct page *page; + + if (WARN_ON(!workload->shadow_mm)) + return; + + if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount))) + return; + + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); + shadow_ring_context = kmap(page); + set_context_pdp_root_pointer(shadow_ring_context, + (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps); + kunmap(page); +} + +/* + * when populating shadow ctx from guest, we should not overrride oa related + * registers, so that they will not be overlapped by guest oa configs. Thus + * made it possible to capture oa data from host for both host and guests. + */ +static void sr_oa_regs(struct intel_vgpu_workload *workload, + u32 *reg_state, bool save) +{ + struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; + u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; + int i = 0; + u32 flex_mmio[] = { + i915_mmio_reg_offset(EU_PERF_CNTL0), + i915_mmio_reg_offset(EU_PERF_CNTL1), + i915_mmio_reg_offset(EU_PERF_CNTL2), + i915_mmio_reg_offset(EU_PERF_CNTL3), + i915_mmio_reg_offset(EU_PERF_CNTL4), + i915_mmio_reg_offset(EU_PERF_CNTL5), + i915_mmio_reg_offset(EU_PERF_CNTL6), + }; + + if (!workload || !reg_state || workload->ring_id != RCS) + return; + + if (save) { + workload->oactxctrl = reg_state[ctx_oactxctrl + 1]; + + for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { + u32 state_offset = ctx_flexeu0 + i * 2; + + workload->flex_mmio[i] = reg_state[state_offset + 1]; + } + } else { + reg_state[ctx_oactxctrl] = + i915_mmio_reg_offset(GEN8_OACTXCONTROL); + reg_state[ctx_oactxctrl + 1] = workload->oactxctrl; + + for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { + u32 state_offset = ctx_flexeu0 + i * 2; + u32 mmio = flex_mmio[i]; + + reg_state[state_offset] = mmio; + reg_state[state_offset + 1] = workload->flex_mmio[i]; + } + } +} + static int populate_shadow_context(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; @@ -98,11 +169,18 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); shadow_ring_context = kmap(page); + sr_oa_regs(workload, (u32 *)shadow_ring_context, true); #define COPY_REG(name) \ intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) +#define COPY_REG_MASKED(name) {\ + intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ + + RING_CTX_OFF(name.val),\ + &shadow_ring_context->name.val, 4);\ + shadow_ring_context->name.val |= 0xffff << 16;\ + } - COPY_REG(ctx_ctrl); + COPY_REG_MASKED(ctx_ctrl); COPY_REG(ctx_timestamp); if (ring_id == RCS) { @@ -111,9 +189,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) COPY_REG(rcs_indirect_ctx_offset); } #undef COPY_REG - - set_context_pdp_root_pointer(shadow_ring_context, - (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps); +#undef COPY_REG_MASKED intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa + @@ -122,6 +198,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); + sr_oa_regs(workload, (u32 *)shadow_ring_context, false); kunmap(page); return 0; } @@ -381,6 +458,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) goto err; } + /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va + * is only updated into ring_scan_buffer, not real ring address + * allocated in later copy_workload_to_ring_buffer. pls be noted + * shadow_ring_buffer_va is now pointed to real ring buffer va + * in copy_workload_to_ring_buffer. + */ + + if (bb->bb_offset) + bb->bb_start_cmd_va = workload->shadow_ring_buffer_va + + bb->bb_offset; + /* relocate shadow batch buffer */ bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); if (gmadr_bytes == 8) @@ -509,6 +597,8 @@ static int prepare_workload(struct intel_vgpu_workload *workload) return ret; } + update_shadow_pdps(workload); + ret = intel_vgpu_sync_oos_pages(workload->vgpu); if (ret) { gvt_vgpu_err("fail to vgpu sync oos pages\n"); @@ -1049,10 +1139,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); - s->workloads = kmem_cache_create("gvt-g_vgpu_workload", - sizeof(struct intel_vgpu_workload), 0, - SLAB_HWCACHE_ALIGN, - NULL); + s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload", + sizeof(struct intel_vgpu_workload), 0, + SLAB_HWCACHE_ALIGN, + offsetof(struct intel_vgpu_workload, rb_tail), + sizeof_field(struct intel_vgpu_workload, rb_tail), + NULL); if (!s->workloads) { ret = -ENOMEM; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index bab4097aa6d7..486ed57a4ad1 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -110,6 +110,10 @@ struct intel_vgpu_workload { /* shadow batch buffer */ struct list_head shadow_bb; struct intel_shadow_wa_ctx wa_ctx; + + /* oa registers */ + u32 oactxctrl; + u32 flex_mmio[7]; }; struct intel_vgpu_shadow_bb { @@ -120,6 +124,7 @@ struct intel_vgpu_shadow_bb { u32 *bb_start_cmd_va; unsigned int clflush; bool accessing; + unsigned long bb_offset; }; #define workload_q_head(vgpu, ring_id) \ diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index fc7831a62121..82093f1e8612 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -333,7 +333,7 @@ TRACE_EVENT(render_mmio, TP_PROTO(int old_id, int new_id, char *action, unsigned int reg, unsigned int old_val, unsigned int new_val), - TP_ARGS(old_id, new_id, action, reg, new_val, old_val), + TP_ARGS(old_id, new_id, action, reg, old_val, new_val), TP_STRUCT__entry( __field(int, old_id) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 41f76e86aa1f..2e0a02a80fe4 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -522,6 +522,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, /* full GPU reset or device model level reset */ if (engine_mask == ALL_ENGINES || dmlr) { intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); + intel_vgpu_invalidate_ppgtt(vgpu); /*fence will not be reset during virtual reset */ if (dmlr) { intel_vgpu_reset_gtt(vgpu); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d7c4de45644d..07c07d55398b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1611,15 +1611,12 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) { struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = dev_priv->drm.pdev; - bool fw_csr; int ret; disable_rpm_wakeref_asserts(dev_priv); intel_display_set_init_power(dev_priv, false); - fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation && - suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; /* * In case of firmware assisted context save/restore don't manually * deinit the power domains. This also means the CSR/DMC firmware will @@ -1627,8 +1624,11 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) * also enable deeper system power states that would be blocked if the * firmware was inactive. */ - if (!fw_csr) + if (IS_GEN9_LP(dev_priv) || hibernation || !suspend_to_idle(dev_priv) || + dev_priv->csr.dmc_payload == NULL) { intel_power_domains_suspend(dev_priv); + dev_priv->power_domains_suspended = true; + } ret = 0; if (IS_GEN9_LP(dev_priv)) @@ -1640,8 +1640,10 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) if (ret) { DRM_ERROR("Suspend complete failed: %d\n", ret); - if (!fw_csr) + if (dev_priv->power_domains_suspended) { intel_power_domains_init_hw(dev_priv, true); + dev_priv->power_domains_suspended = false; + } goto out; } @@ -1662,8 +1664,6 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) if (!(hibernation && INTEL_GEN(dev_priv) < 6)) pci_set_power_state(pdev, PCI_D3hot); - dev_priv->suspended_to_idle = suspend_to_idle(dev_priv); - out: enable_rpm_wakeref_asserts(dev_priv); @@ -1830,8 +1830,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_uncore_resume_early(dev_priv); if (IS_GEN9_LP(dev_priv)) { - if (!dev_priv->suspended_to_idle) - gen9_sanitize_dc_state(dev_priv); + gen9_sanitize_dc_state(dev_priv); bxt_disable_dc9(dev_priv); } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { hsw_disable_pc8(dev_priv); @@ -1839,8 +1838,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_uncore_sanitize(dev_priv); - if (IS_GEN9_LP(dev_priv) || - !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) + if (dev_priv->power_domains_suspended) intel_power_domains_init_hw(dev_priv, true); else intel_display_set_init_power(dev_priv, true); @@ -1850,7 +1848,7 @@ static int i915_drm_resume_early(struct drm_device *dev) enable_rpm_wakeref_asserts(dev_priv); out: - dev_priv->suspended_to_idle = false; + dev_priv->power_domains_suspended = false; return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6e740f6fe33f..ce18b6cf6e68 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2119,7 +2119,7 @@ struct drm_i915_private { u32 bxt_phy_grc; u32 suspend_count; - bool suspended_to_idle; + bool power_domains_suspended; struct i915_suspend_saved_registers regfile; struct vlv_s0ix_state vlv_s0ix_state; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a5bd07338b46..7b5a9d7c9593 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -433,20 +433,28 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, dma_fence_put(shared[i]); kfree(shared); + /* + * If both shared fences and an exclusive fence exist, + * then by construction the shared fences must be later + * than the exclusive fence. If we successfully wait for + * all the shared fences, we know that the exclusive fence + * must all be signaled. If all the shared fences are + * signaled, we can prune the array and recover the + * floating references on the fences/requests. + */ prune_fences = count && timeout >= 0; } else { excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout >= 0) { + if (excl && timeout >= 0) timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps_client); - prune_fences = timeout >= 0; - } dma_fence_put(excl); - /* Oportunistically prune the fences iff we know they have *all* been + /* + * Opportunistically prune the fences iff we know they have *all* been * signaled and that the reservation object has not been changed (i.e. * no new fences have been added). */ @@ -471,10 +479,11 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) rq = to_request(fence); engine = rq->engine; - if (!engine->schedule) - return; - engine->schedule(rq, prio); + rcu_read_lock(); + if (engine->schedule) + engine->schedule(rq, prio); + rcu_read_unlock(); } static void fence_set_priority(struct dma_fence *fence, int prio) @@ -2939,8 +2948,16 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) * calling engine->init_hw() and also writing the ELSP. * Turning off the execlists->tasklet until the reset is over * prevents the race. + * + * Note that this needs to be a single atomic operation on the + * tasklet (flush existing tasks, prevent new tasks) to prevent + * a race between reset and set-wedged. It is not, so we do the best + * we can atm and make sure we don't lock the machine up in the more + * common case of recursively being called from set-wedged from inside + * i915_reset. */ - tasklet_kill(&engine->execlists.tasklet); + if (!atomic_read(&engine->execlists.tasklet.count)) + tasklet_kill(&engine->execlists.tasklet); tasklet_disable(&engine->execlists.tasklet); /* @@ -3214,8 +3231,11 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) */ for_each_engine(engine, i915, id) { i915_gem_reset_prepare_engine(engine); + engine->submit_request = nop_submit_request; + engine->schedule = NULL; } + i915->caps.scheduler = 0; /* * Make sure no one is running the old callback before we proceed with @@ -3233,11 +3253,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) * start to complete all requests. */ engine->submit_request = nop_complete_submit_request; - engine->schedule = NULL; } - i915->caps.scheduler = 0; - /* * Make sure no request can slip through without getting completed by * either this call here to intel_engine_init_global_seqno, or the one diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 964467b03e4d..d8feb9053e0c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -433,7 +433,7 @@ static u64 __get_rc6(struct drm_i915_private *i915) return val; } -static u64 get_rc6(struct drm_i915_private *i915, bool locked) +static u64 get_rc6(struct drm_i915_private *i915) { #if IS_ENABLED(CONFIG_PM) unsigned long flags; @@ -449,8 +449,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) * previously. */ - if (!locked) - spin_lock_irqsave(&i915->pmu.lock, flags); + spin_lock_irqsave(&i915->pmu.lock, flags); if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; @@ -459,12 +458,10 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; } - if (!locked) - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&i915->pmu.lock, flags); } else { struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; - unsigned long flags2; /* * We are runtime suspended. @@ -473,10 +470,8 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) * on top of the last known real value, as the approximated RC6 * counter value. */ - if (!locked) - spin_lock_irqsave(&i915->pmu.lock, flags); - - spin_lock_irqsave(&kdev->power.lock, flags2); + spin_lock_irqsave(&i915->pmu.lock, flags); + spin_lock(&kdev->power.lock); if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) i915->pmu.suspended_jiffies_last = @@ -486,14 +481,13 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) i915->pmu.suspended_jiffies_last; val += jiffies - kdev->power.accounting_timestamp; - spin_unlock_irqrestore(&kdev->power.lock, flags2); + spin_unlock(&kdev->power.lock); val = jiffies_to_nsecs(val); val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - if (!locked) - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&i915->pmu.lock, flags); } return val; @@ -502,7 +496,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) #endif } -static u64 __i915_pmu_event_read(struct perf_event *event, bool locked) +static u64 __i915_pmu_event_read(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); @@ -540,7 +534,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked) val = count_interrupts(i915); break; case I915_PMU_RC6_RESIDENCY: - val = get_rc6(i915, locked); + val = get_rc6(i915); break; } } @@ -555,7 +549,7 @@ static void i915_pmu_event_read(struct perf_event *event) again: prev = local64_read(&hwc->prev_count); - new = __i915_pmu_event_read(event, false); + new = __i915_pmu_event_read(event); if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) goto again; @@ -605,14 +599,14 @@ static void i915_pmu_enable(struct perf_event *event) engine->pmu.enable_count[sample]++; } + spin_unlock_irqrestore(&i915->pmu.lock, flags); + /* * Store the current counter value so we can report the correct delta * for all listeners. Even when the event was already enabled and has * an existing non-zero value. */ - local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true)); - - spin_unlock_irqrestore(&i915->pmu.lock, flags); + local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); } static void i915_pmu_disable(struct perf_event *event) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d437beac3969..282f57630cc1 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1081,8 +1081,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ + rcu_read_lock(); if (engine->schedule) engine->schedule(request, request->ctx->priority); + rcu_read_unlock(); local_bh_disable(); i915_sw_fence_commit(&request->submit); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index b33d2158c234..e5e6f6bb2b05 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -304,8 +304,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; + bool boost = false; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) @@ -317,8 +318,13 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, return -EINVAL; mutex_lock(&dev_priv->pcu_lock); - rps->boost_freq = val; + if (val != rps->boost_freq) { + rps->boost_freq = val; + boost = atomic_read(&rps->num_waiters); + } mutex_unlock(&dev_priv->pcu_lock); + if (boost) + schedule_work(&rps->work); return count; } diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ac8fc2a44ac6..8c2d778560f0 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2205,8 +2205,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_prepare_dp_ddi_buffers(encoder, crtc_state); intel_ddi_init_dp_buf_reg(encoder); - if (!is_mst) - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) intel_dp_stop_link_train(intel_dp); @@ -2304,14 +2303,12 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); struct intel_dp *intel_dp = &dig_port->dp; - bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST); /* * Power down sink before disabling the port, otherwise we end * up getting interrupts from the sink on detecting link loss. */ - if (!is_mst) - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); intel_disable_ddi_buf(encoder); @@ -3080,9 +3077,12 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); intel_encoder->cloneable = 0; - intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & - (DDI_BUF_PORT_REVERSAL | - DDI_A_4_LANES); + if (INTEL_GEN(dev_priv) >= 11) + intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & + DDI_BUF_PORT_REVERSAL; + else + intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & + (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); intel_dig_port->dp.output_reg = INVALID_MMIO_REG; intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port); diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 42e45ae87393..c8ea510629fa 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -246,7 +246,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = I915_READ_CTL(engine); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, 0, + i915_handle_error(dev_priv, BIT(engine->id), "Kicking stuck wait on %s", engine->name); I915_WRITE_CTL(engine, tmp); @@ -258,7 +258,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) default: return ENGINE_DEAD; case 1: - i915_handle_error(dev_priv, 0, + i915_handle_error(dev_priv, ALL_ENGINES, "Kicking stuck semaphore on %s", engine->name); I915_WRITE_CTL(engine, tmp); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3a69b367e565..697af5add78b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -831,7 +831,8 @@ static void execlists_submission_tasklet(unsigned long data) struct drm_i915_private *dev_priv = engine->i915; bool fw = false; - /* We can skip acquiring intel_runtime_pm_get() here as it was taken + /* + * We can skip acquiring intel_runtime_pm_get() here as it was taken * on our behalf by the request (see i915_gem_mark_busy()) and it will * not be relinquished until the device is idle (see * i915_gem_idle_work_handler()). As a precaution, we make sure @@ -840,7 +841,8 @@ static void execlists_submission_tasklet(unsigned long data) */ GEM_BUG_ON(!dev_priv->gt.awake); - /* Prefer doing test_and_clear_bit() as a two stage operation to avoid + /* + * Prefer doing test_and_clear_bit() as a two stage operation to avoid * imposing the cost of a locked atomic transaction when submitting a * new request (outside of the context-switch interrupt). */ @@ -856,17 +858,10 @@ static void execlists_submission_tasklet(unsigned long data) execlists->csb_head = -1; /* force mmio read of CSB ptrs */ } - /* The write will be ordered by the uncached read (itself - * a memory barrier), so we do not need another in the form - * of a locked instruction. The race between the interrupt - * handler and the split test/clear is harmless as we order - * our clear before the CSB read. If the interrupt arrived - * first between the test and the clear, we read the updated - * CSB and clear the bit. If the interrupt arrives as we read - * the CSB or later (i.e. after we had cleared the bit) the bit - * is set and we do a new loop. - */ - __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + /* Clear before reading to catch new interrupts */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + smp_mb__after_atomic(); + if (unlikely(execlists->csb_head == -1)) { /* following a reset */ if (!fw) { intel_uncore_forcewake_get(dev_priv, |