19 files changed, 295 insertions, 87 deletions
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index c8454ac43fae..db6b94dda5df 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -471,6 +471,7 @@ struct parser_exec_state {
 	 * used when ret from 2nd level batch buffer
 	 */
 	int saved_buf_addr_type;
+	bool is_ctx_wa;
 
 	struct cmd_info *info;
 
@@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	bb->accessing = true;
 	bb->bb_start_cmd_va = s->ip_va;
 
+	if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
+		bb->bb_offset = s->ip_va - s->rb_va;
+	else
+		bb->bb_offset = 0;
+
 	/*
 	 * ip_va saves the virtual address of the shadow batch buffer, while
 	 * ip_gma saves the graphics address of the original batch buffer.
@@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload)
 	s.ring_tail = gma_tail;
 	s.rb_va = workload->shadow_ring_buffer_va;
 	s.workload = workload;
+	s.is_ctx_wa = false;
 
 	if ((bypass_scan_mask & (1 << workload->ring_id)) ||
 		gma_head == gma_tail)
@@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	s.ring_tail = gma_tail;
 	s.rb_va = wa_ctx->indirect_ctx.shadow_va;
 	s.workload = workload;
+	s.is_ctx_wa = true;
 
 	if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 0a100a288e6d..d29281231507 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2046,7 +2046,7 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
 	}
 
 	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
-		gvt_err("vgpu ppgtt mm is not fully destoried\n");
+		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
 
 	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
 		gvt_err("Why we still has spt not freed?\n");
@@ -2291,6 +2291,28 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt)
 }
 
 /**
+ * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
+ * @vgpu: a vGPU
+ *
+ * This function is called when invalidate all PPGTT instances of a vGPU.
+ *
+ */
+void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
+{
+	struct list_head *pos, *n;
+	struct intel_vgpu_mm *mm;
+
+	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
+		if (mm->type == INTEL_GVT_MM_PPGTT) {
+			list_del_init(&mm->ppgtt_mm.lru_list);
+			if (mm->ppgtt_mm.shadowed)
+				invalidate_ppgtt_mm(mm);
+		}
+	}
+}
+
+/**
  * intel_vgpu_reset_ggtt - reset the GGTT entry
  * @vgpu: a vGPU
  *
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index e831507e17c3..a8b369cd352b 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -194,6 +194,7 @@ struct intel_vgpu_gtt {
 extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu);
 extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu);
 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu);
+void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
 
 extern int intel_gvt_init_gtt(struct intel_gvt *gvt);
 void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 112f2ec7c25f..8c5d5d005854 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1767,6 +1767,10 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(CURBASE(PIPE_B), D_ALL);
 	MMIO_D(CURBASE(PIPE_C), D_ALL);
 
+	MMIO_D(CUR_FBC_CTL(PIPE_A), D_ALL);
+	MMIO_D(CUR_FBC_CTL(PIPE_B), D_ALL);
+	MMIO_D(CUR_FBC_CTL(PIPE_C), D_ALL);
+
 	MMIO_D(_MMIO(0x700ac), D_ALL);
 	MMIO_D(_MMIO(0x710ac), D_ALL);
 	MMIO_D(_MMIO(0x720ac), D_ALL);
@@ -2228,6 +2232,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_D(HSW_AUD_CFG(PIPE_A), D_ALL);
 	MMIO_D(HSW_AUD_PIN_ELD_CP_VLD, D_ALL);
+	MMIO_D(HSW_AUD_MISC_CTRL(PIPE_A), D_ALL);
 
 	MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_A), D_ALL, NULL, NULL);
 	MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_B), D_ALL, NULL, NULL);
@@ -2559,6 +2564,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(WM_MISC, D_BDW);
 	MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW);
 
+	MMIO_D(_MMIO(0x6671c), D_BDW_PLUS);
 	MMIO_D(_MMIO(0x66c00), D_BDW_PLUS);
 	MMIO_D(_MMIO(0x66c04), D_BDW_PLUS);
 
@@ -2787,6 +2793,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(_MMIO(0x70380), D_SKL_PLUS);
 	MMIO_D(_MMIO(0x71380), D_SKL_PLUS);
 	MMIO_D(_MMIO(0x72380), D_SKL_PLUS);
+	MMIO_D(_MMIO(0x7239c), D_SKL_PLUS);
 	MMIO_D(_MMIO(0x7039c), D_SKL_PLUS);
 
 	MMIO_D(_MMIO(0x8f074), D_SKL | D_KBL);
@@ -2801,7 +2808,9 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
 	MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
 
+	MMIO_D(RPM_CONFIG0, D_SKL_PLUS);
 	MMIO_D(_MMIO(0xd08), D_SKL_PLUS);
+	MMIO_D(RC6_LOCATION, D_SKL_PLUS);
 	MMIO_DFH(_MMIO(0x20e0), D_SKL_PLUS, F_MODE_MASK, NULL, NULL);
 	MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 8a428678e4b5..c16a492449d7 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -184,7 +184,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
 	return NULL;
 }
 
-static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
+static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 		dma_addr_t dma_addr)
 {
 	struct gvt_dma *new, *itr;
@@ -192,7 +192,7 @@ static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 
 	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
 	if (!new)
-		return;
+		return -ENOMEM;
 
 	new->vgpu = vgpu;
 	new->gfn = gfn;
@@ -229,6 +229,7 @@ static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 	rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache);
 
 	vgpu->vdev.nr_cache_entries++;
+	return 0;
 }
 
 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
@@ -749,6 +750,25 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
 	return ret == 0 ? count : ret;
 }
 
+static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
+{
+	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+	struct intel_gvt *gvt = vgpu->gvt;
+	int offset;
+
+	/* Only allow MMIO GGTT entry access */
+	if (index != PCI_BASE_ADDRESS_0)
+		return false;
+
+	offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
+		intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
+
+	return (offset >= gvt->device_info.gtt_start_offset &&
+		offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
+			true : false;
+}
+
 static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
 			size_t count, loff_t *ppos)
 {
@@ -758,7 +778,21 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
 	while (count) {
 		size_t filled;
 
-		if (count >= 4 && !(*ppos % 4)) {
+		/* Only support GGTT entry 8 bytes read */
+		if (count >= 8 && !(*ppos % 8) &&
+			gtt_entry(mdev, ppos)) {
+			u64 val;
+
+			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+					ppos, false);
+			if (ret <= 0)
+				goto read_err;
+
+			if (copy_to_user(buf, &val, sizeof(val)))
+				goto read_err;
+
+			filled = 8;
+		} else if (count >= 4 && !(*ppos % 4)) {
 			u32 val;
 
 			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
@@ -818,7 +852,21 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev,
 	while (count) {
 		size_t filled;
 
-		if (count >= 4 && !(*ppos % 4)) {
+		/* Only support GGTT entry 8 bytes write */
+		if (count >= 8 && !(*ppos % 8) &&
+			gtt_entry(mdev, ppos)) {
+			u64 val;
+
+			if (copy_from_user(&val, buf, sizeof(val)))
+				goto write_err;
+
+			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+					ppos, true);
+			if (ret <= 0)
+				goto write_err;
+
+			filled = 8;
+		} else if (count >= 4 && !(*ppos % 4)) {
 			u32 val;
 
 			if (copy_from_user(&val, buf, sizeof(val)))
@@ -1586,11 +1634,12 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
 	entry = __gvt_cache_find_gfn(info->vgpu, gfn);
 	if (!entry) {
 		ret = gvt_dma_map_page(vgpu, gfn, dma_addr);
-		if (ret) {
-			mutex_unlock(&info->vgpu->vdev.cache_lock);
-			return ret;
-		}
-		__gvt_cache_add(info->vgpu, gfn, *dma_addr);
+		if (ret)
+			goto err_unlock;
+
+		ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr);
+		if (ret)
+			goto err_unmap;
 	} else {
 		kref_get(&entry->ref);
 		*dma_addr = entry->dma_addr;
@@ -1598,6 +1647,12 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
 
 	mutex_unlock(&info->vgpu->vdev.cache_lock);
 	return 0;
+
+err_unmap:
+	gvt_dma_unmap_page(vgpu, gfn, *dma_addr);
+err_unlock:
+	mutex_unlock(&info->vgpu->vdev.cache_lock);
+	return ret;
 }
 
 static void __gvt_dma_release(struct kref *ref)
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 74a9c7b5516e..a5bac83d53a9 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -120,6 +120,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
 	{RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
 	{RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
 	{RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
+	{RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
 	{RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
 	{RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
 	{RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */
@@ -557,9 +558,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 	 * performace for batch mmio read/write, so we need
 	 * handle forcewake mannually.
 	 */
+	intel_runtime_pm_get(dev_priv);
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 	switch_mmio(pre, next, ring_id);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	intel_runtime_pm_put(dev_priv);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 9b92b4e25a20..638abe84857c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -52,6 +52,77 @@ static void set_context_pdp_root_pointer(
 		pdp_pair[i].val = pdp[7 - i];
 }
 
+static void update_shadow_pdps(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	int ring_id = workload->ring_id;
+	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
+	struct drm_i915_gem_object *ctx_obj =
+		shadow_ctx->engine[ring_id].state->obj;
+	struct execlist_ring_context *shadow_ring_context;
+	struct page *page;
+
+	if (WARN_ON(!workload->shadow_mm))
+		return;
+
+	if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount)))
+		return;
+
+	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
+	shadow_ring_context = kmap(page);
+	set_context_pdp_root_pointer(shadow_ring_context,
+			(void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
+	kunmap(page);
+}
+
+/*
+ * when populating shadow ctx from guest, we should not overrride oa related
+ * registers, so that they will not be overlapped by guest oa configs. Thus
+ * made it possible to capture oa data from host for both host and guests.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+		u32 *reg_state, bool save)
+{
+	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+	u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+	u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+	int i = 0;
+	u32 flex_mmio[] = {
+		i915_mmio_reg_offset(EU_PERF_CNTL0),
+		i915_mmio_reg_offset(EU_PERF_CNTL1),
+		i915_mmio_reg_offset(EU_PERF_CNTL2),
+		i915_mmio_reg_offset(EU_PERF_CNTL3),
+		i915_mmio_reg_offset(EU_PERF_CNTL4),
+		i915_mmio_reg_offset(EU_PERF_CNTL5),
+		i915_mmio_reg_offset(EU_PERF_CNTL6),
+	};
+
+	if (!workload || !reg_state || workload->ring_id != RCS)
+		return;
+
+	if (save) {
+		workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+
+		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+			u32 state_offset = ctx_flexeu0 + i * 2;
+
+			workload->flex_mmio[i] = reg_state[state_offset + 1];
+		}
+	} else {
+		reg_state[ctx_oactxctrl] =
+			i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+		reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+
+		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+			u32 state_offset = ctx_flexeu0 + i * 2;
+			u32 mmio = flex_mmio[i];
+
+			reg_state[state_offset] = mmio;
+			reg_state[state_offset + 1] = workload->flex_mmio[i];
+		}
+	}
+}
+
 static int populate_shadow_context(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
@@ -98,11 +169,18 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
 	shadow_ring_context = kmap(page);
 
+	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
 #define COPY_REG(name) \
 	intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
 		+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
+#define COPY_REG_MASKED(name) {\
+		intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
+					      + RING_CTX_OFF(name.val),\
+					      &shadow_ring_context->name.val, 4);\
+		shadow_ring_context->name.val |= 0xffff << 16;\
+	}
 
-	COPY_REG(ctx_ctrl);
+	COPY_REG_MASKED(ctx_ctrl);
 	COPY_REG(ctx_timestamp);
 
 	if (ring_id == RCS) {
@@ -111,9 +189,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 		COPY_REG(rcs_indirect_ctx_offset);
 	}
 #undef COPY_REG
-
-	set_context_pdp_root_pointer(shadow_ring_context,
-				     (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
+#undef COPY_REG_MASKED
 
 	intel_gvt_hypervisor_read_gpa(vgpu,
 			workload->ring_context_gpa +
@@ -122,6 +198,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 			sizeof(*shadow_ring_context),
 			I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
 
+	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
 	kunmap(page);
 	return 0;
 }
@@ -381,6 +458,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			goto err;
 		}
 
+		/* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va
+		 * is only updated into ring_scan_buffer, not real ring address
+		 * allocated in later copy_workload_to_ring_buffer. pls be noted
+		 * shadow_ring_buffer_va is now pointed to real ring buffer va
+		 * in copy_workload_to_ring_buffer.
+		 */
+
+		if (bb->bb_offset)
+			bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+				+ bb->bb_offset;
+
 		/* relocate shadow batch buffer */
 		bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
 		if (gmadr_bytes == 8)
@@ -509,6 +597,8 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
 		return ret;
 	}
 
+	update_shadow_pdps(workload);
+
 	ret = intel_vgpu_sync_oos_pages(workload->vgpu);
 	if (ret) {
 		gvt_vgpu_err("fail to vgpu sync oos pages\n");
@@ -1049,10 +1139,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 
 	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
 
-	s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
-			sizeof(struct intel_vgpu_workload), 0,
-			SLAB_HWCACHE_ALIGN,
-			NULL);
+	s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
+						  sizeof(struct intel_vgpu_workload), 0,
+						  SLAB_HWCACHE_ALIGN,
+						  offsetof(struct intel_vgpu_workload, rb_tail),
+						  sizeof_field(struct intel_vgpu_workload, rb_tail),
+						  NULL);
 
 	if (!s->workloads) {
 		ret = -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index bab4097aa6d7..486ed57a4ad1 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -110,6 +110,10 @@ struct intel_vgpu_workload {
 	/* shadow batch buffer */
 	struct list_head shadow_bb;
 	struct intel_shadow_wa_ctx wa_ctx;
+
+	/* oa registers */
+	u32 oactxctrl;
+	u32 flex_mmio[7];
 };
 
 struct intel_vgpu_shadow_bb {
@@ -120,6 +124,7 @@ struct intel_vgpu_shadow_bb {
 	u32 *bb_start_cmd_va;
 	unsigned int clflush;
 	bool accessing;
+	unsigned long bb_offset;
 };
 
 #define workload_q_head(vgpu, ring_id) \
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
index fc7831a62121..82093f1e8612 100644
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -333,7 +333,7 @@ TRACE_EVENT(render_mmio,
 	TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
 		 unsigned int old_val, unsigned int new_val),
 
-	TP_ARGS(old_id, new_id, action, reg, new_val, old_val),
+	TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
 
 	TP_STRUCT__entry(
 		__field(int, old_id)
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 41f76e86aa1f..2e0a02a80fe4 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -522,6 +522,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 	/* full GPU reset or device model level reset */
 	if (engine_mask == ALL_ENGINES || dmlr) {
 		intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
+		intel_vgpu_invalidate_ppgtt(vgpu);
 		/*fence will not be reset during virtual reset */
 		if (dmlr) {
 			intel_vgpu_reset_gtt(vgpu);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d7c4de45644d..07c07d55398b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1611,15 +1611,12 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = dev_priv->drm.pdev;
-	bool fw_csr;
 	int ret;
 
 	disable_rpm_wakeref_asserts(dev_priv);
 
 	intel_display_set_init_power(dev_priv, false);
 
-	fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation &&
-		suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload;
 	/*
 	 * In case of firmware assisted context save/restore don't manually
 	 * deinit the power domains. This also means the CSR/DMC firmware will
@@ -1627,8 +1624,11 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 	 * also enable deeper system power states that would be blocked if the
 	 * firmware was inactive.
 	 */
-	if (!fw_csr)
+	if (IS_GEN9_LP(dev_priv) || hibernation || !suspend_to_idle(dev_priv) ||
+	    dev_priv->csr.dmc_payload == NULL) {
 		intel_power_domains_suspend(dev_priv);
+		dev_priv->power_domains_suspended = true;
+	}
 
 	ret = 0;
 	if (IS_GEN9_LP(dev_priv))
@@ -1640,8 +1640,10 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 
 	if (ret) {
 		DRM_ERROR("Suspend complete failed: %d\n", ret);
-		if (!fw_csr)
+		if (dev_priv->power_domains_suspended) {
 			intel_power_domains_init_hw(dev_priv, true);
+			dev_priv->power_domains_suspended = false;
+		}
 
 		goto out;
 	}
@@ -1662,8 +1664,6 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 	if (!(hibernation && INTEL_GEN(dev_priv) < 6))
 		pci_set_power_state(pdev, PCI_D3hot);
 
-	dev_priv->suspended_to_idle = suspend_to_idle(dev_priv);
-
 out:
 	enable_rpm_wakeref_asserts(dev_priv);
 
@@ -1830,8 +1830,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
 	intel_uncore_resume_early(dev_priv);
 
 	if (IS_GEN9_LP(dev_priv)) {
-		if (!dev_priv->suspended_to_idle)
-			gen9_sanitize_dc_state(dev_priv);
+		gen9_sanitize_dc_state(dev_priv);
 		bxt_disable_dc9(dev_priv);
 	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
 		hsw_disable_pc8(dev_priv);
@@ -1839,8 +1838,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
 
 	intel_uncore_sanitize(dev_priv);
 
-	if (IS_GEN9_LP(dev_priv) ||
-	    !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload))
+	if (dev_priv->power_domains_suspended)
 		intel_power_domains_init_hw(dev_priv, true);
 	else
 		intel_display_set_init_power(dev_priv, true);
@@ -1850,7 +1848,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
 	enable_rpm_wakeref_asserts(dev_priv);
 
 out:
-	dev_priv->suspended_to_idle = false;
+	dev_priv->power_domains_suspended = false;
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6e740f6fe33f..ce18b6cf6e68 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2119,7 +2119,7 @@ struct drm_i915_private {
 	u32 bxt_phy_grc;
 
 	u32 suspend_count;
-	bool suspended_to_idle;
+	bool power_domains_suspended;
 	struct i915_suspend_saved_registers regfile;
 	struct vlv_s0ix_state vlv_s0ix_state;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a5bd07338b46..7b5a9d7c9593 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -433,20 +433,28 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
 			dma_fence_put(shared[i]);
 		kfree(shared);
 
+		/*
+		 * If both shared fences and an exclusive fence exist,
+		 * then by construction the shared fences must be later
+		 * than the exclusive fence. If we successfully wait for
+		 * all the shared fences, we know that the exclusive fence
+		 * must all be signaled. If all the shared fences are
+		 * signaled, we can prune the array and recover the
+		 * floating references on the fences/requests.
+		 */
 		prune_fences = count && timeout >= 0;
 	} else {
 		excl = reservation_object_get_excl_rcu(resv);
 	}
 
-	if (excl && timeout >= 0) {
+	if (excl && timeout >= 0)
 		timeout = i915_gem_object_wait_fence(excl, flags, timeout,
 						     rps_client);
-		prune_fences = timeout >= 0;
-	}
 
 	dma_fence_put(excl);
 
-	/* Oportunistically prune the fences iff we know they have *all* been
+	/*
+	 * Opportunistically prune the fences iff we know they have *all* been
 	 * signaled and that the reservation object has not been changed (i.e.
 	 * no new fences have been added).
 	 */
@@ -471,10 +479,11 @@ static void __fence_set_priority(struct dma_fence *fence, int prio)
 
 	rq = to_request(fence);
 	engine = rq->engine;
-	if (!engine->schedule)
-		return;
 
-	engine->schedule(rq, prio);
+	rcu_read_lock();
+	if (engine->schedule)
+		engine->schedule(rq, prio);
+	rcu_read_unlock();
 }
 
 static void fence_set_priority(struct dma_fence *fence, int prio)
@@ -2939,8 +2948,16 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 	 * calling engine->init_hw() and also writing the ELSP.
 	 * Turning off the execlists->tasklet until the reset is over
 	 * prevents the race.
+	 *
+	 * Note that this needs to be a single atomic operation on the
+	 * tasklet (flush existing tasks, prevent new tasks) to prevent
+	 * a race between reset and set-wedged. It is not, so we do the best
+	 * we can atm and make sure we don't lock the machine up in the more
+	 * common case of recursively being called from set-wedged from inside
+	 * i915_reset.
 	 */
-	tasklet_kill(&engine->execlists.tasklet);
+	if (!atomic_read(&engine->execlists.tasklet.count))
+		tasklet_kill(&engine->execlists.tasklet);
 	tasklet_disable(&engine->execlists.tasklet);
 
 	/*
@@ -3214,8 +3231,11 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 	 */
 	for_each_engine(engine, i915, id) {
 		i915_gem_reset_prepare_engine(engine);
+
 		engine->submit_request = nop_submit_request;
+		engine->schedule = NULL;
 	}
+	i915->caps.scheduler = 0;
 
 	/*
 	 * Make sure no one is running the old callback before we proceed with
@@ -3233,11 +3253,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		 * start to complete all requests.
 		 */
 		engine->submit_request = nop_complete_submit_request;
-		engine->schedule = NULL;
 	}
 
-	i915->caps.scheduler = 0;
-
 	/*
 	 * Make sure no request can slip through without getting completed by
 	 * either this call here to intel_engine_init_global_seqno, or the one
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 964467b03e4d..d8feb9053e0c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -433,7 +433,7 @@ static u64 __get_rc6(struct drm_i915_private *i915)
 	return val;
 }
 
-static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+static u64 get_rc6(struct drm_i915_private *i915)
 {
 #if IS_ENABLED(CONFIG_PM)
 	unsigned long flags;
@@ -449,8 +449,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 		 * previously.
 		 */
 
-		if (!locked)
-			spin_lock_irqsave(&i915->pmu.lock, flags);
+		spin_lock_irqsave(&i915->pmu.lock, flags);
 
 		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
@@ -459,12 +458,10 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
 		}
 
-		if (!locked)
-			spin_unlock_irqrestore(&i915->pmu.lock, flags);
+		spin_unlock_irqrestore(&i915->pmu.lock, flags);
 	} else {
 		struct pci_dev *pdev = i915->drm.pdev;
 		struct device *kdev = &pdev->dev;
-		unsigned long flags2;
 
 		/*
 		 * We are runtime suspended.
@@ -473,10 +470,8 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 		 * on top of the last known real value, as the approximated RC6
 		 * counter value.
 		 */
-		if (!locked)
-			spin_lock_irqsave(&i915->pmu.lock, flags);
-
-		spin_lock_irqsave(&kdev->power.lock, flags2);
+		spin_lock_irqsave(&i915->pmu.lock, flags);
+		spin_lock(&kdev->power.lock);
 
 		if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
 			i915->pmu.suspended_jiffies_last =
@@ -486,14 +481,13 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 		      i915->pmu.suspended_jiffies_last;
 		val += jiffies - kdev->power.accounting_timestamp;
 
-		spin_unlock_irqrestore(&kdev->power.lock, flags2);
+		spin_unlock(&kdev->power.lock);
 
 		val = jiffies_to_nsecs(val);
 		val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
 		i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
 
-		if (!locked)
-			spin_unlock_irqrestore(&i915->pmu.lock, flags);
+		spin_unlock_irqrestore(&i915->pmu.lock, flags);
 	}
 
 	return val;
@@ -502,7 +496,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 #endif
 }
 
-static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
+static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
@@ -540,7 +534,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
 			val = count_interrupts(i915);
 			break;
 		case I915_PMU_RC6_RESIDENCY:
-			val = get_rc6(i915, locked);
+			val = get_rc6(i915);
 			break;
 		}
 	}
@@ -555,7 +549,7 @@ static void i915_pmu_event_read(struct perf_event *event)
 
 again:
 	prev = local64_read(&hwc->prev_count);
-	new = __i915_pmu_event_read(event, false);
+	new = __i915_pmu_event_read(event);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
 		goto again;
@@ -605,14 +599,14 @@ static void i915_pmu_enable(struct perf_event *event)
 		engine->pmu.enable_count[sample]++;
 	}
 
+	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+
 	/*
 	 * Store the current counter value so we can report the correct delta
 	 * for all listeners. Even when the event was already enabled and has
 	 * an existing non-zero value.
 	 */
-	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
-
-	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
 }
 
 static void i915_pmu_disable(struct perf_event *event)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d437beac3969..282f57630cc1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1081,8 +1081,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
 	 * decide whether to preempt the entire chain so that it is ready to
 	 * run at the earliest possible convenience.
 	 */
+	rcu_read_lock();
 	if (engine->schedule)
 		engine->schedule(request, request->ctx->priority);
+	rcu_read_unlock();
 
 	local_bh_disable();
 	i915_sw_fence_commit(&request->submit);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index b33d2158c234..e5e6f6bb2b05 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -304,8 +304,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
+	bool boost = false;
 	ssize_t ret;
+	u32 val;
 
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
@@ -317,8 +318,13 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 		return -EINVAL;
 
 	mutex_lock(&dev_priv->pcu_lock);
-	rps->boost_freq = val;
+	if (val != rps->boost_freq) {
+		rps->boost_freq = val;
+		boost = atomic_read(&rps->num_waiters);
+	}
 	mutex_unlock(&dev_priv->pcu_lock);
+	if (boost)
+		schedule_work(&rps->work);
 
 	return count;
 }
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index ac8fc2a44ac6..8c2d778560f0 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2205,8 +2205,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
 		intel_prepare_dp_ddi_buffers(encoder, crtc_state);
 
 	intel_ddi_init_dp_buf_reg(encoder);
-	if (!is_mst)
-		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+	intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
 	intel_dp_start_link_train(intel_dp);
 	if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
 		intel_dp_stop_link_train(intel_dp);
@@ -2304,14 +2303,12 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
 	struct intel_dp *intel_dp = &dig_port->dp;
-	bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST);
 
 	/*
 	 * Power down sink before disabling the port, otherwise we end
 	 * up getting interrupts from the sink on detecting link loss.
 	 */
-	if (!is_mst)
-		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+	intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
 
 	intel_disable_ddi_buf(encoder);
 
@@ -3080,9 +3077,12 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 	intel_encoder->cloneable = 0;
 
-	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
-					  (DDI_BUF_PORT_REVERSAL |
-					   DDI_A_4_LANES);
+	if (INTEL_GEN(dev_priv) >= 11)
+		intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
+			DDI_BUF_PORT_REVERSAL;
+	else
+		intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
+			(DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES);
 	intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 	intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port);
 
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 42e45ae87393..c8ea510629fa 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -246,7 +246,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 	 */
 	tmp = I915_READ_CTL(engine);
 	if (tmp & RING_WAIT) {
-		i915_handle_error(dev_priv, 0,
+		i915_handle_error(dev_priv, BIT(engine->id),
 				  "Kicking stuck wait on %s",
 				  engine->name);
 		I915_WRITE_CTL(engine, tmp);
@@ -258,7 +258,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 		default:
 			return ENGINE_DEAD;
 		case 1:
-			i915_handle_error(dev_priv, 0,
+			i915_handle_error(dev_priv, ALL_ENGINES,
 					  "Kicking stuck semaphore on %s",
 					  engine->name);
 			I915_WRITE_CTL(engine, tmp);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3a69b367e565..697af5add78b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -831,7 +831,8 @@ static void execlists_submission_tasklet(unsigned long data)
 	struct drm_i915_private *dev_priv = engine->i915;
 	bool fw = false;
 
-	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
+	/*
+	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
 	 * not be relinquished until the device is idle (see
 	 * i915_gem_idle_work_handler()). As a precaution, we make sure
@@ -840,7 +841,8 @@ static void execlists_submission_tasklet(unsigned long data)
 	 */
 	GEM_BUG_ON(!dev_priv->gt.awake);
 
-	/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
+	/*
+	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
 	 * imposing the cost of a locked atomic transaction when submitting a
 	 * new request (outside of the context-switch interrupt).
 	 */
@@ -856,17 +858,10 @@ static void execlists_submission_tasklet(unsigned long data)
 			execlists->csb_head = -1; /* force mmio read of CSB ptrs */
 		}
 
-		/* The write will be ordered by the uncached read (itself
-		 * a memory barrier), so we do not need another in the form
-		 * of a locked instruction. The race between the interrupt
-		 * handler and the split test/clear is harmless as we order
-		 * our clear before the CSB read. If the interrupt arrived
-		 * first between the test and the clear, we read the updated
-		 * CSB and clear the bit. If the interrupt arrives as we read
-		 * the CSB or later (i.e. after we had cleared the bit) the bit
-		 * is set and we do a new loop.
-		 */
-		__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+		/* Clear before reading to catch new interrupts */
+		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+		smp_mb__after_atomic();
+
 		if (unlikely(execlists->csb_head == -1)) { /* following a reset */
 			if (!fw) {
 				intel_uncore_forcewake_get(dev_priv,