summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gvt/scheduler.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c')
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c108
1 files changed, 100 insertions, 8 deletions
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 9b92b4e25a20..638abe84857c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -52,6 +52,77 @@ static void set_context_pdp_root_pointer(
pdp_pair[i].val = pdp[7 - i];
}
+static void update_shadow_pdps(struct intel_vgpu_workload *workload)
+{
+ struct intel_vgpu *vgpu = workload->vgpu;
+ int ring_id = workload->ring_id;
+ struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
+ struct drm_i915_gem_object *ctx_obj =
+ shadow_ctx->engine[ring_id].state->obj;
+ struct execlist_ring_context *shadow_ring_context;
+ struct page *page;
+
+ if (WARN_ON(!workload->shadow_mm))
+ return;
+
+ if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount)))
+ return;
+
+ page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
+ shadow_ring_context = kmap(page);
+ set_context_pdp_root_pointer(shadow_ring_context,
+ (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
+ kunmap(page);
+}
+
+/*
+ * when populating shadow ctx from guest, we should not overrride oa related
+ * registers, so that they will not be overlapped by guest oa configs. Thus
+ * made it possible to capture oa data from host for both host and guests.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+ u32 *reg_state, bool save)
+{
+ struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+ u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+ u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+ int i = 0;
+ u32 flex_mmio[] = {
+ i915_mmio_reg_offset(EU_PERF_CNTL0),
+ i915_mmio_reg_offset(EU_PERF_CNTL1),
+ i915_mmio_reg_offset(EU_PERF_CNTL2),
+ i915_mmio_reg_offset(EU_PERF_CNTL3),
+ i915_mmio_reg_offset(EU_PERF_CNTL4),
+ i915_mmio_reg_offset(EU_PERF_CNTL5),
+ i915_mmio_reg_offset(EU_PERF_CNTL6),
+ };
+
+ if (!workload || !reg_state || workload->ring_id != RCS)
+ return;
+
+ if (save) {
+ workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+
+ for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+ u32 state_offset = ctx_flexeu0 + i * 2;
+
+ workload->flex_mmio[i] = reg_state[state_offset + 1];
+ }
+ } else {
+ reg_state[ctx_oactxctrl] =
+ i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+ reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+
+ for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+ u32 state_offset = ctx_flexeu0 + i * 2;
+ u32 mmio = flex_mmio[i];
+
+ reg_state[state_offset] = mmio;
+ reg_state[state_offset + 1] = workload->flex_mmio[i];
+ }
+ }
+}
+
static int populate_shadow_context(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
@@ -98,11 +169,18 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
shadow_ring_context = kmap(page);
+ sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
#define COPY_REG(name) \
intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
+#define COPY_REG_MASKED(name) {\
+ intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
+ + RING_CTX_OFF(name.val),\
+ &shadow_ring_context->name.val, 4);\
+ shadow_ring_context->name.val |= 0xffff << 16;\
+ }
- COPY_REG(ctx_ctrl);
+ COPY_REG_MASKED(ctx_ctrl);
COPY_REG(ctx_timestamp);
if (ring_id == RCS) {
@@ -111,9 +189,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
COPY_REG(rcs_indirect_ctx_offset);
}
#undef COPY_REG
-
- set_context_pdp_root_pointer(shadow_ring_context,
- (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
+#undef COPY_REG_MASKED
intel_gvt_hypervisor_read_gpa(vgpu,
workload->ring_context_gpa +
@@ -122,6 +198,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
sizeof(*shadow_ring_context),
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
+ sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
kunmap(page);
return 0;
}
@@ -381,6 +458,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
goto err;
}
+ /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va
+ * is only updated into ring_scan_buffer, not real ring address
+ * allocated in later copy_workload_to_ring_buffer. pls be noted
+ * shadow_ring_buffer_va is now pointed to real ring buffer va
+ * in copy_workload_to_ring_buffer.
+ */
+
+ if (bb->bb_offset)
+ bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+ + bb->bb_offset;
+
/* relocate shadow batch buffer */
bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
if (gmadr_bytes == 8)
@@ -509,6 +597,8 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
return ret;
}
+ update_shadow_pdps(workload);
+
ret = intel_vgpu_sync_oos_pages(workload->vgpu);
if (ret) {
gvt_vgpu_err("fail to vgpu sync oos pages\n");
@@ -1049,10 +1139,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
- s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
- sizeof(struct intel_vgpu_workload), 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
+ s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
+ sizeof(struct intel_vgpu_workload), 0,
+ SLAB_HWCACHE_ALIGN,
+ offsetof(struct intel_vgpu_workload, rb_tail),
+ sizeof_field(struct intel_vgpu_workload, rb_tail),
+ NULL);
if (!s->workloads) {
ret = -ENOMEM;