summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/i915/i915_perf.c
diff options
context:
space:
mode:
author: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> 2019-08-06 16:30:02 -0700
committer: Chris Wilson <chris@chris-wilson.co.uk> 2019-08-07 20:34:39 +0100
commit: a37f08a882b01a6e86a07512a082b14d20ee0773 (patch)
tree: 08d24532e55f5d905a3e61cc9ff23aaeadc368c1 /drivers/gpu/drm/i915/i915_perf.c
parent: 26f00514d944db8761d62ebf6f94766a81b97480 (diff)
download: linux-a37f08a882b01a6e86a07512a082b14d20ee0773.tar.bz2
drm/i915/perf: Refactor oa object to better manage resources
The oa object manages the oa buffer and must be allocated when the user intends to read performance counter snapshots. This can be achieved by making the oa object part of the stream object which is allocated when a stream is opened by the user.

Attributes in the oa object that are gen-specific are moved to the perf object so that they can be initialized on driver load.

The split provides a better separation of the objects used in perf implementation of i915 driver so that resources are allocated and initialized only when needed.

v2: Fix checkpatch warnings
v3: Addressed Lionel's review comment
v4: Rebase
v5: Fix rebase/merge issue with ratelimit_state_init

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190806233002.984-1-umesh.nerlige.ramappa@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r-- drivers/gpu/drm/i915/i915_perf.c 540
1 files changed, 276 insertions, 264 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2bcf0d6e2ee2..7b59a7038556 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -364,6 +364,8 @@ struct perf_open_properties {
int oa_period_exponent;
};
+static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
+
static void free_oa_config(struct drm_i915_private *dev_priv,
struct i915_oa_config *oa_config)
{
@@ -392,8 +394,8 @@ static int get_oa_config(struct drm_i915_private *dev_priv,
int ret;
if (metrics_set == 1) {
- *out_config = &dev_priv->perf.oa.test_config;
- atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
+ *out_config = &dev_priv->perf.test_config;
+ atomic_inc(&dev_priv->perf.test_config.ref_count);
return 0;
}
@@ -412,13 +414,16 @@ static int get_oa_config(struct drm_i915_private *dev_priv,
return ret;
}
-static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
+static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+
return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}
-static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
+static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
@@ -426,7 +431,7 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
/**
* oa_buffer_check_unlocked - check for data and update tail ptr state
- * @dev_priv: i915 device instance
+ * @stream: i915 stream instance
*
* This is either called via fops (for blocking reads in user ctx) or the poll
* check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
@@ -448,9 +453,10 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
*
* Returns: %true if the OA buffer contains data, else %false
*/
-static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
+static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
- int report_size = dev_priv->perf.oa.oa_buffer.format_size;
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+ int report_size = stream->oa_buffer.format_size;
unsigned long flags;
unsigned int aged_idx;
u32 head, hw_tail, aged_tail, aging_tail;
@@ -460,19 +466,19 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
* could result in an OA buffer reset which might reset the head,
* tails[] and aged_tail state.
*/
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
/* NB: The head we observe here might effectively be a little out of
* date (between head and tails[aged_idx].offset if there is currently
* a read() in progress).
*/
- head = dev_priv->perf.oa.oa_buffer.head;
+ head = stream->oa_buffer.head;
- aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
- aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
+ aged_idx = stream->oa_buffer.aged_tail_idx;
+ aged_tail = stream->oa_buffer.tails[aged_idx].offset;
+ aging_tail = stream->oa_buffer.tails[!aged_idx].offset;
- hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
+ hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);
/* The tail pointer increases in 64 byte increments,
* not in report_size steps...
@@ -492,16 +498,16 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
* available) without needing to wait for a later hrtimer callback.
*/
if (aging_tail != INVALID_TAIL_PTR &&
- ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
+ ((now - stream->oa_buffer.aging_timestamp) >
OA_TAIL_MARGIN_NSEC)) {
aged_idx ^= 1;
- dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
+ stream->oa_buffer.aged_tail_idx = aged_idx;
aged_tail = aging_tail;
/* Mark that we need a new pointer to start aging... */
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
aging_tail = INVALID_TAIL_PTR;
}
@@ -516,7 +522,7 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
if (aging_tail == INVALID_TAIL_PTR &&
(aged_tail == INVALID_TAIL_PTR ||
OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
- struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
+ struct i915_vma *vma = stream->oa_buffer.vma;
u32 gtt_offset = i915_ggtt_offset(vma);
/* Be paranoid and do a bounds check on the pointer read back
@@ -525,16 +531,16 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
*/
if (hw_tail >= gtt_offset &&
hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
+ stream->oa_buffer.tails[!aged_idx].offset =
aging_tail = hw_tail;
- dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
+ stream->oa_buffer.aging_timestamp = now;
} else {
DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
hw_tail);
}
}
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
return aged_tail == INVALID_TAIL_PTR ?
false : OA_TAKEN(aged_tail, head) >= report_size;
@@ -597,8 +603,7 @@ static int append_oa_sample(struct i915_perf_stream *stream,
size_t *offset,
const u8 *report)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
- int report_size = dev_priv->perf.oa.oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format_size;
struct drm_i915_perf_record_header header;
u32 sample_flags = stream->sample_flags;
@@ -650,9 +655,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
- u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+ int report_size = stream->oa_buffer.format_size;
+ u8 *oa_buf_base = stream->oa_buffer.vaddr;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
@@ -664,13 +669,13 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+ head = stream->oa_buffer.head;
+ aged_tail_idx = stream->oa_buffer.aged_tail_idx;
+ tail = stream->oa_buffer.tails[aged_tail_idx].offset;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/*
* An invalid tail pointer here means we're still waiting for the poll
@@ -734,12 +739,12 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
OAREPORT_REASON_MASK);
if (reason == 0) {
- if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
+ if (__ratelimit(&dev_priv->perf.spurious_report_rs))
DRM_NOTE("Skipping spurious, invalid OA report\n");
continue;
}
- ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;
+ ctx_id = report32[2] & stream->specific_ctx_id_mask;
/*
* Squash whatever is in the CTX_ID field if it's marked as
@@ -749,7 +754,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* Note that we don't clear the valid_ctx_bit so userspace can
* understand that the ID has been squashed by the kernel.
*/
- if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
+ if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit))
ctx_id = report32[2] = INVALID_CTX_ID;
/*
@@ -783,18 +788,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* switches since it's not-uncommon for periodic samples to
* identify a switch before any 'context switch' report.
*/
- if (!dev_priv->perf.oa.exclusive_stream->ctx ||
- dev_priv->perf.oa.specific_ctx_id == ctx_id ||
- (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
- dev_priv->perf.oa.specific_ctx_id) ||
+ if (!dev_priv->perf.exclusive_stream->ctx ||
+ stream->specific_ctx_id == ctx_id ||
+ stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
reason & OAREPORT_REASON_CTX_SWITCH) {
/*
* While filtering for a single context we avoid
* leaking the IDs of other contexts.
*/
- if (dev_priv->perf.oa.exclusive_stream->ctx &&
- dev_priv->perf.oa.specific_ctx_id != ctx_id) {
+ if (dev_priv->perf.exclusive_stream->ctx &&
+ stream->specific_ctx_id != ctx_id) {
report32[2] = INVALID_CTX_ID;
}
@@ -803,7 +807,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
if (ret)
break;
- dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
+ stream->oa_buffer.last_ctx_id = ctx_id;
}
/*
@@ -817,7 +821,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
}
if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
/*
* We removed the gtt_offset for the copy loop above, indexing
@@ -826,9 +830,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
head += gtt_offset;
I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
- dev_priv->perf.oa.oa_buffer.head = head;
+ stream->oa_buffer.head = head;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
}
return ret;
@@ -863,7 +867,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
u32 oastatus;
int ret;
- if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
+ if (WARN_ON(!stream->oa_buffer.vaddr))
return -EIO;
oastatus = I915_READ(GEN8_OASTATUS);
@@ -889,10 +893,10 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
return ret;
DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
- dev_priv->perf.oa.period_exponent);
+ stream->period_exponent);
- dev_priv->perf.oa.ops.oa_disable(stream);
- dev_priv->perf.oa.ops.oa_enable(stream);
+ dev_priv->perf.ops.oa_disable(stream);
+ dev_priv->perf.ops.oa_enable(stream);
/*
* Note: .oa_enable() is expected to re-init the oabuffer and
@@ -939,9 +943,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
- u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+ int report_size = stream->oa_buffer.format_size;
+ u8 *oa_buf_base = stream->oa_buffer.vaddr;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
@@ -953,13 +957,13 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+ head = stream->oa_buffer.head;
+ aged_tail_idx = stream->oa_buffer.aged_tail_idx;
+ tail = stream->oa_buffer.tails[aged_tail_idx].offset;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* An invalid tail pointer here means we're still waiting for the poll
* hrtimer callback to give us a pointer
@@ -1012,7 +1016,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
* copying it to userspace...
*/
if (report32[0] == 0) {
- if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
+ if (__ratelimit(&dev_priv->perf.spurious_report_rs))
DRM_NOTE("Skipping spurious, invalid OA report\n");
continue;
}
@@ -1031,7 +1035,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
}
if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
/* We removed the gtt_offset for the copy loop above, indexing
* relative to oa_buf_base so put back here...
@@ -1041,9 +1045,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
I915_WRITE(GEN7_OASTATUS2,
((head & GEN7_OASTATUS2_HEAD_MASK) |
GEN7_OASTATUS2_MEM_SELECT_GGTT));
- dev_priv->perf.oa.oa_buffer.head = head;
+ stream->oa_buffer.head = head;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
}
return ret;
@@ -1074,7 +1078,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
u32 oastatus1;
int ret;
- if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
+ if (WARN_ON(!stream->oa_buffer.vaddr))
return -EIO;
oastatus1 = I915_READ(GEN7_OASTATUS1);
@@ -1084,7 +1088,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
* may be updated asynchronously) so we ignore status bits
* that have already been reported to userspace.
*/
- oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;
+ oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1;
/* We treat OABUFFER_OVERFLOW as a significant error:
*
@@ -1113,10 +1117,10 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
return ret;
DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
- dev_priv->perf.oa.period_exponent);
+ stream->period_exponent);
- dev_priv->perf.oa.ops.oa_disable(stream);
- dev_priv->perf.oa.ops.oa_enable(stream);
+ dev_priv->perf.ops.oa_disable(stream);
+ dev_priv->perf.ops.oa_enable(stream);
oastatus1 = I915_READ(GEN7_OASTATUS1);
}
@@ -1126,7 +1130,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
DRM_I915_PERF_RECORD_OA_REPORT_LOST);
if (ret)
return ret;
- dev_priv->perf.oa.gen7_latched_oastatus1 |=
+ dev_priv->perf.gen7_latched_oastatus1 |=
GEN7_OASTATUS1_REPORT_LOST;
}
@@ -1149,14 +1153,12 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
*/
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
-
/* We would wait indefinitely if periodic sampling is not enabled */
- if (!dev_priv->perf.oa.periodic)
+ if (!stream->periodic)
return -EIO;
- return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
- oa_buffer_check_unlocked(dev_priv));
+ return wait_event_interruptible(stream->poll_wq,
+ oa_buffer_check_unlocked(stream));
}
/**
@@ -1173,9 +1175,7 @@ static void i915_oa_poll_wait(struct i915_perf_stream *stream,
struct file *file,
poll_table *wait)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
-
- poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
+ poll_wait(file, &stream->poll_wq, wait);
}
/**
@@ -1197,13 +1197,14 @@ static int i915_oa_read(struct i915_perf_stream *stream,
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
+ return dev_priv->perf.ops.read(stream, buf, count, offset);
}
-static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
- struct i915_gem_context *ctx)
+static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
{
struct i915_gem_engines_iter it;
+ struct drm_i915_private *i915 = stream->dev_priv;
+ struct i915_gem_context *ctx = stream->ctx;
struct intel_context *ce;
int err;
@@ -1221,7 +1222,7 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
*/
err = intel_context_pin(ce);
if (err == 0) {
- i915->perf.oa.pinned_ctx = ce;
+ stream->pinned_ctx = ce;
break;
}
}
@@ -1231,7 +1232,7 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
if (err)
return ERR_PTR(err);
- return i915->perf.oa.pinned_ctx;
+ return stream->pinned_ctx;
}
/**
@@ -1249,7 +1250,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
struct drm_i915_private *i915 = stream->dev_priv;
struct intel_context *ce;
- ce = oa_pin_context(i915, stream->ctx);
+ ce = oa_pin_context(stream);
if (IS_ERR(ce))
return PTR_ERR(ce);
@@ -1259,8 +1260,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
* On Haswell we don't do any post processing of the reports
* and don't need to use the mask.
*/
- i915->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
- i915->perf.oa.specific_ctx_id_mask = 0;
+ stream->specific_ctx_id = i915_ggtt_offset(ce->state);
+ stream->specific_ctx_id_mask = 0;
break;
}
@@ -1278,33 +1279,33 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
* dropped by GuC. They won't be part of the context
* ID in the OA reports, so squash those lower bits.
*/
- i915->perf.oa.specific_ctx_id =
+ stream->specific_ctx_id =
lower_32_bits(ce->lrc_desc) >> 12;
/*
* GuC uses the top bit to signal proxy submission, so
* ignore that bit.
*/
- i915->perf.oa.specific_ctx_id_mask =
+ stream->specific_ctx_id_mask =
(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
} else {
- i915->perf.oa.specific_ctx_id_mask =
+ stream->specific_ctx_id_mask =
(1U << GEN8_CTX_ID_WIDTH) - 1;
- i915->perf.oa.specific_ctx_id =
+ stream->specific_ctx_id =
upper_32_bits(ce->lrc_desc);
- i915->perf.oa.specific_ctx_id &=
- i915->perf.oa.specific_ctx_id_mask;
+ stream->specific_ctx_id &=
+ stream->specific_ctx_id_mask;
}
break;
case 11: {
- i915->perf.oa.specific_ctx_id_mask =
+ stream->specific_ctx_id_mask =
((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
- i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc);
- i915->perf.oa.specific_ctx_id &=
- i915->perf.oa.specific_ctx_id_mask;
+ stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
+ stream->specific_ctx_id &=
+ stream->specific_ctx_id_mask;
break;
}
@@ -1313,8 +1314,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
}
DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
- i915->perf.oa.specific_ctx_id,
- i915->perf.oa.specific_ctx_id_mask);
+ stream->specific_ctx_id,
+ stream->specific_ctx_id_mask);
return 0;
}
@@ -1331,10 +1332,10 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
struct drm_i915_private *dev_priv = stream->dev_priv;
struct intel_context *ce;
- dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
- dev_priv->perf.oa.specific_ctx_id_mask = 0;
+ stream->specific_ctx_id = INVALID_CTX_ID;
+ stream->specific_ctx_id_mask = 0;
- ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
+ ce = fetch_and_zero(&stream->pinned_ctx);
if (ce) {
mutex_lock(&dev_priv->drm.struct_mutex);
intel_context_unpin(ce);
@@ -1343,34 +1344,36 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
}
static void
-free_oa_buffer(struct drm_i915_private *i915)
+free_oa_buffer(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *i915 = stream->dev_priv;
+
mutex_lock(&i915->drm.struct_mutex);
- i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma,
+ i915_vma_unpin_and_release(&stream->oa_buffer.vma,
I915_VMA_RELEASE_MAP);
mutex_unlock(&i915->drm.struct_mutex);
- i915->perf.oa.oa_buffer.vaddr = NULL;
+ stream->oa_buffer.vaddr = NULL;
}
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);
+ BUG_ON(stream != dev_priv->perf.exclusive_stream);
/*
* Unset exclusive_stream first, it will be checked while disabling
* the metric set on gen8+.
*/
mutex_lock(&dev_priv->drm.struct_mutex);
- dev_priv->perf.oa.exclusive_stream = NULL;
- dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+ dev_priv->perf.exclusive_stream = NULL;
+ dev_priv->perf.ops.disable_metric_set(stream);
mutex_unlock(&dev_priv->drm.struct_mutex);
- free_oa_buffer(dev_priv);
+ free_oa_buffer(stream);
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
@@ -1380,41 +1383,42 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
put_oa_config(dev_priv, stream->oa_config);
- if (dev_priv->perf.oa.spurious_report_rs.missed) {
+ if (dev_priv->perf.spurious_report_rs.missed) {
DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
- dev_priv->perf.oa.spurious_report_rs.missed);
+ dev_priv->perf.spurious_report_rs.missed);
}
}
-static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
+static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
{
- u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
/* Pre-DevBDW: OABUFFER must be set with counters off,
* before OASTATUS1, but after OASTATUS2
*/
I915_WRITE(GEN7_OASTATUS2,
gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
- dev_priv->perf.oa.oa_buffer.head = gtt_offset;
+ stream->oa_buffer.head = gtt_offset;
I915_WRITE(GEN7_OABUFFER, gtt_offset);
I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
/* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* On Haswell we have to track which OASTATUS1 flags we've
* already seen since they can't be cleared while periodic
* sampling is enabled.
*/
- dev_priv->perf.oa.gen7_latched_oastatus1 = 0;
+ dev_priv->perf.gen7_latched_oastatus1 = 0;
/* NB: although the OA buffer will initially be allocated
* zeroed via shmfs (and so this memset is redundant when
@@ -1427,24 +1431,25 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
* the assumption that new reports are being written to zeroed
* memory...
*/
- memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
+ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
/* Maybe make ->pollin per-stream state if we support multiple
* concurrent streams in the future.
*/
- dev_priv->perf.oa.pollin = false;
+ stream->pollin = false;
}
-static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
+static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
{
- u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
I915_WRITE(GEN8_OASTATUS, 0);
I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
- dev_priv->perf.oa.oa_buffer.head = gtt_offset;
+ stream->oa_buffer.head = gtt_offset;
I915_WRITE(GEN8_OABUFFER_UDW, 0);
@@ -1461,17 +1466,17 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
/*
* Reset state used to recognise context switches, affecting which
* reports we will forward to userspace while filtering for a single
* context.
*/
- dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;
+ stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/*
* NB: although the OA buffer will initially be allocated
@@ -1485,22 +1490,23 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
* the assumption that new reports are being written to zeroed
* memory...
*/
- memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
+ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
/*
* Maybe make ->pollin per-stream state if we support multiple
* concurrent streams in the future.
*/
- dev_priv->perf.oa.pollin = false;
+ stream->pollin = false;
}
-static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
+static int alloc_oa_buffer(struct i915_perf_stream *stream)
{
struct drm_i915_gem_object *bo;
+ struct drm_i915_private *dev_priv = stream->dev_priv;
struct i915_vma *vma;
int ret;
- if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
+ if (WARN_ON(stream->oa_buffer.vma))
return -ENODEV;
ret = i915_mutex_lock_interruptible(&dev_priv->drm);
@@ -1525,18 +1531,18 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
ret = PTR_ERR(vma);
goto err_unref;
}
- dev_priv->perf.oa.oa_buffer.vma = vma;
+ stream->oa_buffer.vma = vma;
- dev_priv->perf.oa.oa_buffer.vaddr =
+ stream->oa_buffer.vaddr =
i915_gem_object_pin_map(bo, I915_MAP_WB);
- if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
- ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
+ if (IS_ERR(stream->oa_buffer.vaddr)) {
+ ret = PTR_ERR(stream->oa_buffer.vaddr);
goto err_unpin;
}
DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
- i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
- dev_priv->perf.oa.oa_buffer.vaddr);
+ i915_ggtt_offset(stream->oa_buffer.vma),
+ stream->oa_buffer.vaddr);
goto unlock;
@@ -1546,8 +1552,8 @@ err_unpin:
err_unref:
i915_gem_object_put(bo);
- dev_priv->perf.oa.oa_buffer.vaddr = NULL;
- dev_priv->perf.oa.oa_buffer.vma = NULL;
+ stream->oa_buffer.vaddr = NULL;
+ stream->oa_buffer.vma = NULL;
unlock:
mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -1623,8 +1629,10 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream)
return 0;
}
-static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
+static void hsw_disable_metric_set(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+
I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
@@ -1663,13 +1671,14 @@ static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
* in the case that the OA unit has been disabled.
*/
static void
-gen8_update_reg_state_unlocked(struct intel_context *ce,
+gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
+ struct intel_context *ce,
u32 *reg_state,
const struct i915_oa_config *oa_config)
{
struct drm_i915_private *i915 = ce->engine->i915;
- u32 ctx_oactxctrl = i915->perf.oa.ctx_oactxctrl_offset;
- u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
+ u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset;
+ u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
/* The MMIO offsets for Flex EU registers aren't contiguous */
i915_reg_t flex_regs[] = {
EU_PERF_CNTL0,
@@ -1683,8 +1692,8 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
int i;
CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
- (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
- (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
@@ -1846,11 +1855,12 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
*
* Note: it's only the RCS/Render context that has any OA state.
*/
-static int gen8_configure_all_contexts(struct drm_i915_private *i915,
+static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config)
{
+ struct drm_i915_private *i915 = stream->dev_priv;
/* The MMIO offsets for Flex EU registers aren't contiguous */
- const u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
+ const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
struct flex regs[] = {
{
@@ -1859,9 +1869,9 @@ static int gen8_configure_all_contexts(struct drm_i915_private *i915,
},
{
GEN8_OACTXCONTROL,
- i915->perf.oa.ctx_oactxctrl_offset,
- ((i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
- (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ i915->perf.ctx_oactxctrl_offset,
+ ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME)
},
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
@@ -1971,7 +1981,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = gen8_configure_all_contexts(dev_priv, oa_config);
+ ret = gen8_configure_all_contexts(stream, oa_config);
if (ret)
return ret;
@@ -1984,19 +1994,23 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
return 0;
}
-static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
+static void gen8_disable_metric_set(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL);
+ gen8_configure_all_contexts(stream, NULL);
I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
~GT_NOA_ENABLE));
}
-static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
+static void gen10_disable_metric_set(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL);
+ gen8_configure_all_contexts(stream, NULL);
/* Make sure we disable noa to save power. */
I915_WRITE(RPM_CONFIG1,
@@ -2007,10 +2021,10 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
struct i915_gem_context *ctx = stream->ctx;
- u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
- bool periodic = dev_priv->perf.oa.periodic;
- u32 period_exponent = dev_priv->perf.oa.period_exponent;
- u32 report_format = dev_priv->perf.oa.oa_buffer.format;
+ u32 ctx_id = stream->specific_ctx_id;
+ bool periodic = stream->periodic;
+ u32 period_exponent = stream->period_exponent;
+ u32 report_format = stream->oa_buffer.format;
/*
* Reset buf pointers so we don't forward reports from before now.
@@ -2021,7 +2035,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
* on the assumption that certain fields are written to zeroed
* memory, which this helps maintain.
*/
- gen7_init_oa_buffer(dev_priv);
+ gen7_init_oa_buffer(stream);
I915_WRITE(GEN7_OACONTROL,
(ctx_id & GEN7_OACONTROL_CTX_MASK) |
@@ -2036,7 +2050,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
static void gen8_oa_enable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- u32 report_format = dev_priv->perf.oa.oa_buffer.format;
+ u32 report_format = stream->oa_buffer.format;
/*
* Reset buf pointers so we don't forward reports from before now.
@@ -2047,7 +2061,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
* on the assumption that certain fields are written to zeroed
* memory, which this helps maintain.
*/
- gen8_init_oa_buffer(dev_priv);
+ gen8_init_oa_buffer(stream);
/*
* Note: we don't rely on the hardware to perform single context
@@ -2072,10 +2086,10 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- dev_priv->perf.oa.ops.oa_enable(stream);
+ dev_priv->perf.ops.oa_enable(stream);
- if (dev_priv->perf.oa.periodic)
- hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
+ if (stream->periodic)
+ hrtimer_start(&stream->poll_check_timer,
ns_to_ktime(POLL_PERIOD),
HRTIMER_MODE_REL_PINNED);
}
@@ -2114,10 +2128,10 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- dev_priv->perf.oa.ops.oa_disable(stream);
+ dev_priv->perf.ops.oa_disable(stream);
- if (dev_priv->perf.oa.periodic)
- hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
+ if (stream->periodic)
+ hrtimer_cancel(&stream->poll_check_timer);
}
static const struct i915_perf_stream_ops i915_oa_stream_ops = {
@@ -2169,7 +2183,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
- if (!dev_priv->perf.oa.ops.enable_metric_set) {
+ if (!dev_priv->perf.ops.enable_metric_set) {
DRM_DEBUG("OA unit not supported\n");
return -ENODEV;
}
@@ -2178,7 +2192,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
* counter reports and marshal to the appropriate client
* we currently only allow exclusive access
*/
- if (dev_priv->perf.oa.exclusive_stream) {
+ if (dev_priv->perf.exclusive_stream) {
DRM_DEBUG("OA unit already in use\n");
return -EBUSY;
}
@@ -2188,43 +2202,23 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
- /* We set up some ratelimit state to potentially throttle any _NOTES
- * about spurious, invalid OA reports which we don't forward to
- * userspace.
- *
- * The initialization is associated with opening the stream (not driver
- * init) considering we print a _NOTE about any throttling when closing
- * the stream instead of waiting until driver _fini which no one would
- * ever see.
- *
- * Using the same limiting factors as printk_ratelimit()
- */
- ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
- 5 * HZ, 10);
- /* Since we use a DRM_NOTE for spurious reports it would be
- * inconsistent to let __ratelimit() automatically print a warning for
- * throttling.
- */
- ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
- RATELIMIT_MSG_ON_RELEASE);
-
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
- format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
+ format_size = dev_priv->perf.oa_formats[props->oa_format].size;
stream->sample_flags |= SAMPLE_OA_REPORT;
stream->sample_size += format_size;
- dev_priv->perf.oa.oa_buffer.format_size = format_size;
- if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
+ stream->oa_buffer.format_size = format_size;
+ if (WARN_ON(stream->oa_buffer.format_size == 0))
return -EINVAL;
- dev_priv->perf.oa.oa_buffer.format =
- dev_priv->perf.oa.oa_formats[props->oa_format].format;
+ stream->oa_buffer.format =
+ dev_priv->perf.oa_formats[props->oa_format].format;
- dev_priv->perf.oa.periodic = props->oa_periodic;
- if (dev_priv->perf.oa.periodic)
- dev_priv->perf.oa.period_exponent = props->oa_period_exponent;
+ stream->periodic = props->oa_periodic;
+ if (stream->periodic)
+ stream->period_exponent = props->oa_period_exponent;
if (stream->ctx) {
ret = oa_get_render_ctx_id(stream);
@@ -2255,7 +2249,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
- ret = alloc_oa_buffer(dev_priv);
+ ret = alloc_oa_buffer(stream);
if (ret)
goto err_oa_buf_alloc;
@@ -2264,9 +2258,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
goto err_lock;
stream->ops = &i915_oa_stream_ops;
- dev_priv->perf.oa.exclusive_stream = stream;
+ dev_priv->perf.exclusive_stream = stream;
- ret = dev_priv->perf.oa.ops.enable_metric_set(stream);
+ ret = dev_priv->perf.ops.enable_metric_set(stream);
if (ret) {
DRM_DEBUG("Unable to enable metric set\n");
goto err_enable;
@@ -2274,15 +2268,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
mutex_unlock(&dev_priv->drm.struct_mutex);
+ hrtimer_init(&stream->poll_check_timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ stream->poll_check_timer.function = oa_poll_check_timer_cb;
+ init_waitqueue_head(&stream->poll_wq);
+ spin_lock_init(&stream->oa_buffer.ptr_lock);
+
return 0;
err_enable:
- dev_priv->perf.oa.exclusive_stream = NULL;
- dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+ dev_priv->perf.exclusive_stream = NULL;
+ dev_priv->perf.ops.disable_metric_set(stream);
mutex_unlock(&dev_priv->drm.struct_mutex);
err_lock:
- free_oa_buffer(dev_priv);
+ free_oa_buffer(stream);
err_oa_buf_alloc:
put_oa_config(dev_priv, stream->oa_config);
@@ -2306,9 +2306,9 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
return;
- stream = engine->i915->perf.oa.exclusive_stream;
+ stream = engine->i915->perf.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
+ gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config);
}
/**
@@ -2424,7 +2424,7 @@ static ssize_t i915_perf_read(struct file *file,
/* Maybe make ->pollin per-stream state if we support multiple
* concurrent streams in the future.
*/
- dev_priv->perf.oa.pollin = false;
+ stream->pollin = false;
}
return ret;
@@ -2432,13 +2432,12 @@ static ssize_t i915_perf_read(struct file *file,
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
- struct drm_i915_private *dev_priv =
- container_of(hrtimer, typeof(*dev_priv),
- perf.oa.poll_check_timer);
+ struct i915_perf_stream *stream =
+ container_of(hrtimer, typeof(*stream), poll_check_timer);
- if (oa_buffer_check_unlocked(dev_priv)) {
- dev_priv->perf.oa.pollin = true;
- wake_up(&dev_priv->perf.oa.poll_wq);
+ if (oa_buffer_check_unlocked(stream)) {
+ stream->pollin = true;
+ wake_up(&stream->poll_wq);
}
hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
@@ -2477,7 +2476,7 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
* the hrtimer/oa_poll_check_timer_cb to notify us when there are
* samples to read.
*/
- if (dev_priv->perf.oa.pollin)
+ if (stream->pollin)
events |= EPOLLIN;
return events;
@@ -2903,7 +2902,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
value);
return -EINVAL;
}
- if (!dev_priv->perf.oa.oa_formats[value].size) {
+ if (!dev_priv->perf.oa_formats[value].size) {
DRM_DEBUG("Unsupported OA report format %llu\n",
value);
return -EINVAL;
@@ -3047,7 +3046,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
if (!dev_priv->perf.metrics_kobj)
goto exit;
- sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr);
+ sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr);
if (INTEL_GEN(dev_priv) >= 11) {
i915_perf_load_test_config_icl(dev_priv);
@@ -3082,15 +3081,15 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
i915_perf_load_test_config_hsw(dev_priv);
}
- if (dev_priv->perf.oa.test_config.id == 0)
+ if (dev_priv->perf.test_config.id == 0)
goto sysfs_error;
ret = sysfs_create_group(dev_priv->perf.metrics_kobj,
- &dev_priv->perf.oa.test_config.sysfs_metric);
+ &dev_priv->perf.test_config.sysfs_metric);
if (ret)
goto sysfs_error;
- atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
+ atomic_set(&dev_priv->perf.test_config.ref_count, 1);
goto exit;
@@ -3117,7 +3116,7 @@ void i915_perf_unregister(struct drm_i915_private *dev_priv)
return;
sysfs_remove_group(dev_priv->perf.metrics_kobj,
- &dev_priv->perf.oa.test_config.sysfs_metric);
+ &dev_priv->perf.test_config.sysfs_metric);
kobject_put(dev_priv->perf.metrics_kobj);
dev_priv->perf.metrics_kobj = NULL;
@@ -3362,7 +3361,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
oa_config->mux_regs_len = args->n_mux_regs;
oa_config->mux_regs =
alloc_oa_regs(dev_priv,
- dev_priv->perf.oa.ops.is_valid_mux_reg,
+ dev_priv->perf.ops.is_valid_mux_reg,
u64_to_user_ptr(args->mux_regs_ptr),
args->n_mux_regs);
@@ -3375,7 +3374,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
oa_config->b_counter_regs_len = args->n_boolean_regs;
oa_config->b_counter_regs =
alloc_oa_regs(dev_priv,
- dev_priv->perf.oa.ops.is_valid_b_counter_reg,
+ dev_priv->perf.ops.is_valid_b_counter_reg,
u64_to_user_ptr(args->boolean_regs_ptr),
args->n_boolean_regs);
@@ -3394,7 +3393,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
oa_config->flex_regs_len = args->n_flex_regs;
oa_config->flex_regs =
alloc_oa_regs(dev_priv,
- dev_priv->perf.oa.ops.is_valid_flex_reg,
+ dev_priv->perf.ops.is_valid_flex_reg,
u64_to_user_ptr(args->flex_regs_ptr),
args->n_flex_regs);
@@ -3561,20 +3560,20 @@ static struct ctl_table dev_root[] = {
void i915_perf_init(struct drm_i915_private *dev_priv)
{
if (IS_HASWELL(dev_priv)) {
- dev_priv->perf.oa.ops.is_valid_b_counter_reg =
+ dev_priv->perf.ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
- dev_priv->perf.oa.ops.is_valid_mux_reg =
+ dev_priv->perf.ops.is_valid_mux_reg =
hsw_is_valid_mux_addr;
- dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
- dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
- dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
- dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
- dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
- dev_priv->perf.oa.ops.read = gen7_oa_read;
- dev_priv->perf.oa.ops.oa_hw_tail_read =
+ dev_priv->perf.ops.is_valid_flex_reg = NULL;
+ dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set;
+ dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set;
+ dev_priv->perf.ops.oa_enable = gen7_oa_enable;
+ dev_priv->perf.ops.oa_disable = gen7_oa_disable;
+ dev_priv->perf.ops.read = gen7_oa_read;
+ dev_priv->perf.ops.oa_hw_tail_read =
gen7_oa_hw_tail_read;
- dev_priv->perf.oa.oa_formats = hsw_oa_formats;
+ dev_priv->perf.oa_formats = hsw_oa_formats;
} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
/* Note that although we could theoretically also support the
* legacy ringbuffer mode on BDW (and earlier iterations of
@@ -3582,71 +3581,65 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
* worth the complexity to maintain now that BDW+ enable
* execlist mode by default.
*/
- dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;
+ dev_priv->perf.oa_formats = gen8_plus_oa_formats;
- dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
- dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
- dev_priv->perf.oa.ops.read = gen8_oa_read;
- dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
+ dev_priv->perf.ops.oa_enable = gen8_oa_enable;
+ dev_priv->perf.ops.oa_disable = gen8_oa_disable;
+ dev_priv->perf.ops.read = gen8_oa_read;
+ dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
if (IS_GEN_RANGE(dev_priv, 8, 9)) {
- dev_priv->perf.oa.ops.is_valid_b_counter_reg =
+ dev_priv->perf.ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
- dev_priv->perf.oa.ops.is_valid_mux_reg =
+ dev_priv->perf.ops.is_valid_mux_reg =
gen8_is_valid_mux_addr;
- dev_priv->perf.oa.ops.is_valid_flex_reg =
+ dev_priv->perf.ops.is_valid_flex_reg =
gen8_is_valid_flex_addr;
if (IS_CHERRYVIEW(dev_priv)) {
- dev_priv->perf.oa.ops.is_valid_mux_reg =
+ dev_priv->perf.ops.is_valid_mux_reg =
chv_is_valid_mux_addr;
}
- dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
- dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;
+ dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
+ dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set;
if (IS_GEN(dev_priv, 8)) {
- dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
- dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
+ dev_priv->perf.ctx_oactxctrl_offset = 0x120;
+ dev_priv->perf.ctx_flexeu0_offset = 0x2ce;
- dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
+ dev_priv->perf.gen8_valid_ctx_bit = BIT(25);
} else {
- dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
- dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
+ dev_priv->perf.ctx_oactxctrl_offset = 0x128;
+ dev_priv->perf.ctx_flexeu0_offset = 0x3de;
- dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
+ dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
}
} else if (IS_GEN_RANGE(dev_priv, 10, 11)) {
- dev_priv->perf.oa.ops.is_valid_b_counter_reg =
+ dev_priv->perf.ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
- dev_priv->perf.oa.ops.is_valid_mux_reg =
+ dev_priv->perf.ops.is_valid_mux_reg =
gen10_is_valid_mux_addr;
- dev_priv->perf.oa.ops.is_valid_flex_reg =
+ dev_priv->perf.ops.is_valid_flex_reg =
gen8_is_valid_flex_addr;
- dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
- dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set;
+ dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
+ dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set;
if (IS_GEN(dev_priv, 10)) {
- dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
- dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
+ dev_priv->perf.ctx_oactxctrl_offset = 0x128;
+ dev_priv->perf.ctx_flexeu0_offset = 0x3de;
} else {
- dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124;
- dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e;
+ dev_priv->perf.ctx_oactxctrl_offset = 0x124;
+ dev_priv->perf.ctx_flexeu0_offset = 0x78e;
}
- dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
+ dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
}
}
- if (dev_priv->perf.oa.ops.enable_metric_set) {
- hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
- init_waitqueue_head(&dev_priv->perf.oa.poll_wq);
-
+ if (dev_priv->perf.ops.enable_metric_set) {
INIT_LIST_HEAD(&dev_priv->perf.streams);
mutex_init(&dev_priv->perf.lock);
- spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
oa_sample_rate_hard_limit = 1000 *
(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
@@ -3655,6 +3648,25 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
mutex_init(&dev_priv->perf.metrics_lock);
idr_init(&dev_priv->perf.metrics_idr);
+ /* We set up some ratelimit state to potentially throttle any
+ * _NOTES about spurious, invalid OA reports which we don't
+ * forward to userspace.
+ *
+ * We print a _NOTE about any throttling when closing the
+ * stream instead of waiting until driver _fini which no one
+ * would ever see.
+ *
+ * Using the same limiting factors as printk_ratelimit()
+ */
+ ratelimit_state_init(&dev_priv->perf.spurious_report_rs,
+ 5 * HZ, 10);
+ /* Since we use a DRM_NOTE for spurious reports it would be
+ * inconsistent to let __ratelimit() automatically print a
+ * warning for throttling.
+ */
+ ratelimit_set_flags(&dev_priv->perf.spurious_report_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+
dev_priv->perf.initialized = true;
}
}
@@ -3683,7 +3695,7 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
unregister_sysctl_table(dev_priv->perf.sysctl_header);
- memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
+ memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops));
dev_priv->perf.initialized = false;
}