summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJuan A. Suarez Romero <jasuarez@igalia.com>2021-06-08 13:15:41 +0200
committerMelissa Wen <melissa.srw@gmail.com>2021-07-21 00:19:59 +0100
commit26a4dc29b74a137f45665089f6d3d633fcc9b662 (patch)
treeafe6c1cade475491fd9a159a8c53b27b2de73ec2 /include
parent56f0729a510f92151682ff6c89f69724d5595d6e (diff)
downloadlinux-26a4dc29b74a137f45665089f6d3d633fcc9b662.tar.bz2
drm/v3d: Expose performance counters to userspace
The V3D engine has several hardware performance counters that can of interest for userspace performance analysis tools. This exposes new ioctls to create and destroy performance monitor objects, as well as to query the counter values. Each created performance monitor object has an ID that can be attached to CL/CSD submissions, so the driver enables the requested counters when the job is submitted, and updates the performance monitor values when the job is done. It is up to the user to ensure all the jobs have been finished before getting the performance monitor values. It is also up to the user to properly synchronize BCL jobs when submitting jobs with different performance monitors attached. Cc: Daniel Vetter <daniel@ffwll.ch> Cc: David Airlie <airlied@linux.ie> Cc: Emma Anholt <emma@anholt.net> To: dri-devel@lists.freedesktop.org Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com> Acked-by: Melissa Wen <mwen@igalia.com> Signed-off-by: Melissa Wen <melissa.srw@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210608111541.461991-1-jasuarez@igalia.com
Diffstat (limited to 'include')
-rw-r--r--include/uapi/drm/v3d_drm.h136
1 files changed, 136 insertions, 0 deletions
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 1ce746e228d9..4104f22fb3d3 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -38,6 +38,9 @@ extern "C" {
#define DRM_V3D_GET_BO_OFFSET 0x05
#define DRM_V3D_SUBMIT_TFU 0x06
#define DRM_V3D_SUBMIT_CSD 0x07
+#define DRM_V3D_PERFMON_CREATE 0x08
+#define DRM_V3D_PERFMON_DESTROY 0x09
+#define DRM_V3D_PERFMON_GET_VALUES 0x0a
#define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
#define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -47,6 +50,12 @@ extern "C" {
#define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
#define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
#define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
+#define DRM_IOCTL_V3D_PERFMON_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_CREATE, \
+ struct drm_v3d_perfmon_create)
+#define DRM_IOCTL_V3D_PERFMON_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, \
+ struct drm_v3d_perfmon_destroy)
+#define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \
+ struct drm_v3d_perfmon_get_values)
#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01
@@ -127,6 +136,11 @@ struct drm_v3d_submit_cl {
__u32 bo_handle_count;
__u32 flags;
+
+ /* ID of the perfmon to attach to this job. 0 means no perfmon. */
+ __u32 perfmon_id;
+
+ __u32 pad;
};
/**
@@ -195,6 +209,7 @@ enum drm_v3d_param {
DRM_V3D_PARAM_SUPPORTS_TFU,
DRM_V3D_PARAM_SUPPORTS_CSD,
DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH,
+ DRM_V3D_PARAM_SUPPORTS_PERFMON,
};
struct drm_v3d_get_param {
@@ -258,6 +273,127 @@ struct drm_v3d_submit_csd {
__u32 in_sync;
/* Sync object to signal when the CSD job is done. */
__u32 out_sync;
+
+ /* ID of the perfmon to attach to this job. 0 means no perfmon. */
+ __u32 perfmon_id;
+};
+
+enum {
+ V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS,
+ V3D_PERFCNT_FEP_VALID_PRIMS,
+ V3D_PERFCNT_FEP_EZ_NFCLIP_QUADS,
+ V3D_PERFCNT_FEP_VALID_QUADS,
+ V3D_PERFCNT_TLB_QUADS_STENCIL_FAIL,
+ V3D_PERFCNT_TLB_QUADS_STENCILZ_FAIL,
+ V3D_PERFCNT_TLB_QUADS_STENCILZ_PASS,
+ V3D_PERFCNT_TLB_QUADS_ZERO_COV,
+ V3D_PERFCNT_TLB_QUADS_NONZERO_COV,
+ V3D_PERFCNT_TLB_QUADS_WRITTEN,
+ V3D_PERFCNT_PTB_PRIM_VIEWPOINT_DISCARD,
+ V3D_PERFCNT_PTB_PRIM_CLIP,
+ V3D_PERFCNT_PTB_PRIM_REV,
+ V3D_PERFCNT_QPU_IDLE_CYCLES,
+ V3D_PERFCNT_QPU_ACTIVE_CYCLES_VERTEX_COORD_USER,
+ V3D_PERFCNT_QPU_ACTIVE_CYCLES_FRAG,
+ V3D_PERFCNT_QPU_CYCLES_VALID_INSTR,
+ V3D_PERFCNT_QPU_CYCLES_TMU_STALL,
+ V3D_PERFCNT_QPU_CYCLES_SCOREBOARD_STALL,
+ V3D_PERFCNT_QPU_CYCLES_VARYINGS_STALL,
+ V3D_PERFCNT_QPU_IC_HIT,
+ V3D_PERFCNT_QPU_IC_MISS,
+ V3D_PERFCNT_QPU_UC_HIT,
+ V3D_PERFCNT_QPU_UC_MISS,
+ V3D_PERFCNT_TMU_TCACHE_ACCESS,
+ V3D_PERFCNT_TMU_TCACHE_MISS,
+ V3D_PERFCNT_VPM_VDW_STALL,
+ V3D_PERFCNT_VPM_VCD_STALL,
+ V3D_PERFCNT_BIN_ACTIVE,
+ V3D_PERFCNT_RDR_ACTIVE,
+ V3D_PERFCNT_L2T_HITS,
+ V3D_PERFCNT_L2T_MISSES,
+ V3D_PERFCNT_CYCLE_COUNT,
+ V3D_PERFCNT_QPU_CYCLES_STALLED_VERTEX_COORD_USER,
+ V3D_PERFCNT_QPU_CYCLES_STALLED_FRAGMENT,
+ V3D_PERFCNT_PTB_PRIMS_BINNED,
+ V3D_PERFCNT_AXI_WRITES_WATCH_0,
+ V3D_PERFCNT_AXI_READS_WATCH_0,
+ V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_0,
+ V3D_PERFCNT_AXI_READ_STALLS_WATCH_0,
+ V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_0,
+ V3D_PERFCNT_AXI_READ_BYTES_WATCH_0,
+ V3D_PERFCNT_AXI_WRITES_WATCH_1,
+ V3D_PERFCNT_AXI_READS_WATCH_1,
+ V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_1,
+ V3D_PERFCNT_AXI_READ_STALLS_WATCH_1,
+ V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_1,
+ V3D_PERFCNT_AXI_READ_BYTES_WATCH_1,
+ V3D_PERFCNT_TLB_PARTIAL_QUADS,
+ V3D_PERFCNT_TMU_CONFIG_ACCESSES,
+ V3D_PERFCNT_L2T_NO_ID_STALL,
+ V3D_PERFCNT_L2T_COM_QUE_STALL,
+ V3D_PERFCNT_L2T_TMU_WRITES,
+ V3D_PERFCNT_TMU_ACTIVE_CYCLES,
+ V3D_PERFCNT_TMU_STALLED_CYCLES,
+ V3D_PERFCNT_CLE_ACTIVE,
+ V3D_PERFCNT_L2T_TMU_READS,
+ V3D_PERFCNT_L2T_CLE_READS,
+ V3D_PERFCNT_L2T_VCD_READS,
+ V3D_PERFCNT_L2T_TMUCFG_READS,
+ V3D_PERFCNT_L2T_SLC0_READS,
+ V3D_PERFCNT_L2T_SLC1_READS,
+ V3D_PERFCNT_L2T_SLC2_READS,
+ V3D_PERFCNT_L2T_TMU_W_MISSES,
+ V3D_PERFCNT_L2T_TMU_R_MISSES,
+ V3D_PERFCNT_L2T_CLE_MISSES,
+ V3D_PERFCNT_L2T_VCD_MISSES,
+ V3D_PERFCNT_L2T_TMUCFG_MISSES,
+ V3D_PERFCNT_L2T_SLC0_MISSES,
+ V3D_PERFCNT_L2T_SLC1_MISSES,
+ V3D_PERFCNT_L2T_SLC2_MISSES,
+ V3D_PERFCNT_CORE_MEM_WRITES,
+ V3D_PERFCNT_L2T_MEM_WRITES,
+ V3D_PERFCNT_PTB_MEM_WRITES,
+ V3D_PERFCNT_TLB_MEM_WRITES,
+ V3D_PERFCNT_CORE_MEM_READS,
+ V3D_PERFCNT_L2T_MEM_READS,
+ V3D_PERFCNT_PTB_MEM_READS,
+ V3D_PERFCNT_PSE_MEM_READS,
+ V3D_PERFCNT_TLB_MEM_READS,
+ V3D_PERFCNT_GMP_MEM_READS,
+ V3D_PERFCNT_PTB_W_MEM_WORDS,
+ V3D_PERFCNT_TLB_W_MEM_WORDS,
+ V3D_PERFCNT_PSE_R_MEM_WORDS,
+ V3D_PERFCNT_TLB_R_MEM_WORDS,
+ V3D_PERFCNT_TMU_MRU_HITS,
+ V3D_PERFCNT_COMPUTE_ACTIVE,
+ V3D_PERFCNT_NUM,
+};
+
+#define DRM_V3D_MAX_PERF_COUNTERS 32
+
+struct drm_v3d_perfmon_create {
+ __u32 id;
+ __u32 ncounters;
+ __u8 counters[DRM_V3D_MAX_PERF_COUNTERS];
+};
+
+struct drm_v3d_perfmon_destroy {
+ __u32 id;
+};
+
+/*
+ * Returns the values of the performance counters tracked by this
+ * perfmon (as an array of ncounters u64 values).
+ *
+ * No implicit synchronization is performed, so the user has to
+ * guarantee that any jobs using this perfmon have already been
+ * completed (probably by blocking on the seqno returned by the
+ * last exec that used the perfmon).
+ */
+struct drm_v3d_perfmon_get_values {
+ __u32 id;
+ __u32 pad;
+ __u64 values_ptr;
};
#if defined(__cplusplus)