Diffstat (limited to 'drivers/gpu/drm/i915')
100 files changed, 2826 insertions, 1486 deletions
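Note on the fence-timeout plumbing touched throughout this series: the new CONFIG_DRM_I915_FENCE_TIMEOUT option (milliseconds, jiffy granularity, 0 disables the timer) feeds an i915_fence_timeout() helper, presumably provided via the new i915_config.o object, which replaces the old I915_FENCE_TIMEOUT constant at the i915_sw_fence_await_*() call sites further down. A minimal sketch of such a helper, assuming it does no more than translate the Kconfig millisecond value into jiffies (the exact body lives in the series itself and may differ):

	/* Illustrative sketch only -- not the exact helper added by this series. */
	/* Needs <linux/jiffies.h> for msecs_to_jiffies(). */
	static inline unsigned long
	i915_fence_timeout(const struct drm_i915_private *i915)
	{
		/*
		 * The i915 argument is unused here; it is assumed to exist so a
		 * per-device policy could be layered on later.
		 *
		 * 0 means "no supplementary timer": rely on the foreign fence
		 * being signaled eventually, as the Kconfig help text describes.
		 */
		if (!CONFIG_DRM_I915_FENCE_TIMEOUT)
			return 0;

		/* Kconfig value is in ms; the sw_fence code expects jiffies. */
		return msecs_to_jiffies(CONFIG_DRM_I915_FENCE_TIMEOUT);
	}
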
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 0bfd276c19fe..35bbe2b80596 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -1,3 +1,15 @@ +config DRM_I915_FENCE_TIMEOUT + int "Timeout for unsignaled foreign fences (ms, jiffy granularity)" + default 10000 # milliseconds + help + When listening to a foreign fence, we install a supplementary timer + to ensure that we are always signaled and our userspace is able to + make forward progress. This value specifies the timeout used for an + unsignaled foreign fence. + + May be 0 to disable the timeout, and rely on the foreign fence being + eventually signaled. + config DRM_I915_USERFAULT_AUTOSUSPEND int "Runtime autosuspend delay for userspace GGTT mmaps (ms)" default 250 # milliseconds diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index caf00d92ea9d..b0da6ea6e3f1 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -35,6 +35,7 @@ subdir-ccflags-y += -I$(srctree)/$(src) # core driver code i915-y += i915_drv.o \ + i915_config.o \ i915_irq.o \ i915_getparam.o \ i915_params.o \ @@ -87,11 +88,11 @@ gt-y += \ gt/intel_engine_cs.o \ gt/intel_engine_heartbeat.o \ gt/intel_engine_pm.o \ - gt/intel_engine_pool.o \ gt/intel_engine_user.o \ gt/intel_ggtt.o \ gt/intel_ggtt_fencing.o \ gt/intel_gt.o \ + gt/intel_gt_buffer_pool.o \ gt/intel_gt_clock_utils.o \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 36aaee8536f1..ad4aa66fd676 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -514,85 +514,67 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder, mutex_unlock(&dev_priv->av_mutex); } -/* Add a factor to take care of rounding and truncations */ -#define ROUNDING_FACTOR 10000 - -static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); unsigned int link_clks_available, link_clks_required; unsigned int tu_data, tu_line, link_clks_active; - unsigned int hblank_rise, hblank_early_prog; - unsigned int h_active, h_total, hblank_delta, pixel_clk, v_total; - unsigned int fec_coeff, refresh_rate, cdclk, vdsc_bpp; + unsigned int h_active, h_total, hblank_delta, pixel_clk; + unsigned int fec_coeff, cdclk, vdsc_bpp; + unsigned int link_clk, lanes; + unsigned int hblank_rise; h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay; h_total = crtc_state->hw.adjusted_mode.crtc_htotal; - v_total = crtc_state->hw.adjusted_mode.crtc_vtotal; pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock; - refresh_rate = crtc_state->hw.adjusted_mode.vrefresh; vdsc_bpp = crtc_state->dsc.compressed_bpp; cdclk = i915->cdclk.hw.cdclk; /* fec= 0.972261, using rounding multiplier of 1000000 */ fec_coeff = 972261; + link_clk = crtc_state->port_clock; + lanes = crtc_state->lane_count; drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :" "lanes = %u vdsc_bpp = %u cdclk = %u\n", - h_active, crtc_state->port_clock, crtc_state->lane_count, - vdsc_bpp, cdclk); + h_active, link_clk, lanes, vdsc_bpp, cdclk); - if (WARN_ON(!crtc_state->port_clock || !crtc_state->lane_count || - !crtc_state->dsc.compressed_bpp || !i915->cdclk.hw.cdclk)) + 
if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk)) return 0; - link_clks_available = ((((h_total - h_active) * - ((crtc_state->port_clock * ROUNDING_FACTOR) / - pixel_clk)) / ROUNDING_FACTOR) - 28); - - link_clks_required = DIV_ROUND_UP(192000, (refresh_rate * - v_total)) * ((48 / - crtc_state->lane_count) + 2); + link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28; + link_clks_required = DIV_ROUND_UP(192000 * h_total, 1000 * pixel_clk) * (48 / lanes + 2); if (link_clks_available > link_clks_required) hblank_delta = 32; else - hblank_delta = DIV_ROUND_UP(((((5 * ROUNDING_FACTOR) / - crtc_state->port_clock) + ((5 * - ROUNDING_FACTOR) / - cdclk)) * pixel_clk), - ROUNDING_FACTOR); - - tu_data = (pixel_clk * vdsc_bpp * 8) / ((crtc_state->port_clock * - crtc_state->lane_count * fec_coeff) / 1000000); - tu_line = (((h_active * crtc_state->port_clock * fec_coeff) / - 1000000) / (64 * pixel_clk)); - link_clks_active = (tu_line - 1) * 64 + tu_data; + hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk), + mul_u32_u32(link_clk, cdclk)); - hblank_rise = ((link_clks_active + 6 * DIV_ROUND_UP(link_clks_active, - 250) + 4) * ((pixel_clk * ROUNDING_FACTOR) / - crtc_state->port_clock)) / ROUNDING_FACTOR; + tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000), + mul_u32_u32(link_clk * lanes, fec_coeff)); + tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff), + mul_u32_u32(64 * pixel_clk, 1000000)); + link_clks_active = (tu_line - 1) * 64 + tu_data; - hblank_early_prog = h_active - hblank_rise + hblank_delta; + hblank_rise = (link_clks_active + 6 * DIV_ROUND_UP(link_clks_active, 250) + 4) * pixel_clk / link_clk; - return hblank_early_prog; + return h_active - hblank_rise + hblank_delta; } -static unsigned int get_sample_room_req_config(const struct intel_crtc_state *crtc_state) +static unsigned int calc_samples_room(const struct intel_crtc_state *crtc_state) { unsigned int h_active, h_total, pixel_clk; - unsigned int samples_room; + unsigned int link_clk, lanes; h_active = crtc_state->hw.adjusted_mode.hdisplay; h_total = crtc_state->hw.adjusted_mode.htotal; pixel_clk = crtc_state->hw.adjusted_mode.clock; + link_clk = crtc_state->port_clock; + lanes = crtc_state->lane_count; - samples_room = ((((h_total - h_active) * ((crtc_state->port_clock * - ROUNDING_FACTOR) / pixel_clk)) / - ROUNDING_FACTOR) - 12) / ((48 / - crtc_state->lane_count) + 2); - - return samples_room; + return ((h_total - h_active) * link_clk - 12 * pixel_clk) / + (pixel_clk * (48 / lanes + 2)); } static void enable_audio_dsc_wa(struct intel_encoder *encoder, @@ -618,8 +600,7 @@ static void enable_audio_dsc_wa(struct intel_encoder *encoder, (crtc_state->hw.adjusted_mode.hdisplay >= 3840 && crtc_state->hw.adjusted_mode.vdisplay >= 2160)) { /* Get hblank early enable value required */ - hblank_early_prog = get_hblank_early_enable_config(encoder, - crtc_state); + hblank_early_prog = calc_hblank_early_prog(encoder, crtc_state); if (hblank_early_prog < 32) { val &= ~HBLANK_START_COUNT_MASK(pipe); val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_32); @@ -635,7 +616,7 @@ static void enable_audio_dsc_wa(struct intel_encoder *encoder, } /* Get samples room value required */ - samples_room = get_sample_room_req_config(crtc_state); + samples_room = calc_samples_room(crtc_state); if (samples_room < 3) { val &= ~NUMBER_SAMPLES_PER_LINE_MASK(pipe); val |= NUMBER_SAMPLES_PER_LINE(pipe, samples_room); diff --git a/drivers/gpu/drm/i915/display/intel_bw.c 
b/drivers/gpu/drm/i915/display/intel_bw.c index 4aa54fcb0629..fef04e2d954e 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -8,6 +8,9 @@ #include "intel_bw.h" #include "intel_display_types.h" #include "intel_sideband.h" +#include "intel_atomic.h" +#include "intel_pm.h" + /* Parameters for Qclk Geyserville (QGV) */ struct intel_qgv_point { @@ -113,6 +116,26 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, return 0; } +int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, + u32 points_mask) +{ + int ret; + + /* bspec says to keep retrying for at least 1 ms */ + ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG, + points_mask, + ICL_PCODE_POINTS_RESTRICTED_MASK, + ICL_PCODE_POINTS_RESTRICTED, + 1); + + if (ret < 0) { + drm_err(&dev_priv->drm, "Failed to disable qgv points (%d)\n", ret); + return ret; + } + + return 0; +} + static int icl_get_qgv_points(struct drm_i915_private *dev_priv, struct intel_qgv_info *qi) { @@ -240,6 +263,16 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel break; } + /* + * In case if SAGV is disabled in BIOS, we always get 1 + * SAGV point, but we can't send PCode commands to restrict it + * as it will fail and pointless anyway. + */ + if (qi.num_points == 1) + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; + else + dev_priv->sagv_status = I915_SAGV_ENABLED; + return 0; } @@ -248,6 +281,11 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, { int i; + /* + * Let's return max bw for 0 planes + */ + num_planes = max(1, num_planes); + for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { const struct intel_bw_info *bi = &dev_priv->max_bw[i]; @@ -277,34 +315,6 @@ void intel_bw_init_hw(struct drm_i915_private *dev_priv) icl_get_bw_info(dev_priv, &icl_sa_info); } -static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv, - int num_planes) -{ - if (INTEL_GEN(dev_priv) >= 11) { - /* - * Any bw group has same amount of QGV points - */ - const struct intel_bw_info *bi = - &dev_priv->max_bw[0]; - unsigned int min_bw = UINT_MAX; - int i; - - /* - * FIXME with SAGV disabled maybe we can assume - * point 1 will always be used? Seems to match - * the behaviour observed in the wild. 
- */ - for (i = 0; i < bi->num_qgv_points; i++) { - unsigned int bw = icl_max_bw(dev_priv, num_planes, i); - - min_bw = min(bw, min_bw); - } - return min_bw; - } else { - return UINT_MAX; - } -} - static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state) { /* @@ -414,11 +424,16 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *new_crtc_state, *old_crtc_state; - struct intel_bw_state *bw_state = NULL; - unsigned int data_rate, max_data_rate; + struct intel_bw_state *new_bw_state = NULL; + const struct intel_bw_state *old_bw_state = NULL; + unsigned int data_rate; unsigned int num_active_planes; struct intel_crtc *crtc; int i, ret; + u32 allowed_points = 0; + unsigned int max_bw_point = 0, max_bw = 0; + unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points; + u32 mask = (1 << num_qgv_points) - 1; /* FIXME earlier gens need some checks too */ if (INTEL_GEN(dev_priv) < 11) @@ -443,41 +458,93 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) old_active_planes == new_active_planes) continue; - bw_state = intel_atomic_get_bw_state(state); - if (IS_ERR(bw_state)) - return PTR_ERR(bw_state); + new_bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(new_bw_state)) + return PTR_ERR(new_bw_state); - bw_state->data_rate[crtc->pipe] = new_data_rate; - bw_state->num_active_planes[crtc->pipe] = new_active_planes; + new_bw_state->data_rate[crtc->pipe] = new_data_rate; + new_bw_state->num_active_planes[crtc->pipe] = new_active_planes; drm_dbg_kms(&dev_priv->drm, "pipe %c data rate %u num active planes %u\n", pipe_name(crtc->pipe), - bw_state->data_rate[crtc->pipe], - bw_state->num_active_planes[crtc->pipe]); + new_bw_state->data_rate[crtc->pipe], + new_bw_state->num_active_planes[crtc->pipe]); } - if (!bw_state) + if (!new_bw_state) return 0; - ret = intel_atomic_lock_global_state(&bw_state->base); + ret = intel_atomic_lock_global_state(&new_bw_state->base); if (ret) return ret; - data_rate = intel_bw_data_rate(dev_priv, bw_state); - num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state); + data_rate = intel_bw_data_rate(dev_priv, new_bw_state); + data_rate = DIV_ROUND_UP(data_rate, 1000); - max_data_rate = intel_max_data_rate(dev_priv, num_active_planes); + num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state); - data_rate = DIV_ROUND_UP(data_rate, 1000); + for (i = 0; i < num_qgv_points; i++) { + unsigned int max_data_rate; - if (data_rate > max_data_rate) { - drm_dbg_kms(&dev_priv->drm, - "Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n", - data_rate, max_data_rate, num_active_planes); + max_data_rate = icl_max_bw(dev_priv, num_active_planes, i); + /* + * We need to know which qgv point gives us + * maximum bandwidth in order to disable SAGV + * if we find that we exceed SAGV block time + * with watermarks. By that moment we already + * have those, as it is calculated earlier in + * intel_atomic_check, + */ + if (max_data_rate > max_bw) { + max_bw_point = i; + max_bw = max_data_rate; + } + if (max_data_rate >= data_rate) + allowed_points |= BIT(i); + drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n", + i, max_data_rate, data_rate); + } + + /* + * BSpec states that we always should have at least one allowed point + * left, so if we couldn't - simply reject the configuration for obvious + * reasons. 
+ */ + if (allowed_points == 0) { + drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory" + " bandwidth %d for display configuration(%d active planes).\n", + data_rate, num_active_planes); return -EINVAL; } + /* + * Leave only single point with highest bandwidth, if + * we can't enable SAGV due to the increased memory latency it may + * cause. + */ + if (!intel_can_enable_sagv(dev_priv, new_bw_state)) { + allowed_points = BIT(max_bw_point); + drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n", + max_bw_point); + } + /* + * We store the ones which need to be masked as that is what PCode + * actually accepts as a parameter. + */ + new_bw_state->qgv_points_mask = ~allowed_points & mask; + + old_bw_state = intel_atomic_get_old_bw_state(state); + /* + * If the actual mask had changed we need to make sure that + * the commits are serialized(in case this is a nomodeset, nonblocking) + */ + if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) { + ret = intel_atomic_serialize_global_state(&new_bw_state->base); + if (ret) + return ret; + } + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index ac004d6f4276..bbcaaa73ec1b 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -18,8 +18,24 @@ struct intel_crtc_state; struct intel_bw_state { struct intel_global_state base; + /* + * Contains a bit mask, used to determine, whether correspondent + * pipe allows SAGV or not. + */ + u8 pipe_sagv_reject; + + /* + * Current QGV points mask, which restricts + * some particular SAGV states, not to confuse + * with pipe_sagv_mask. + */ + u8 qgv_points_mask; + unsigned int data_rate[I915_MAX_PIPES]; u8 num_active_planes[I915_MAX_PIPES]; + + /* bitmask of active pipes */ + u8 active_pipes; }; #define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base) @@ -38,5 +54,7 @@ int intel_bw_init(struct drm_i915_private *dev_priv); int intel_bw_atomic_check(struct intel_atomic_state *state); void intel_bw_crtc_update(struct intel_bw_state *bw_state, const struct intel_crtc_state *crtc_state); +int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, + u32 points_mask); #endif /* __INTEL_BW_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 5601673c3f30..aa22465bb56e 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3472,6 +3472,8 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, INTEL_OUTPUT_DP_MST); enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + intel_dp_set_infoframes(encoder, false, old_crtc_state, old_conn_state); + /* * Power down sink before disabling the port, otherwise we end * up getting interrupts from the sink on detecting link loss. 
@@ -3680,9 +3682,8 @@ static void intel_enable_ddi_dp(struct intel_atomic_state *state, intel_dp_stop_link_train(intel_dp); intel_edp_backlight_on(crtc_state, conn_state); - intel_psr_enable(intel_dp, crtc_state); - intel_dp_vsc_enable(intel_dp, crtc_state, conn_state); - intel_dp_hdr_metadata_enable(intel_dp, crtc_state, conn_state); + intel_psr_enable(intel_dp, crtc_state, conn_state); + intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); if (crtc_state->has_audio) @@ -3864,7 +3865,8 @@ static void intel_ddi_update_pipe_dp(struct intel_atomic_state *state, intel_ddi_set_dp_msa(crtc_state, conn_state); - intel_psr_update(intel_dp, crtc_state); + intel_psr_update(intel_dp, crtc_state, conn_state); + intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); intel_panel_update_backlight(state, encoder, crtc_state, conn_state); @@ -4235,6 +4237,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->fec_enable); } + pipe_config->infoframes.enable |= + intel_hdmi_infoframes_enabled(encoder, pipe_config); + break; case TRANS_DDI_MODE_SELECT_DP_MST: pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); @@ -4246,6 +4251,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder, REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp); intel_dp_get_m_n(intel_crtc, pipe_config); + + pipe_config->infoframes.enable |= + intel_hdmi_infoframes_enabled(encoder, pipe_config); break; default: break; @@ -4300,6 +4308,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) >= 8) bdw_get_trans_port_sync_config(pipe_config); + + intel_read_dp_sdp(encoder, pipe_config, HDMI_PACKET_TYPE_GAMUT_METADATA); + intel_read_dp_sdp(encoder, pipe_config, DP_SDP_VSC); } static enum intel_output_type @@ -4689,6 +4700,30 @@ intel_ddi_hotplug(struct intel_encoder *encoder, return state; } +static bool lpt_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 bit = dev_priv->hotplug.pch_hpd[encoder->hpd_pin]; + + return intel_de_read(dev_priv, SDEISR) & bit; +} + +static bool hsw_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 bit = dev_priv->hotplug.hpd[encoder->hpd_pin]; + + return intel_de_read(dev_priv, DEISR) & bit; +} + +static bool bdw_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 bit = dev_priv->hotplug.hpd[encoder->hpd_pin]; + + return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & bit; +} + static struct intel_connector * intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) { @@ -4885,6 +4920,23 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) port_name(port)); } + if (INTEL_GEN(dev_priv) >= 11) { + if (intel_phy_is_tc(dev_priv, phy)) + intel_dig_port->connected = intel_tc_port_connected; + else + intel_dig_port->connected = lpt_digital_port_connected; + } else if (INTEL_GEN(dev_priv) >= 8) { + if (port == PORT_A || IS_GEN9_LP(dev_priv)) + intel_dig_port->connected = bdw_digital_port_connected; + else + intel_dig_port->connected = lpt_digital_port_connected; + } else { + if (port == PORT_A) + intel_dig_port->connected = hsw_digital_port_connected; + else + intel_dig_port->connected = lpt_digital_port_connected; + } + intel_infoframe_init(intel_dig_port); return; diff --git 
a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 2a17cf38d3dc..9ea1a397d1b5 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12866,6 +12866,16 @@ intel_dump_infoframe(struct drm_i915_private *dev_priv, hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, frame); } +static void +intel_dump_dp_vsc_sdp(struct drm_i915_private *dev_priv, + const struct drm_dp_vsc_sdp *vsc) +{ + if (!drm_debug_enabled(DRM_UT_KMS)) + return; + + drm_dp_vsc_sdp_log(KERN_DEBUG, dev_priv->drm.dev, vsc); +} + #define OUTPUT_TYPE(x) [INTEL_OUTPUT_ ## x] = #x static const char * const output_type_str[] = { @@ -13023,6 +13033,15 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, if (pipe_config->infoframes.enable & intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_VENDOR)) intel_dump_infoframe(dev_priv, &pipe_config->infoframes.hdmi); + if (pipe_config->infoframes.enable & + intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_DRM)) + intel_dump_infoframe(dev_priv, &pipe_config->infoframes.drm); + if (pipe_config->infoframes.enable & + intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA)) + intel_dump_infoframe(dev_priv, &pipe_config->infoframes.drm); + if (pipe_config->infoframes.enable & + intel_hdmi_infoframe_enable(DP_SDP_VSC)) + intel_dump_dp_vsc_sdp(dev_priv, &pipe_config->infoframes.vsc); drm_dbg_kms(&dev_priv->drm, "requested mode:\n"); drm_mode_debug_printmodeline(&pipe_config->hw.mode); @@ -13470,6 +13489,13 @@ intel_compare_infoframe(const union hdmi_infoframe *a, return memcmp(a, b, sizeof(*a)) == 0; } +static bool +intel_compare_dp_vsc_sdp(const struct drm_dp_vsc_sdp *a, + const struct drm_dp_vsc_sdp *b) +{ + return memcmp(a, b, sizeof(*a)) == 0; +} + static void pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv, bool fastset, const char *name, @@ -13495,6 +13521,31 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv, } } +static void +pipe_config_dp_vsc_sdp_mismatch(struct drm_i915_private *dev_priv, + bool fastset, const char *name, + const struct drm_dp_vsc_sdp *a, + const struct drm_dp_vsc_sdp *b) +{ + if (fastset) { + if (!drm_debug_enabled(DRM_UT_KMS)) + return; + + drm_dbg_kms(&dev_priv->drm, + "fastset mismatch in %s dp sdp\n", name); + drm_dbg_kms(&dev_priv->drm, "expected:\n"); + drm_dp_vsc_sdp_log(KERN_DEBUG, dev_priv->drm.dev, a); + drm_dbg_kms(&dev_priv->drm, "found:\n"); + drm_dp_vsc_sdp_log(KERN_DEBUG, dev_priv->drm.dev, b); + } else { + drm_err(&dev_priv->drm, "mismatch in %s dp sdp\n", name); + drm_err(&dev_priv->drm, "expected:\n"); + drm_dp_vsc_sdp_log(KERN_ERR, dev_priv->drm.dev, a); + drm_err(&dev_priv->drm, "found:\n"); + drm_dp_vsc_sdp_log(KERN_ERR, dev_priv->drm.dev, b); + } +} + static void __printf(4, 5) pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc, const char *name, const char *format, ...) 
@@ -13696,6 +13747,17 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, } \ } while (0) +#define PIPE_CONF_CHECK_DP_VSC_SDP(name) do { \ + if (!current_config->has_psr && !pipe_config->has_psr && \ + !intel_compare_dp_vsc_sdp(¤t_config->infoframes.name, \ + &pipe_config->infoframes.name)) { \ + pipe_config_dp_vsc_sdp_mismatch(dev_priv, fastset, __stringify(name), \ + ¤t_config->infoframes.name, \ + &pipe_config->infoframes.name); \ + ret = false; \ + } \ +} while (0) + #define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \ if (current_config->name1 != pipe_config->name1) { \ pipe_config_mismatch(fastset, crtc, __stringify(name1), \ @@ -13873,6 +13935,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_INFOFRAME(spd); PIPE_CONF_CHECK_INFOFRAME(hdmi); PIPE_CONF_CHECK_INFOFRAME(drm); + PIPE_CONF_CHECK_DP_VSC_SDP(vsc); PIPE_CONF_CHECK_X(sync_mode_slaves_mask); PIPE_CONF_CHECK_I(master_transcoder); @@ -13961,7 +14024,9 @@ static void verify_wm_state(struct intel_crtc *crtc, /* Watermarks */ for (level = 0; level <= max_level; level++) { if (skl_wm_level_equals(&hw_plane_wm->wm[level], - &sw_plane_wm->wm[level])) + &sw_plane_wm->wm[level]) || + (level == 0 && skl_wm_level_equals(&hw_plane_wm->wm[level], + &sw_plane_wm->sagv_wm0))) continue; drm_err(&dev_priv->drm, @@ -14016,7 +14081,9 @@ static void verify_wm_state(struct intel_crtc *crtc, /* Watermarks */ for (level = 0; level <= max_level; level++) { if (skl_wm_level_equals(&hw_plane_wm->wm[level], - &sw_plane_wm->wm[level])) + &sw_plane_wm->wm[level]) || + (level == 0 && skl_wm_level_equals(&hw_plane_wm->wm[level], + &sw_plane_wm->sagv_wm0))) continue; drm_err(&dev_priv->drm, @@ -15378,11 +15445,11 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_set_cdclk_pre_plane_update(state); - intel_sagv_pre_plane_update(state); - intel_modeset_verify_disabled(dev_priv, state); } + intel_sagv_pre_plane_update(state); + /* Complete the events for pipes that have now been disabled */ for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { bool modeset = needs_modeset(new_crtc_state); @@ -15475,11 +15542,10 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_check_cpu_fifo_underruns(dev_priv); intel_check_pch_fifo_underruns(dev_priv); - if (state->modeset) { + if (state->modeset) intel_verify_planes(state); - intel_sagv_post_plane_update(state); - } + intel_sagv_post_plane_update(state); drm_atomic_helper_commit_hw_done(&state->base); @@ -15815,7 +15881,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (new_plane_state->uapi.fence) { /* explicit fencing */ ret = i915_sw_fence_await_dma_fence(&state->commit_ready, new_plane_state->uapi.fence, - I915_FENCE_TIMEOUT, + i915_fence_timeout(dev_priv), GFP_KERNEL); if (ret < 0) return ret; @@ -15842,7 +15908,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane, ret = i915_sw_fence_await_reservation(&state->commit_ready, obj->base.resv, NULL, - false, I915_FENCE_TIMEOUT, + false, + i915_fence_timeout(dev_priv), GFP_KERNEL); if (ret < 0) goto unpin_fb; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 9488449e4b94..2bf3d4cb4ea9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -688,11 +688,13 @@ struct skl_plane_wm { struct skl_wm_level wm[8]; struct skl_wm_level uv_wm[8]; struct skl_wm_level trans_wm; + struct 
skl_wm_level sagv_wm0; bool is_planar; }; struct skl_pipe_wm { struct skl_plane_wm planes[I915_MAX_PLANES]; + bool use_sagv_wm; }; enum vlv_wm_level { @@ -1424,6 +1426,7 @@ struct intel_digital_port { const struct drm_connector_state *conn_state); u32 (*infoframes_enabled)(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config); + bool (*connected)(struct intel_encoder *encoder); }; struct intel_dp_mst_encoder { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 6952b0295096..408c3c1c5e81 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2487,8 +2487,8 @@ static void intel_dp_compute_vsc_sdp(struct intel_dp *intel_dp, { struct drm_dp_vsc_sdp *vsc = &crtc_state->infoframes.vsc; - /* When PSR is enabled, VSC SDP is handled by PSR routine */ - if (intel_psr_enabled(intel_dp)) + /* When a crtc state has PSR, VSC SDP will be handled by PSR routine */ + if (crtc_state->has_psr) return; if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state)) @@ -2500,6 +2500,42 @@ static void intel_dp_compute_vsc_sdp(struct intel_dp *intel_dp, &crtc_state->infoframes.vsc); } +void intel_dp_compute_psr_vsc_sdp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state, + struct drm_dp_vsc_sdp *vsc) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + vsc->sdp_type = DP_SDP_VSC; + + if (dev_priv->psr.psr2_enabled) { + if (dev_priv->psr.colorimetry_support && + intel_dp_needs_vsc_sdp(crtc_state, conn_state)) { + /* [PSR2, +Colorimetry] */ + intel_dp_compute_vsc_colorimetry(crtc_state, conn_state, + vsc); + } else { + /* + * [PSR2, -Colorimetry] + * Prepare VSC Header for SU as per eDP 1.4 spec, Table 6-11 + * 3D stereo + PSR/PSR2 + Y-coordinate. + */ + vsc->revision = 0x4; + vsc->length = 0xe; + } + } else { + /* + * [PSR1] + * Prepare VSC Header for SU as per DP 1.4 spec, Table 2-118 + * VSC SDP supporting 3D stereo + PSR (applies to eDP v1.3 or + * higher). + */ + vsc->revision = 0x2; + vsc->length = 0x8; + } +} + static void intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, @@ -4791,6 +4827,13 @@ static ssize_t intel_dp_vsc_sdp_pack(const struct drm_dp_vsc_sdp *vsc, sdp->sdp_header.HB2 = vsc->revision; /* Revision Number */ sdp->sdp_header.HB3 = vsc->length; /* Number of Valid Data Bytes */ + /* + * Only revision 0x5 supports Pixel Encoding/Colorimetry Format as + * per DP 1.4a spec. 
+ */ + if (vsc->revision != 0x5) + goto out; + /* VSC SDP Payload for DB16 through DB18 */ /* Pixel Encoding and Colorimetry Formats */ sdp->db[16] = (vsc->pixelformat & 0xf) << 4; /* DB16[7:4] */ @@ -4823,6 +4866,7 @@ static ssize_t intel_dp_vsc_sdp_pack(const struct drm_dp_vsc_sdp *vsc, /* Content Type */ sdp->db[18] = vsc->content_type & 0x7; +out: return length; } @@ -4935,6 +4979,24 @@ static void intel_write_dp_sdp(struct intel_encoder *encoder, intel_dig_port->write_infoframe(encoder, crtc_state, type, &sdp, len); } +void intel_write_dp_vsc_sdp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + struct drm_dp_vsc_sdp *vsc) +{ + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct dp_sdp sdp = {}; + ssize_t len; + + len = intel_dp_vsc_sdp_pack(vsc, &sdp, sizeof(sdp)); + + if (drm_WARN_ON(&dev_priv->drm, len < 0)) + return; + + intel_dig_port->write_infoframe(encoder, crtc_state, DP_SDP_VSC, + &sdp, len); +} + void intel_dp_set_infoframes(struct intel_encoder *encoder, bool enable, const struct intel_crtc_state *crtc_state, @@ -4971,233 +5033,191 @@ void intel_dp_set_infoframes(struct intel_encoder *encoder, intel_write_dp_sdp(encoder, crtc_state, HDMI_PACKET_TYPE_GAMUT_METADATA); } -static void -intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +static int intel_dp_vsc_sdp_unpack(struct drm_dp_vsc_sdp *vsc, + const void *buffer, size_t size) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct dp_sdp vsc_sdp = {}; - - /* Prepare VSC Header for SU as per DP 1.4a spec, Table 2-119 */ - vsc_sdp.sdp_header.HB0 = 0; - vsc_sdp.sdp_header.HB1 = 0x7; + const struct dp_sdp *sdp = buffer; - /* - * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/ - * Colorimetry Format indication. - */ - vsc_sdp.sdp_header.HB2 = 0x5; + if (size < sizeof(struct dp_sdp)) + return -EINVAL; - /* - * VSC SDP supporting 3D stereo, + PSR2, + Pixel Encoding/ - * Colorimetry Format indication (HB2 = 05h). 
- */ - vsc_sdp.sdp_header.HB3 = 0x13; + memset(vsc, 0, size); - /* DP 1.4a spec, Table 2-120 */ - switch (crtc_state->output_format) { - case INTEL_OUTPUT_FORMAT_YCBCR444: - vsc_sdp.db[16] = 0x1 << 4; /* YCbCr 444 : DB16[7:4] = 1h */ - break; - case INTEL_OUTPUT_FORMAT_YCBCR420: - vsc_sdp.db[16] = 0x3 << 4; /* YCbCr 420 : DB16[7:4] = 3h */ - break; - case INTEL_OUTPUT_FORMAT_RGB: - default: - /* RGB: DB16[7:4] = 0h */ - break; - } + if (sdp->sdp_header.HB0 != 0) + return -EINVAL; - switch (conn_state->colorspace) { - case DRM_MODE_COLORIMETRY_BT709_YCC: - vsc_sdp.db[16] |= 0x1; - break; - case DRM_MODE_COLORIMETRY_XVYCC_601: - vsc_sdp.db[16] |= 0x2; - break; - case DRM_MODE_COLORIMETRY_XVYCC_709: - vsc_sdp.db[16] |= 0x3; - break; - case DRM_MODE_COLORIMETRY_SYCC_601: - vsc_sdp.db[16] |= 0x4; - break; - case DRM_MODE_COLORIMETRY_OPYCC_601: - vsc_sdp.db[16] |= 0x5; - break; - case DRM_MODE_COLORIMETRY_BT2020_CYCC: - case DRM_MODE_COLORIMETRY_BT2020_RGB: - vsc_sdp.db[16] |= 0x6; - break; - case DRM_MODE_COLORIMETRY_BT2020_YCC: - vsc_sdp.db[16] |= 0x7; - break; - case DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65: - case DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER: - vsc_sdp.db[16] |= 0x4; /* DCI-P3 (SMPTE RP 431-2) */ - break; - default: - /* sRGB (IEC 61966-2-1) / ITU-R BT.601: DB16[0:3] = 0h */ + if (sdp->sdp_header.HB1 != DP_SDP_VSC) + return -EINVAL; - /* RGB->YCBCR color conversion uses the BT.709 color space. */ - if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) - vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */ - break; - } + vsc->sdp_type = sdp->sdp_header.HB1; + vsc->revision = sdp->sdp_header.HB2; + vsc->length = sdp->sdp_header.HB3; - /* - * For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only, - * the following Component Bit Depth values are defined: - * 001b = 8bpc. - * 010b = 10bpc. - * 011b = 12bpc. - * 100b = 16bpc. - */ - switch (crtc_state->pipe_bpp) { - case 24: /* 8bpc */ - vsc_sdp.db[17] = 0x1; - break; - case 30: /* 10bpc */ - vsc_sdp.db[17] = 0x2; - break; - case 36: /* 12bpc */ - vsc_sdp.db[17] = 0x3; - break; - case 48: /* 16bpc */ - vsc_sdp.db[17] = 0x4; - break; - default: - MISSING_CASE(crtc_state->pipe_bpp); - break; - } + if ((sdp->sdp_header.HB2 == 0x2 && sdp->sdp_header.HB3 == 0x8) || + (sdp->sdp_header.HB2 == 0x4 && sdp->sdp_header.HB3 == 0xe)) { + /* + * - HB2 = 0x2, HB3 = 0x8 + * VSC SDP supporting 3D stereo + PSR + * - HB2 = 0x4, HB3 = 0xe + * VSC SDP supporting 3D stereo + PSR2 with Y-coordinate of + * first scan line of the SU region (applies to eDP v1.4b + * and higher). + */ + return 0; + } else if (sdp->sdp_header.HB2 == 0x5 && sdp->sdp_header.HB3 == 0x13) { + /* + * - HB2 = 0x5, HB3 = 0x13 + * VSC SDP supporting 3D stereo + PSR2 + Pixel Encoding/Colorimetry + * Format. + */ + vsc->pixelformat = (sdp->db[16] >> 4) & 0xf; + vsc->colorimetry = sdp->db[16] & 0xf; + vsc->dynamic_range = (sdp->db[17] >> 7) & 0x1; - /* - * Dynamic Range (Bit 7) - * 0 = VESA range, 1 = CTA range. - * all YCbCr are always limited range - */ - vsc_sdp.db[17] |= 0x80; + switch (sdp->db[17] & 0x7) { + case 0x0: + vsc->bpc = 6; + break; + case 0x1: + vsc->bpc = 8; + break; + case 0x2: + vsc->bpc = 10; + break; + case 0x3: + vsc->bpc = 12; + break; + case 0x4: + vsc->bpc = 16; + break; + default: + MISSING_CASE(sdp->db[17] & 0x7); + return -EINVAL; + } - /* - * Content Type (Bits 2:0) - * 000b = Not defined. - * 001b = Graphics. - * 010b = Photo. - * 011b = Video. - * 100b = Game - * All other values are RESERVED. 
- * Note: See CTA-861-G for the definition and expected - * processing by a stream sink for the above contect types. - */ - vsc_sdp.db[18] = 0; + vsc->content_type = sdp->db[18] & 0x7; + } else { + return -EINVAL; + } - intel_dig_port->write_infoframe(&intel_dig_port->base, - crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp)); + return 0; } -static void -intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +static int +intel_dp_hdr_metadata_infoframe_sdp_unpack(struct hdmi_drm_infoframe *drm_infoframe, + const void *buffer, size_t size) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct dp_sdp infoframe_sdp = {}; - struct hdmi_drm_infoframe drm_infoframe = {}; - const int infoframe_size = HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE; - unsigned char buf[HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE]; - ssize_t len; int ret; - ret = drm_hdmi_infoframe_set_hdr_metadata(&drm_infoframe, conn_state); - if (ret) { - drm_dbg_kms(&i915->drm, - "couldn't set HDR metadata in infoframe\n"); - return; - } + const struct dp_sdp *sdp = buffer; - len = hdmi_drm_infoframe_pack_only(&drm_infoframe, buf, sizeof(buf)); - if (len < 0) { - drm_dbg_kms(&i915->drm, - "buffer size is smaller than hdr metadata infoframe\n"); - return; - } + if (size < sizeof(struct dp_sdp)) + return -EINVAL; - if (len != infoframe_size) { - drm_dbg_kms(&i915->drm, "wrong static hdr metadata size\n"); - return; - } + if (sdp->sdp_header.HB0 != 0) + return -EINVAL; - /* - * Set up the infoframe sdp packet for HDR static metadata. - * Prepare VSC Header for SU as per DP 1.4a spec, - * Table 2-100 and Table 2-101 - */ + if (sdp->sdp_header.HB1 != HDMI_INFOFRAME_TYPE_DRM) + return -EINVAL; - /* Packet ID, 00h for non-Audio INFOFRAME */ - infoframe_sdp.sdp_header.HB0 = 0; - /* - * Packet Type 80h + Non-audio INFOFRAME Type value - * HDMI_INFOFRAME_TYPE_DRM: 0x87, - */ - infoframe_sdp.sdp_header.HB1 = drm_infoframe.type; /* * Least Significant Eight Bits of (Data Byte Count – 1) - * infoframe_size - 1, + * 1Dh (i.e., Data Byte Count = 30 bytes). */ - infoframe_sdp.sdp_header.HB2 = 0x1D; + if (sdp->sdp_header.HB2 != 0x1D) + return -EINVAL; + + /* Most Significant Two Bits of (Data Byte Count – 1), Clear to 00b. 
*/ + if ((sdp->sdp_header.HB3 & 0x3) != 0) + return -EINVAL; + /* INFOFRAME SDP Version Number */ - infoframe_sdp.sdp_header.HB3 = (0x13 << 2); + if (((sdp->sdp_header.HB3 >> 2) & 0x3f) != 0x13) + return -EINVAL; + /* CTA Header Byte 2 (INFOFRAME Version Number) */ - infoframe_sdp.db[0] = drm_infoframe.version; + if (sdp->db[0] != 1) + return -EINVAL; + /* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */ - infoframe_sdp.db[1] = drm_infoframe.length; - /* - * Copy HDMI_DRM_INFOFRAME_SIZE size from a buffer after - * HDMI_INFOFRAME_HEADER_SIZE - */ - BUILD_BUG_ON(sizeof(infoframe_sdp.db) < HDMI_DRM_INFOFRAME_SIZE + 2); - memcpy(&infoframe_sdp.db[2], &buf[HDMI_INFOFRAME_HEADER_SIZE], - HDMI_DRM_INFOFRAME_SIZE); + if (sdp->db[1] != HDMI_DRM_INFOFRAME_SIZE) + return -EINVAL; - /* - * Size of DP infoframe sdp packet for HDR static metadata is consist of - * - DP SDP Header(struct dp_sdp_header): 4 bytes - * - Two Data Blocks: 2 bytes - * CTA Header Byte2 (INFOFRAME Version Number) - * CTA Header Byte3 (Length of INFOFRAME) - * - HDMI_DRM_INFOFRAME_SIZE: 26 bytes - * - * Prior to GEN11's GMP register size is identical to DP HDR static metadata - * infoframe size. But GEN11+ has larger than that size, write_infoframe - * will pad rest of the size. - */ - intel_dig_port->write_infoframe(&intel_dig_port->base, crtc_state, - HDMI_PACKET_TYPE_GAMUT_METADATA, - &infoframe_sdp, - sizeof(struct dp_sdp_header) + 2 + HDMI_DRM_INFOFRAME_SIZE); + ret = hdmi_drm_infoframe_unpack_only(drm_infoframe, &sdp->db[2], + HDMI_DRM_INFOFRAME_SIZE); + + return ret; } -void intel_dp_vsc_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +static void intel_read_dp_vsc_sdp(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct drm_dp_vsc_sdp *vsc) { - if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state)) + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + unsigned int type = DP_SDP_VSC; + struct dp_sdp sdp = {}; + int ret; + + /* When PSR is enabled, VSC SDP is handled by PSR routine */ + if (intel_psr_enabled(intel_dp)) + return; + + if ((crtc_state->infoframes.enable & + intel_hdmi_infoframe_enable(type)) == 0) return; - intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state); + intel_dig_port->read_infoframe(encoder, crtc_state, type, &sdp, sizeof(sdp)); + + ret = intel_dp_vsc_sdp_unpack(vsc, &sdp, sizeof(sdp)); + + if (ret) + drm_dbg_kms(&dev_priv->drm, "Failed to unpack DP VSC SDP\n"); } -void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +static void intel_read_dp_hdr_metadata_infoframe_sdp(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct hdmi_drm_infoframe *drm_infoframe) { - if (!conn_state->hdr_output_metadata) + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + unsigned int type = HDMI_PACKET_TYPE_GAMUT_METADATA; + struct dp_sdp sdp = {}; + int ret; + + if ((crtc_state->infoframes.enable & + intel_hdmi_infoframe_enable(type)) == 0) return; - intel_dp_setup_hdr_metadata_infoframe_sdp(intel_dp, - crtc_state, - conn_state); + intel_dig_port->read_infoframe(encoder, crtc_state, type, &sdp, + sizeof(sdp)); + + ret = 
intel_dp_hdr_metadata_infoframe_sdp_unpack(drm_infoframe, &sdp, + sizeof(sdp)); + + if (ret) + drm_dbg_kms(&dev_priv->drm, + "Failed to unpack DP HDR Metadata Infoframe SDP\n"); +} + +void intel_read_dp_sdp(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + unsigned int type) +{ + switch (type) { + case DP_SDP_VSC: + intel_read_dp_vsc_sdp(encoder, crtc_state, + &crtc_state->infoframes.vsc); + break; + case HDMI_PACKET_TYPE_GAMUT_METADATA: + intel_read_dp_hdr_metadata_infoframe_sdp(encoder, crtc_state, + &crtc_state->infoframes.drm.drm); + break; + default: + MISSING_CASE(type); + break; + } } static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp) @@ -5998,64 +6018,7 @@ edp_detect(struct intel_dp *intel_dp) static bool ibx_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_B: - bit = SDE_PORTB_HOTPLUG; - break; - case HPD_PORT_C: - bit = SDE_PORTC_HOTPLUG; - break; - case HPD_PORT_D: - bit = SDE_PORTD_HOTPLUG; - break; - default: - MISSING_CASE(encoder->hpd_pin); - return false; - } - - return intel_de_read(dev_priv, SDEISR) & bit; -} - -static bool cpt_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_B: - bit = SDE_PORTB_HOTPLUG_CPT; - break; - case HPD_PORT_C: - bit = SDE_PORTC_HOTPLUG_CPT; - break; - case HPD_PORT_D: - bit = SDE_PORTD_HOTPLUG_CPT; - break; - default: - MISSING_CASE(encoder->hpd_pin); - return false; - } - - return intel_de_read(dev_priv, SDEISR) & bit; -} - -static bool spt_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_A: - bit = SDE_PORTA_HOTPLUG_SPT; - break; - case HPD_PORT_E: - bit = SDE_PORTE_HOTPLUG_SPT; - break; - default: - return cpt_digital_port_connected(encoder); - } + u32 bit = dev_priv->hotplug.pch_hpd[encoder->hpd_pin]; return intel_de_read(dev_priv, SDEISR) & bit; } @@ -6109,89 +6072,9 @@ static bool gm45_digital_port_connected(struct intel_encoder *encoder) static bool ilk_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 bit = dev_priv->hotplug.hpd[encoder->hpd_pin]; - if (encoder->hpd_pin == HPD_PORT_A) - return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG; - else - return ibx_digital_port_connected(encoder); -} - -static bool snb_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (encoder->hpd_pin == HPD_PORT_A) - return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG; - else - return cpt_digital_port_connected(encoder); -} - -static bool ivb_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (encoder->hpd_pin == HPD_PORT_A) - return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG_IVB; - else - return cpt_digital_port_connected(encoder); -} - -static bool bdw_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (encoder->hpd_pin == HPD_PORT_A) - return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & GEN8_PORT_DP_A_HOTPLUG; - else - return cpt_digital_port_connected(encoder); -} - -static bool 
bxt_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_A: - bit = BXT_DE_PORT_HP_DDIA; - break; - case HPD_PORT_B: - bit = BXT_DE_PORT_HP_DDIB; - break; - case HPD_PORT_C: - bit = BXT_DE_PORT_HP_DDIC; - break; - default: - MISSING_CASE(encoder->hpd_pin); - return false; - } - - return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & bit; -} - -static bool intel_combo_phy_connected(struct drm_i915_private *dev_priv, - enum phy phy) -{ - if (HAS_PCH_MCC(dev_priv) && phy == PHY_C) - return intel_de_read(dev_priv, SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); - - return intel_de_read(dev_priv, SDEISR) & SDE_DDI_HOTPLUG_ICP(phy); -} - -static bool icp_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - - if (intel_phy_is_combo(dev_priv, phy)) - return intel_combo_phy_connected(dev_priv, phy); - else if (intel_phy_is_tc(dev_priv, phy)) - return intel_tc_port_connected(dig_port); - else - MISSING_CASE(encoder->hpd_pin); - - return false; + return intel_de_read(dev_priv, DEISR) & bit; } /* @@ -6205,44 +6088,15 @@ static bool icp_digital_port_connected(struct intel_encoder *encoder) * * Return %true if port is connected, %false otherwise. */ -static bool __intel_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (HAS_GMCH(dev_priv)) { - if (IS_GM45(dev_priv)) - return gm45_digital_port_connected(encoder); - else - return g4x_digital_port_connected(encoder); - } - - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - return icp_digital_port_connected(encoder); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) - return spt_digital_port_connected(encoder); - else if (IS_GEN9_LP(dev_priv)) - return bxt_digital_port_connected(encoder); - else if (IS_GEN(dev_priv, 8)) - return bdw_digital_port_connected(encoder); - else if (IS_GEN(dev_priv, 7)) - return ivb_digital_port_connected(encoder); - else if (IS_GEN(dev_priv, 6)) - return snb_digital_port_connected(encoder); - else if (IS_GEN(dev_priv, 5)) - return ilk_digital_port_connected(encoder); - - MISSING_CASE(INTEL_GEN(dev_priv)); - return false; -} - bool intel_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); bool is_connected = false; intel_wakeref_t wakeref; with_intel_display_power(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref) - is_connected = __intel_digital_port_connected(encoder); + is_connected = dig_port->connected(encoder); return is_connected; } @@ -8522,6 +8376,18 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; + if (HAS_GMCH(dev_priv)) { + if (IS_GM45(dev_priv)) + intel_dig_port->connected = gm45_digital_port_connected; + else + intel_dig_port->connected = g4x_digital_port_connected; + } else { + if (port == PORT_A) + intel_dig_port->connected = ilk_digital_port_connected; + else + intel_dig_port->connected = ibx_digital_port_connected; + } + if (port != PORT_A) intel_infoframe_init(intel_dig_port); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 6659ce15a693..1702959ca079 100644 --- 
a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -16,6 +16,7 @@ struct drm_connector_state; struct drm_encoder; struct drm_i915_private; struct drm_modeset_acquire_ctx; +struct drm_dp_vsc_sdp; struct intel_connector; struct intel_crtc_state; struct intel_digital_port; @@ -108,15 +109,19 @@ int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); -void intel_dp_vsc_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state); -void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, +void intel_dp_compute_psr_vsc_sdp(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state); + const struct drm_connector_state *conn_state, + struct drm_dp_vsc_sdp *vsc); +void intel_write_dp_vsc_sdp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + struct drm_dp_vsc_sdp *vsc); void intel_dp_set_infoframes(struct intel_encoder *encoder, bool enable, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); +void intel_read_dp_sdp(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + unsigned int type); bool intel_digital_port_connected(struct intel_encoder *encoder); void intel_dp_process_phy_request(struct intel_dp *intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 4d2384650383..d18b406f2a7d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -510,10 +510,6 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, intel_ddi_enable_transcoder_func(encoder, pipe_config); - intel_enable_pipe(pipe_config); - - intel_crtc_vblank_on(pipe_config); - drm_dbg_kms(&dev_priv->drm, "active links %d\n", intel_dp->active_mst_links); @@ -524,6 +520,11 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, drm_dp_check_act_status(&intel_dp->mst_mgr); drm_dp_update_payload_part2(&intel_dp->mst_mgr); + + intel_enable_pipe(pipe_config); + + intel_crtc_vblank_on(pipe_config); + if (pipe_config->has_audio) intel_audio_codec_enable(encoder, pipe_config, conn_state); } diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index c6afa10e814c..1c26673acb2d 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -540,6 +540,9 @@ static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) { struct intel_fbc *fbc = &dev_priv->fbc; + if (WARN_ON(intel_fbc_hw_is_active(dev_priv))) + return; + if (!drm_mm_node_allocated(&fbc->compressed_fb)) return; @@ -564,7 +567,7 @@ void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) } static bool stride_is_valid(struct drm_i915_private *dev_priv, - unsigned int stride) + u64 modifier, unsigned int stride) { /* This should have been caught earlier. 
*/ if (drm_WARN_ON_ONCE(&dev_priv->drm, (stride & (64 - 1)) != 0)) @@ -580,6 +583,11 @@ static bool stride_is_valid(struct drm_i915_private *dev_priv, if (IS_GEN(dev_priv, 4) && !IS_G4X(dev_priv) && stride < 2048) return false; + /* Display WA #1105: skl,bxt,kbl,cfl,glk */ + if (IS_GEN(dev_priv, 9) && + modifier == DRM_FORMAT_MOD_LINEAR && stride & 511) + return false; + if (stride > 16384) return false; @@ -810,7 +818,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } - if (!stride_is_valid(dev_priv, cache->fb.stride)) { + if (!stride_is_valid(dev_priv, cache->fb.modifier, cache->fb.stride)) { fbc->no_fbc_reason = "framebuffer stride not supported"; return false; } diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index d807c5648c87..6ff7b226f0a1 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -522,7 +522,7 @@ u32 lspcon_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { /* FIXME actually read this from the hw */ - return enc_to_intel_lspcon(encoder)->active; + return 0; } void lspcon_resume(struct intel_lspcon *lspcon) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index a0569fdfeb16..b7a2c102648a 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -30,6 +30,7 @@ #include "intel_display_types.h" #include "intel_psr.h" #include "intel_sprite.h" +#include "intel_hdmi.h" /** * DOC: Panel Self Refresh (PSR/SRD) @@ -357,39 +358,6 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) } } -static void intel_psr_setup_vsc(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) -{ - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - struct dp_sdp psr_vsc; - - if (dev_priv->psr.psr2_enabled) { - /* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */ - memset(&psr_vsc, 0, sizeof(psr_vsc)); - psr_vsc.sdp_header.HB0 = 0; - psr_vsc.sdp_header.HB1 = 0x7; - if (dev_priv->psr.colorimetry_support) { - psr_vsc.sdp_header.HB2 = 0x5; - psr_vsc.sdp_header.HB3 = 0x13; - } else { - psr_vsc.sdp_header.HB2 = 0x4; - psr_vsc.sdp_header.HB3 = 0xe; - } - } else { - /* Prepare VSC packet as per EDP 1.3 spec, Table 3.10 */ - memset(&psr_vsc, 0, sizeof(psr_vsc)); - psr_vsc.sdp_header.HB0 = 0; - psr_vsc.sdp_header.HB1 = 0x7; - psr_vsc.sdp_header.HB2 = 0x2; - psr_vsc.sdp_header.HB3 = 0x8; - } - - intel_dig_port->write_infoframe(&intel_dig_port->base, - crtc_state, - DP_SDP_VSC, &psr_vsc, sizeof(psr_vsc)); -} - static void hsw_psr_setup_aux(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -756,6 +724,8 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, if (intel_dp != dev_priv->psr.dp) return; + if (!psr_global_enabled(dev_priv)) + return; /* * HSW spec explicitly says PSR is tied to port A. 
* BDW+ platforms have a instance of PSR registers per transcoder but @@ -798,6 +768,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, crtc_state->has_psr = true; crtc_state->has_psr2 = intel_psr2_config_valid(intel_dp, crtc_state); + crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_VSC); } static void intel_psr_activate(struct intel_dp *intel_dp) @@ -880,9 +851,12 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, } static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, - const struct intel_crtc_state *crtc_state) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = dev_priv->psr.dp; + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct intel_encoder *encoder = &intel_dig_port->base; u32 val; drm_WARN_ON(&dev_priv->drm, dev_priv->psr.enabled); @@ -921,7 +895,9 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, drm_dbg_kms(&dev_priv->drm, "Enabling PSR%s\n", dev_priv->psr.psr2_enabled ? "2" : "1"); - intel_psr_setup_vsc(intel_dp, crtc_state); + intel_dp_compute_psr_vsc_sdp(intel_dp, crtc_state, conn_state, + &dev_priv->psr.vsc); + intel_write_dp_vsc_sdp(encoder, crtc_state, &dev_priv->psr.vsc); intel_psr_enable_sink(intel_dp); intel_psr_enable_source(intel_dp, crtc_state); dev_priv->psr.enabled = true; @@ -933,11 +909,13 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, * intel_psr_enable - Enable PSR * @intel_dp: Intel DP * @crtc_state: new CRTC state + * @conn_state: new CONNECTOR state * * This function can only be called after the pipe is fully trained and enabled. */ void intel_psr_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -958,7 +936,7 @@ void intel_psr_enable(struct intel_dp *intel_dp, goto unlock; } - intel_psr_enable_locked(dev_priv, crtc_state); + intel_psr_enable_locked(dev_priv, crtc_state, conn_state); unlock: mutex_unlock(&dev_priv->psr.lock); @@ -1091,13 +1069,15 @@ static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv) * intel_psr_update - Update PSR state * @intel_dp: Intel DP * @crtc_state: new CRTC state + * @conn_state: new CONNECTOR state * * This functions will update PSR states, disabling, enabling or switching PSR * version when executing fastsets. For full modeset, intel_psr_disable() and * intel_psr_enable() should be called instead. 
*/ void intel_psr_update(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct i915_psr *psr = &dev_priv->psr; @@ -1134,7 +1114,7 @@ void intel_psr_update(struct intel_dp *intel_dp, intel_psr_disable_locked(intel_dp); if (enable) - intel_psr_enable_locked(dev_priv, crtc_state); + intel_psr_enable_locked(dev_priv, crtc_state, conn_state); unlock: mutex_unlock(&dev_priv->psr.lock); diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 274fc6bb6221..b4515186d5f4 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -17,11 +17,13 @@ struct intel_dp; #define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support) void intel_psr_init_dpcd(struct intel_dp *intel_dp); void intel_psr_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state); + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); void intel_psr_disable(struct intel_dp *intel_dp, const struct intel_crtc_state *old_crtc_state); void intel_psr_update(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state); + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); int intel_psr_debug_set(struct drm_i915_private *dev_priv, u64 value); void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index d3bd5e798fbc..b161c15baf86 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -238,8 +238,8 @@ static void tc_port_fixup_legacy_flag(struct intel_digital_port *dig_port, static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; + u32 isr_bit = i915->hotplug.pch_hpd[dig_port->base.hpd_pin]; u32 mask = 0; u32 val; @@ -258,7 +258,7 @@ static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port) if (val & TC_LIVE_STATE_TC(dig_port->tc_phy_fia_idx)) mask |= BIT(TC_PORT_DP_ALT); - if (intel_uncore_read(uncore, SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port)) + if (intel_uncore_read(uncore, SDEISR) & isr_bit) mask |= BIT(TC_PORT_LEGACY); /* The sink can be connected only in a single mode. */ @@ -562,8 +562,9 @@ static bool intel_tc_port_needs_reset(struct intel_digital_port *dig_port) * connected ports are usable, and avoids exposing to the users objects they * can't really use. 
*/ -bool intel_tc_port_connected(struct intel_digital_port *dig_port) +bool intel_tc_port_connected(struct intel_encoder *encoder) { + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); bool is_connected; intel_wakeref_t tc_cold_wref; diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h index 463f1b3c836f..b619e4736f85 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.h +++ b/drivers/gpu/drm/i915/display/intel_tc.h @@ -10,8 +10,9 @@ #include <linux/types.h> struct intel_digital_port; +struct intel_encoder; -bool intel_tc_port_connected(struct intel_digital_port *dig_port); +bool intel_tc_port_connected(struct intel_encoder *encoder); u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port); u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port); int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port); diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 05c7cbe32eb4..aef7fe932d1a 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -462,7 +462,7 @@ struct bdb_general_definitions { * number = (block_size - sizeof(bdb_general_definitions))/ * defs->child_dev_size; */ - u8 devices[0]; + u8 devices[]; } __packed; /* @@ -839,7 +839,7 @@ struct bdb_mipi_config { struct bdb_mipi_sequence { u8 version; - u8 data[0]; /* up to 6 variable length blocks */ + u8 data[]; /* up to 6 variable length blocks */ } __packed; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 34be4c0ee7c5..bc0223716906 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -108,7 +108,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain, obj->base.resv, NULL, true, - I915_FENCE_TIMEOUT, + i915_fence_timeout(to_i915(obj->base.dev)), I915_FENCE_GFP); dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); dma_fence_work_commit(&clflush->base); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 0598e5382a1d..d3a86a4d5c04 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -6,7 +6,6 @@ #include "i915_drv.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" -#include "gt/intel_engine_pool.h" #include "i915_gem_client_blt.h" #include "i915_gem_object_blt.h" @@ -289,8 +288,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, i915_gem_object_lock(obj); err = i915_sw_fence_await_reservation(&work->wait, - obj->base.resv, NULL, - true, I915_FENCE_TIMEOUT, + obj->base.resv, NULL, true, 0, I915_FENCE_GFP); if (err < 0) { dma_fence_set_error(&work->dma, err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 964f73f062c1..c0d59d48e198 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -15,8 +15,8 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" -#include "gt/intel_engine_pool.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_buffer_pool.h" #include "gt/intel_gt_pm.h" #include "gt/intel_ring.h" @@ -268,7 +268,9 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; + struct i915_vma *target; 
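+		/* @target: last vma prepared for GPU relocations; rq/rq_vma/rq_cmd/rq_size track the relocation batch being built */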
struct i915_request *rq; + struct i915_vma *rq_vma; u32 *rq_cmd; unsigned int rq_size; } reloc_cache; @@ -953,7 +955,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; cache->rq = NULL; - cache->rq_size = 0; + cache->target = NULL; } static inline void *unmask_page(unsigned long p) @@ -975,29 +977,122 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } -static void reloc_gpu_flush(struct reloc_cache *cache) +#define RELOC_TAIL 4 + +static int reloc_gpu_chain(struct reloc_cache *cache) { - struct drm_i915_gem_object *obj = cache->rq->batch->obj; + struct intel_gt_buffer_pool_node *pool; + struct i915_request *rq = cache->rq; + struct i915_vma *batch; + u32 *cmd; + int err; - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; + pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); - __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); - i915_gem_object_unpin_map(obj); + batch = i915_vma_instance(pool->obj, rq->context->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_pool; + } - intel_gt_chipset_flush(cache->rq->engine->gt); + err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); + if (err) + goto out_pool; - i915_request_add(cache->rq); - cache->rq = NULL; + GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32)); + cmd = cache->rq_cmd + cache->rq_size; + *cmd++ = MI_ARB_CHECK; + if (cache->gen >= 8) + *cmd++ = MI_BATCH_BUFFER_START_GEN8; + else if (cache->gen >= 6) + *cmd++ = MI_BATCH_BUFFER_START; + else + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(batch->node.start); + *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */ + i915_gem_object_flush_map(cache->rq_vma->obj); + i915_gem_object_unpin_map(cache->rq_vma->obj); + cache->rq_vma = NULL; + + err = intel_gt_buffer_pool_mark_active(pool, rq); + if (err == 0) { + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + } + i915_vma_unpin(batch); + if (err) + goto out_pool; + + cmd = i915_gem_object_pin_map(batch->obj, + cache->has_llc ? + I915_MAP_FORCE_WB : + I915_MAP_FORCE_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_pool; + } + + /* Return with batch mapping (cmd) still pinned */ + cache->rq_cmd = cmd; + cache->rq_size = 0; + cache->rq_vma = batch; + +out_pool: + intel_gt_buffer_pool_put(pool); + return err; +} + +static unsigned int reloc_bb_flags(const struct reloc_cache *cache) +{ + return cache->gen > 5 ? 
0 : I915_DISPATCH_SECURE; +} + +static int reloc_gpu_flush(struct reloc_cache *cache) +{ + struct i915_request *rq; + int err; + + rq = fetch_and_zero(&cache->rq); + if (!rq) + return 0; + + if (cache->rq_vma) { + struct drm_i915_gem_object *obj = cache->rq_vma->obj; + + GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); + cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END; + + __i915_gem_object_flush_map(obj, + 0, sizeof(u32) * cache->rq_size); + i915_gem_object_unpin_map(obj); + } + + err = 0; + if (rq->engine->emit_init_breadcrumb) + err = rq->engine->emit_init_breadcrumb(rq); + if (!err) + err = rq->engine->emit_bb_start(rq, + rq->batch->node.start, + PAGE_SIZE, + reloc_bb_flags(cache)); + if (err) + i915_request_set_error_once(rq, err); + + intel_gt_chipset_flush(rq->engine->gt); + i915_request_add(rq); + + return err; } static void reloc_cache_reset(struct reloc_cache *cache) { void *vaddr; - if (cache->rq) - reloc_gpu_flush(cache); - if (!cache->vaddr) return; @@ -1190,17 +1285,17 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) } static int __reloc_gpu_alloc(struct i915_execbuffer *eb, - struct i915_vma *vma, + struct intel_engine_cs *engine, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - pool = intel_engine_get_pool(eb->engine, PAGE_SIZE); + pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1213,7 +1308,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto out_pool; } - batch = i915_vma_instance(pool->obj, vma->vm, NULL); + batch = i915_vma_instance(pool->obj, eb->context->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_unmap; @@ -1223,26 +1318,32 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_request_create(eb->context); + if (engine == eb->context->engine) { + rq = i915_request_create(eb->context); + } else { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err_unpin; + } + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(eb->context->vm); + + rq = intel_context_create_request(ce); + intel_context_put(ce); + } if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; } - err = intel_engine_pool_mark_active(pool, rq); - if (err) - goto err_request; - - err = reloc_move_to_gpu(rq, vma); + err = intel_gt_buffer_pool_mark_active(pool, rq); if (err) goto err_request; - err = eb->engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, - cache->gen > 5 ? 
0 : I915_DISPATCH_SECURE); - if (err) - goto skip_request; - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) @@ -1257,6 +1358,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq = rq; cache->rq_cmd = cmd; cache->rq_size = 0; + cache->rq_vma = batch; /* Return with batch mapping (cmd) still pinned */ goto out_pool; @@ -1270,31 +1372,57 @@ err_unpin: err_unmap: i915_gem_object_unpin_map(pool->obj); out_pool: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); return err; } +static bool reloc_can_use_engine(const struct intel_engine_cs *engine) +{ + return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6); +} + static u32 *reloc_gpu(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; u32 *cmd; - - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) - reloc_gpu_flush(cache); + int err; if (unlikely(!cache->rq)) { - int err; + struct intel_engine_cs *engine = eb->engine; - if (!intel_engine_can_store_dword(eb->engine)) - return ERR_PTR(-ENODEV); + if (!reloc_can_use_engine(engine)) { + engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; + if (!engine) + return ERR_PTR(-ENODEV); + } - err = __reloc_gpu_alloc(eb, vma, len); + err = __reloc_gpu_alloc(eb, engine, len); if (unlikely(err)) return ERR_PTR(err); } + if (vma != cache->target) { + err = reloc_move_to_gpu(cache->rq, vma); + if (unlikely(err)) { + i915_request_set_error_once(cache->rq, err); + return ERR_PTR(err); + } + + cache->target = vma; + } + + if (unlikely(cache->rq_size + len > + PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) { + err = reloc_gpu_chain(cache); + if (unlikely(err)) { + i915_request_set_error_once(cache->rq, err); + return ERR_PTR(err); + } + } + + GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32)); cmd = cache->rq_cmd + cache->rq_size; cache->rq_size += len; @@ -1312,91 +1440,138 @@ static inline bool use_reloc_gpu(struct i915_vma *vma) return !dma_resv_test_signaled_rcu(vma->resv, true); } -static u64 -relocate_entry(struct i915_vma *vma, - const struct drm_i915_gem_relocation_entry *reloc, - struct i915_execbuffer *eb, - const struct i915_vma *target) +static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) { - u64 offset = reloc->offset; - u64 target_offset = relocation_target(reloc, target); - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; + struct page *page; + unsigned long addr; - if (!eb->reloc_cache.vaddr && use_reloc_gpu(vma)) { - const unsigned int gen = eb->reloc_cache.gen; - unsigned int len; - u32 *batch; - u64 addr; + GEM_BUG_ON(vma->pages != vma->obj->mm.pages); - if (wide) - len = offset & 7 ? 8 : 5; - else if (gen >= 4) - len = 4; - else - len = 3; + page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT); + addr = PFN_PHYS(page_to_pfn(page)); + GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */ - batch = reloc_gpu(eb, vma, len); - if (IS_ERR(batch)) - goto repeat; + return addr + offset_in_page(offset); +} + +static bool __reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) +{ + const unsigned int gen = eb->reloc_cache.gen; + unsigned int len; + u32 *batch; + u64 addr; + + if (gen >= 8) + len = offset & 7 ? 
8 : 5; + else if (gen >= 4) + len = 4; + else + len = 3; + + batch = reloc_gpu(eb, vma, len); + if (IS_ERR(batch)) + return false; + + addr = gen8_canonical_addr(vma->node.start + offset); + if (gen >= 8) { + if (offset & 7) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_addr); + + addr = gen8_canonical_addr(addr + 4); - addr = gen8_canonical_addr(vma->node.start + offset); - if (wide) { - if (offset & 7) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_offset); - - addr = gen8_canonical_addr(addr + 4); - - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = upper_32_bits(target_offset); - } else { - *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_offset); - *batch++ = upper_32_bits(target_offset); - } - } else if (gen >= 6) { *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_offset; - } else if (gen >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_offset; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = upper_32_bits(target_addr); } else { - *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *batch++ = addr; - *batch++ = target_offset; + *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_addr); + *batch++ = upper_32_bits(target_addr); } - - goto out; + } else if (gen >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_addr; + } else if (IS_I965G(eb->i915)) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = vma_phys_addr(vma, offset); + *batch++ = target_addr; + } else if (gen >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_addr; + } else if (gen >= 3 && + !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) { + *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *batch++ = addr; + *batch++ = target_addr; + } else { + *batch++ = MI_STORE_DWORD_IMM; + *batch++ = vma_phys_addr(vma, offset); + *batch++ = target_addr; } + return true; +} + +static bool reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) +{ + if (eb->reloc_cache.vaddr) + return false; + + if (!use_reloc_gpu(vma)) + return false; + + return __reloc_entry_gpu(eb, vma, offset, target_addr); +} + +static u64 +relocate_entry(struct i915_vma *vma, + const struct drm_i915_gem_relocation_entry *reloc, + struct i915_execbuffer *eb, + const struct i915_vma *target) +{ + u64 target_addr = relocation_target(reloc, target); + u64 offset = reloc->offset; + + if (!reloc_entry_gpu(eb, vma, offset, target_addr)) { + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; + repeat: - vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); + vaddr = reloc_vaddr(vma->obj, + &eb->reloc_cache, + offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); - clflush_write32(vaddr + offset_in_page(offset), - lower_32_bits(target_offset), - eb->reloc_cache.vaddr); + GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); + 
clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_addr), + eb->reloc_cache.vaddr); - if (wide) { - offset += sizeof(u32); - target_offset >>= 32; - wide = false; - goto repeat; + if (wide) { + offset += sizeof(u32); + target_addr >>= 32; + wide = false; + goto repeat; + } } -out: return target->node.start | UPDATE; } @@ -1596,15 +1771,20 @@ static int eb_relocate(struct i915_execbuffer *eb) /* The objects are in their final locations, apply the relocations. */ if (eb->args->flags & __EXEC_HAS_RELOC) { struct eb_vma *ev; + int flush; list_for_each_entry(ev, &eb->relocs, reloc_link) { err = eb_relocate_vma(eb, ev); if (err) - return err; + break; } + + flush = reloc_gpu_flush(&eb->reloc_cache); + if (!err) + err = flush; } - return 0; + return err; } static int eb_move_to_gpu(struct i915_execbuffer *eb) @@ -1887,7 +2067,7 @@ err_free: static int eb_parse(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_vma *shadow, *trampoline; unsigned int len; int err; @@ -1910,7 +2090,7 @@ static int eb_parse(struct i915_execbuffer *eb) len += I915_CMD_PARSER_TRAMPOLINE_SIZE; } - pool = intel_engine_get_pool(eb->engine, len); + pool = intel_gt_get_buffer_pool(eb->engine->gt, len); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1958,7 +2138,7 @@ err_trampoline: err_shadow: i915_vma_unpin(shadow); err: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); return err; } @@ -2402,30 +2582,6 @@ static void eb_request_add(struct i915_execbuffer *eb) /* Check that the context wasn't destroyed before submission */ if (likely(!intel_context_is_closed(eb->context))) { attr = eb->gem_context->sched; - - /* - * Boost actual workloads past semaphores! - * - * With semaphores we spin on one engine waiting for another, - * simply to reduce the latency of starting our work when - * the signaler completes. However, if there is any other - * work that we could be doing on this engine instead, that - * is better utilisation and will reduce the overall duration - * of the current work. To avoid PI boosting a semaphore - * far in the distance past over useful work, we keep a history - * of any semaphore use along our dependency chain. - */ - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) - attr.priority |= I915_PRIORITY_NOSEMAPHORE; - - /* - * Boost priorities to new clients (new request flows). - * - * Allow interactive/synchronous clients to jump ahead of - * the bulk clients. 
(FQ_CODEL) - */ - if (list_empty(&rq->sched.signalers_list)) - attr.priority |= I915_PRIORITY_WAIT; } else { /* Serialise with context_close via the add_to_timeline */ i915_request_set_error_once(rq, -ENOENT); @@ -2451,7 +2607,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; - struct dma_fence *exec_fence = NULL; struct sync_file *out_fence = NULL; struct i915_vma *batch; int out_fence_fd = -1; @@ -2494,30 +2649,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_IS_PINNED) eb.batch_flags |= I915_DISPATCH_PINNED; - if (args->flags & I915_EXEC_FENCE_IN) { +#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT) + if (args->flags & IN_FENCES) { + if ((args->flags & IN_FENCES) == IN_FENCES) + return -EINVAL; + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); if (!in_fence) return -EINVAL; } - - if (args->flags & I915_EXEC_FENCE_SUBMIT) { - if (in_fence) { - err = -EINVAL; - goto err_in_fence; - } - - exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!exec_fence) { - err = -EINVAL; - goto err_in_fence; - } - } +#undef IN_FENCES if (args->flags & I915_EXEC_FENCE_OUT) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); if (out_fence_fd < 0) { err = out_fence_fd; - goto err_exec_fence; + goto err_in_fence; } } @@ -2608,14 +2755,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, } if (in_fence) { - err = i915_request_await_dma_fence(eb.request, in_fence); - if (err < 0) - goto err_request; - } - - if (exec_fence) { - err = i915_request_await_execution(eb.request, exec_fence, - eb.engine->bond_execute); + if (args->flags & I915_EXEC_FENCE_SUBMIT) + err = i915_request_await_execution(eb.request, + in_fence, + eb.engine->bond_execute); + else + err = i915_request_await_dma_fence(eb.request, + in_fence); if (err < 0) goto err_request; } @@ -2643,7 +2789,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, */ eb.request->batch = batch; if (batch->private) - intel_engine_pool_mark_active(batch->private, eb.request); + intel_gt_buffer_pool_mark_active(batch->private, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb, batch); @@ -2672,7 +2818,7 @@ err_batch_unpin: i915_vma_unpin(batch); err_parse: if (batch->private) - intel_engine_pool_put(batch->private); + intel_gt_buffer_pool_put(batch->private); err_vma: if (eb.trampoline) i915_vma_unpin(eb.trampoline); @@ -2684,8 +2830,6 @@ err_destroy: err_out_fence: if (out_fence_fd != -1) put_unused_fd(out_fence_fd); -err_exec_fence: - dma_fence_put(exec_fence); err_in_fence: dma_fence_put(in_fence); return err; @@ -2894,3 +3038,7 @@ end:; kvfree(exec2_list); return err; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_execbuffer.c" +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c index 2f6100ec2608..8ab842c80f99 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c @@ -72,8 +72,8 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) 0, 0); if (i915_sw_fence_await_reservation(&stub->chain, - obj->base.resv, NULL, - true, I915_FENCE_TIMEOUT, + obj->base.resv, NULL, true, + i915_fence_timeout(to_i915(obj->base.dev)), I915_FENCE_GFP) < 0) goto err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index b39c24dae64e..70f5f82da288 100644 --- 
a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -396,6 +396,38 @@ err: return i915_error_to_vmf_fault(ret); } +static int +vm_access(struct vm_area_struct *area, unsigned long addr, + void *buf, int len, int write) +{ + struct i915_mmap_offset *mmo = area->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + void *vaddr; + + if (i915_gem_object_is_readonly(obj) && write) + return -EACCES; + + addr -= area->vm_start; + if (addr >= obj->base.size) + return -EINVAL; + + /* As this is primarily for debugging, let's focus on simplicity */ + vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + if (write) { + memcpy(vaddr + addr, buf, len); + __i915_gem_object_flush_map(obj, addr, len); + } else { + memcpy(buf, vaddr + addr, len); + } + + i915_gem_object_unpin_map(obj); + + return len; +} + void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -745,12 +777,14 @@ static void vm_close(struct vm_area_struct *vma) static const struct vm_operations_struct vm_ops_gtt = { .fault = vm_fault_gtt, + .access = vm_access, .open = vm_open, .close = vm_close, }; static const struct vm_operations_struct vm_ops_cpu = { .fault = vm_fault_cpu, + .access = vm_access, .open = vm_open, .close = vm_close, }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 9d1d0131f7c2..99356c00c19e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -162,9 +162,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, struct llist_node *freed) { struct drm_i915_gem_object *obj, *on; - intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); llist_for_each_entry_safe(obj, on, freed, freed) { struct i915_mmap_offset *mmo, *mn; @@ -224,7 +222,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, call_rcu(&obj->rcu, __i915_gem_free_object_rcu); cond_resched(); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); } void i915_gem_flush_free_objects(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index e00792158f13..f457d7130491 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -6,8 +6,8 @@ #include "i915_drv.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" -#include "gt/intel_engine_pool.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_buffer_pool.h" #include "gt/intel_ring.h" #include "i915_gem_clflush.h" #include "i915_gem_object_blt.h" @@ -18,7 +18,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, { struct drm_i915_private *i915 = ce->vm->i915; const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_vma *batch; u64 offset; u64 count; @@ -33,7 +33,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, count = div_u64(round_up(vma->size, block_size), block_size); size = (1 + 8 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_get_pool(ce->engine, size); + pool = intel_gt_get_buffer_pool(ce->engine->gt, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -78,10 +78,12 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, } while (rem); *cmd = 
MI_BATCH_BUFFER_END; - intel_gt_chipset_flush(ce->vm->gt); + i915_gem_object_flush_map(pool->obj); i915_gem_object_unpin_map(pool->obj); + intel_gt_chipset_flush(ce->vm->gt); + batch = i915_vma_instance(pool->obj, ce->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); @@ -96,7 +98,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, return batch; out_put: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); out_pm: intel_engine_pm_put(ce->engine); return ERR_PTR(err); @@ -114,13 +116,13 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) if (unlikely(err)) return err; - return intel_engine_pool_mark_active(vma->private, rq); + return intel_gt_buffer_pool_mark_active(vma->private, rq); } void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) { i915_vma_unpin(vma); - intel_engine_pool_put(vma->private); + intel_gt_buffer_pool_put(vma->private); intel_engine_pm_put(ce->engine); } @@ -213,7 +215,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, { struct drm_i915_private *i915 = ce->vm->i915; const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_vma *batch; u64 src_offset, dst_offset; u64 count, rem; @@ -228,7 +230,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, count = div_u64(round_up(dst->size, block_size), block_size); size = (1 + 11 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_get_pool(ce->engine, size); + pool = intel_gt_get_buffer_pool(ce->engine->gt, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -289,10 +291,12 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, } while (rem); *cmd = MI_BATCH_BUFFER_END; - intel_gt_chipset_flush(ce->vm->gt); + i915_gem_object_flush_map(pool->obj); i915_gem_object_unpin_map(pool->obj); + intel_gt_chipset_flush(ce->vm->gt); + batch = i915_vma_instance(pool->obj, ce->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); @@ -307,7 +311,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, return batch; out_put: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); out_pm: intel_engine_pm_put(ce->engine); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h index 243a43a87824..8bcd336a90dc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h @@ -10,7 +10,6 @@ #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" -#include "gt/intel_engine_pool.h" #include "i915_vma.h" struct drm_i915_gem_object; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 5d855fcd5c0f..af9e48ee4a33 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -391,6 +391,7 @@ void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, GEM_BUG_ON(range_overflows_t(typeof(obj->base.size), offset, size, obj->base.size)); + wmb(); /* let all previous writes be visible to coherent partners */ obj->mm.dirty = true; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index be268511cb6d..8fe3ad2ee34e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -302,35 +302,6 @@ static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val) i915_gem_object_flush_map(t->scratch.vma->obj); } -static void hexdump(const void *buf, size_t len) -{ - const size_t rowsize = 8 * sizeof(u32); - const void *prev = NULL; - bool skip = false; - size_t pos; - - for (pos = 0; pos < len; pos += rowsize) { - char line[128]; - - if (prev && !memcmp(prev, buf + pos, rowsize)) { - if (!skip) { - pr_info("*\n"); - skip = true; - } - continue; - } - - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, - rowsize, sizeof(u32), - line, sizeof(line), - false) >= sizeof(line)); - pr_info("[%04zx] %s\n", pos, line); - - prev = buf + pos; - skip = false; - } -} - static u64 swizzle_bit(unsigned int bit, u64 offset) { return (offset & BIT_ULL(bit)) >> (bit - 6); @@ -426,7 +397,7 @@ static int verify_buffer(const struct tiled_blits *t, pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n", repr_tiling(buf->tiling), x, y, buf->start_val); - hexdump(vaddr, 4096); + igt_hexdump(vaddr, 4096); } i915_gem_object_unpin_map(buf->vma->obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 3f6079e1dfb6..87d7d8aa080f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -158,6 +158,8 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) return PTR_ERR(map); map[offset / sizeof(*map)] = v; + + __i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map)); i915_gem_object_unpin_map(ctx->obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 87d264fe54b2..b81978890641 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -972,12 +972,6 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, goto err_batch; } - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); - if (err) - goto err_request; - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) @@ -994,6 +988,18 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto skip_request; + } + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); + if (err) + goto skip_request; + i915_vma_unpin_and_release(&batch, 0); i915_vma_unpin(vma); @@ -1005,7 +1011,6 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, skip_request: i915_request_set_error_once(rq, err); -err_request: i915_request_add(rq); err_batch: i915_vma_unpin_and_release(&batch, 0); @@ -1541,10 +1546,6 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto err_unpin; } - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); - if (err) - goto err_request; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (err == 0) @@ -1553,6 +1554,16 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto skip_request; + } + + err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); + if (err) + goto skip_request; + i915_vma_unpin(vma); i915_request_add(rq); @@ 
-1560,7 +1571,6 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto out_vm; skip_request: i915_request_set_error_once(rq, err); -err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); @@ -1674,10 +1684,6 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto err_unpin; } - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags); - if (err) - goto err_request; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) @@ -1686,6 +1692,16 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto skip_request; + } + + err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags); + if (err) + goto skip_request; + i915_vma_unpin(vma); i915_request_add(rq); @@ -1708,7 +1724,6 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto out_vm; skip_request: i915_request_set_error_once(rq, err); -err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c new file mode 100644 index 000000000000..a49016f8ee0d --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_selftest.h" + +#include "gt/intel_engine_pm.h" +#include "selftests/igt_flush_test.h" + +static u64 read_reloc(const u32 *map, int x, const u64 mask) +{ + u64 reloc; + + memcpy(&reloc, &map[x], sizeof(reloc)); + return reloc & mask; +} + +static int __igt_gpu_reloc(struct i915_execbuffer *eb, + struct drm_i915_gem_object *obj) +{ + const unsigned int offsets[] = { 8, 3, 0 }; + const u64 mask = + GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 
63 : 31, 0); + const u32 *map = page_mask_bits(obj->mm.mapping); + struct i915_request *rq; + struct i915_vma *vma; + int err; + int i; + + vma = i915_vma_instance(obj, eb->context->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (err) + return err; + + /* 8-Byte aligned */ + if (!__reloc_entry_gpu(eb, vma, + offsets[0] * sizeof(u32), + 0)) { + err = -EIO; + goto unpin_vma; + } + + /* !8-Byte aligned */ + if (!__reloc_entry_gpu(eb, vma, + offsets[1] * sizeof(u32), + 1)) { + err = -EIO; + goto unpin_vma; + } + + /* Skip to the end of the cmd page */ + i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1; + i -= eb->reloc_cache.rq_size; + memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size, + MI_NOOP, i); + eb->reloc_cache.rq_size += i; + + /* Force batch chaining */ + if (!__reloc_entry_gpu(eb, vma, + offsets[2] * sizeof(u32), + 2)) { + err = -EIO; + goto unpin_vma; + } + + GEM_BUG_ON(!eb->reloc_cache.rq); + rq = i915_request_get(eb->reloc_cache.rq); + err = reloc_gpu_flush(&eb->reloc_cache); + if (err) + goto put_rq; + GEM_BUG_ON(eb->reloc_cache.rq); + + err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2); + if (err) { + intel_gt_set_wedged(eb->engine->gt); + goto put_rq; + } + + if (!i915_request_completed(rq)) { + pr_err("%s: did not wait for relocations!\n", eb->engine->name); + err = -EINVAL; + goto put_rq; + } + + for (i = 0; i < ARRAY_SIZE(offsets); i++) { + u64 reloc = read_reloc(map, offsets[i], mask); + + if (reloc != i) { + pr_err("%s[%d]: map[%d] %llx != %x\n", + eb->engine->name, i, offsets[i], reloc, i); + err = -EINVAL; + } + } + if (err) + igt_hexdump(map, 4096); + +put_rq: + i915_request_put(rq); +unpin_vma: + i915_vma_unpin(vma); + return err; +} + +static int igt_gpu_reloc(void *arg) +{ + struct i915_execbuffer eb; + struct drm_i915_gem_object *scratch; + int err = 0; + u32 *map; + + eb.i915 = arg; + + scratch = i915_gem_object_create_internal(eb.i915, 4096); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + map = i915_gem_object_pin_map(scratch, I915_MAP_WC); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto err_scratch; + } + + for_each_uabi_engine(eb.engine, eb.i915) { + reloc_cache_init(&eb.reloc_cache, eb.i915); + memset(map, POISON_INUSE, 4096); + + intel_engine_pm_get(eb.engine); + eb.context = intel_context_create(eb.engine); + if (IS_ERR(eb.context)) { + err = PTR_ERR(eb.context); + goto err_pm; + } + + err = intel_context_pin(eb.context); + if (err) + goto err_put; + + err = __igt_gpu_reloc(&eb, scratch); + + intel_context_unpin(eb.context); +err_put: + intel_context_put(eb.context); +err_pm: + intel_engine_pm_put(eb.engine); + if (err) + break; + } + + if (igt_flush_test(eb.i915)) + err = -EIO; + +err_scratch: + i915_gem_object_put(scratch); + return err; +} + +int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gpu_reloc), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index ef7abcb3f4ee..9c7402ce5bf9 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -952,6 +952,129 @@ static int igt_mmap(void *arg) return 0; } +static const char *repr_mmap_type(enum i915_mmap_type type) +{ + switch (type) { + case I915_MMAP_TYPE_GTT: return "gtt"; + case I915_MMAP_TYPE_WB: 
return "wb"; + case I915_MMAP_TYPE_WC: return "wc"; + case I915_MMAP_TYPE_UC: return "uc"; + default: return "unknown"; + } +} + +static bool can_access(const struct drm_i915_gem_object *obj) +{ + unsigned int flags = + I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; + + return i915_gem_object_type_has(obj, flags); +} + +static int __igt_mmap_access(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) +{ + struct i915_mmap_offset *mmo; + unsigned long __user *ptr; + unsigned long A, B; + unsigned long x, y; + unsigned long addr; + int err; + + memset(&A, 0xAA, sizeof(A)); + memset(&B, 0xBB, sizeof(B)); + + if (!can_mmap(obj, type) || !can_access(obj)) + return 0; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); + + addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) + return addr; + ptr = (unsigned long __user *)addr; + + err = __put_user(A, ptr); + if (err) { + pr_err("%s(%s): failed to write into user mmap\n", + obj->mm.region->name, repr_mmap_type(type)); + goto out_unmap; + } + + intel_gt_flush_ggtt_writes(&i915->gt); + + err = access_process_vm(current, addr, &x, sizeof(x), 0); + if (err != sizeof(x)) { + pr_err("%s(%s): access_process_vm() read failed\n", + obj->mm.region->name, repr_mmap_type(type)); + goto out_unmap; + } + + err = access_process_vm(current, addr, &B, sizeof(B), FOLL_WRITE); + if (err != sizeof(B)) { + pr_err("%s(%s): access_process_vm() write failed\n", + obj->mm.region->name, repr_mmap_type(type)); + goto out_unmap; + } + + intel_gt_flush_ggtt_writes(&i915->gt); + + err = __get_user(y, ptr); + if (err) { + pr_err("%s(%s): failed to read from user mmap\n", + obj->mm.region->name, repr_mmap_type(type)); + goto out_unmap; + } + + if (x != A || y != B) { + pr_err("%s(%s): failed to read/write values, found (%lx, %lx)\n", + obj->mm.region->name, repr_mmap_type(type), + x, y); + err = -EINVAL; + goto out_unmap; + } + +out_unmap: + vm_munmap(addr, obj->base.size); + return err; +} + +static int igt_mmap_access(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC); + + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + static int __igt_mmap_gpu(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) @@ -1229,6 +1352,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), SUBTEST(igt_mmap), + SUBTEST(igt_mmap_access), SUBTEST(igt_mmap_revoke), SUBTEST(igt_mmap_gpu), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 772d8cba7da9..e21b5023ca7d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -83,6 +83,8 @@ igt_emit_store_dw(struct i915_vma *vma, offset += PAGE_SIZE; } *cmd = MI_BATCH_BUFFER_END; + 
+ i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); intel_gt_chipset_flush(vma->vm->gt); @@ -126,16 +128,6 @@ int igt_gpu_fill_dw(struct intel_context *ce, goto err_batch; } - flags = 0; - if (INTEL_GEN(ce->vm->i915) <= 5) - flags |= I915_DISPATCH_SECURE; - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); - if (err) - goto err_request; - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) @@ -152,15 +144,17 @@ int igt_gpu_fill_dw(struct intel_context *ce, if (err) goto skip_request; - i915_request_add(rq); - - i915_vma_unpin_and_release(&batch, 0); + flags = 0; + if (INTEL_GEN(ce->vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; - return 0; + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); skip_request: - i915_request_set_error_once(rq, err); -err_request: + if (err) + i915_request_set_error_once(rq, err); i915_request_add(rq); err_batch: i915_vma_unpin_and_release(&batch, 0); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 94e746af8926..699125928272 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -389,6 +389,16 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm, return err; } +static __always_inline void +write_pte(gen8_pte_t *pte, const gen8_pte_t val) +{ + /* Magic delays? Or can we refine these to flush all in one pass? */ + *pte = val; + wmb(); /* cpu to cache */ + clflush(pte); /* cache to memory */ + wmb(); /* visible to all */ +} + static __always_inline u64 gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, struct i915_page_directory *pdp, @@ -405,7 +415,8 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); do { GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE); - vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; + write_pte(&vaddr[gen8_pd_index(idx, 0)], + pte_encode | iter->dma); iter->dma += I915_GTT_PAGE_SIZE; if (iter->dma >= iter->max) { @@ -487,7 +498,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, do { GEM_BUG_ON(iter->sg->length < page_size); - vaddr[index++] = encode | iter->dma; + write_pte(&vaddr[index++], encode | iter->dma); start += page_size; iter->dma += page_size; diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index cbedba857d43..d907d538176e 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -142,6 +142,18 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) intel_engine_add_retire(engine, tl); } +static void __signal_request(struct i915_request *rq, struct list_head *signals) +{ + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + if (!__dma_fence_signal(&rq->fence)) + return; + + i915_request_get(rq); + list_add_tail(&rq->signal_link, signals); +} + static void signal_irq_work(struct irq_work *work) { struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); @@ -155,6 +167,8 @@ static void signal_irq_work(struct irq_work *work) if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); + list_splice_init(&b->signaled_requests, &signal); + list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { GEM_BUG_ON(list_empty(&ce->signals)); @@ -163,24 +177,15 @@ static void signal_irq_work(struct irq_work 
*work) list_entry(pos, typeof(*rq), signal_link); GEM_BUG_ON(!check_signal_order(ce, rq)); - if (!__request_completed(rq)) break; - GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, - &rq->fence.flags)); - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - - if (!__dma_fence_signal(&rq->fence)) - continue; - /* * Queue for execution after dropping the signaling * spinlock as the callback chain may end up adding * more signalers to the same context or engine. */ - i915_request_get(rq); - list_add_tail(&rq->signal_link, &signal); + __signal_request(rq, &signal); } /* @@ -255,6 +260,7 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) spin_lock_init(&b->irq_lock); INIT_LIST_HEAD(&b->signalers); + INIT_LIST_HEAD(&b->signaled_requests); init_irq_work(&b->irq_work, signal_irq_work); } @@ -274,6 +280,32 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) spin_unlock_irqrestore(&b->irq_lock, flags); } +void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + + spin_lock_irqsave(&b->irq_lock, flags); + if (!list_empty(&ce->signals)) { + struct i915_request *rq, *next; + + /* Queue for executing the signal callbacks in the irq_work */ + list_for_each_entry_safe(rq, next, &ce->signals, signal_link) { + GEM_BUG_ON(rq->engine != engine); + GEM_BUG_ON(!__request_completed(rq)); + + __signal_request(rq, &b->signaled_requests); + } + + INIT_LIST_HEAD(&ce->signals); + list_del_init(&ce->signal_link); + + irq_work_queue(&b->irq_work); + } + spin_unlock_irqrestore(&b->irq_lock, flags); +} + void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { } diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index d10e52ff059f..9bf6d4989968 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -238,22 +238,35 @@ intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, + struct intel_context *ce); + void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, struct drm_printer *p); -static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); - batch[0] = GFX_OP_PIPE_CONTROL(6); - batch[1] = flags; + batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; + batch[1] = flags1; batch[2] = offset; return batch + 6; } +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +{ + return __gen8_emit_pipe_control(batch, 0, flags, offset); +} + +static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +{ + return __gen8_emit_pipe_control(batch, flags0, flags1, offset); +} + static inline u32 * -gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) { /* We're using qword write, offset should be aligned to 8 bytes. */ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); @@ -262,8 +275,8 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) * need a prior CS_STALL, which is emitted by the flush * following the batch. 
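 *
 * (Note: in the gen12 variants added below, DWORD0-only bits such as
 * PIPE_CONTROL0_HDC_PIPELINE_FLUSH are carried in flags0, while the
 * traditional DWORD1 flags remain in flags1.)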
*/ - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0; + *cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; *cs++ = gtt_offset; *cs++ = 0; *cs++ = value; @@ -273,6 +286,18 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) return cs; } +static inline u32* +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags); +} + +static inline u32* +gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) +{ + return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1); +} + static inline u32 * gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) { @@ -332,13 +357,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } -static inline bool -intel_engine_has_timeslices(const struct intel_engine_cs *engine) -{ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return false; - - return intel_engine_has_semaphores(engine); -} - #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c9e46c5ced43..da5b61085257 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -31,7 +31,6 @@ #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" #include "intel_engine_user.h" #include "intel_gt.h" #include "intel_gt_requests.h" @@ -327,6 +326,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS) engine->props.preempt_timeout_ms = 0; + engine->defaults = engine->props; /* never to change again */ + engine->context_size = intel_engine_context_size(gt, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; @@ -631,8 +632,6 @@ static int engine_setup_common(struct intel_engine_cs *engine) intel_engine_init__pm(engine); intel_engine_init_retire(engine); - intel_engine_pool_init(&engine->pool); - /* Use the whole device by default */ engine->sseu = intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); @@ -829,7 +828,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) cleanup_status_page(engine); intel_engine_fini_retire(engine); - intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 446e35ac0224..d0a1078ef632 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -10,7 +10,6 @@ #include "intel_engine.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_rc6.h" @@ -22,18 +21,11 @@ static int __engine_unpark(struct intel_wakeref *wf) struct intel_engine_cs *engine = container_of(wf, typeof(*engine), wakeref); struct intel_context *ce; - void *map; ENGINE_TRACE(engine, "\n"); intel_gt_pm_get(engine->gt); - /* Pin the default state for fast resets from atomic context. 
*/ - map = NULL; - if (engine->default_state) - map = shmem_pin_map(engine->default_state); - engine->pinned_default_state = map; - /* Discard stale context state from across idling */ ce = engine->kernel_context; if (ce) { @@ -43,6 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) { struct drm_i915_gem_object *obj = ce->state->obj; int type = i915_coherent_map_type(engine->i915); + void *map; map = i915_gem_object_pin_map(obj, type); if (!IS_ERR(map)) { @@ -254,7 +247,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_engine_disarm_breadcrumbs(engine); - intel_engine_pool_park(&engine->pool); /* Must be reset upon idling, or we may miss the busy wakeup. */ GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); @@ -262,12 +254,6 @@ static int __engine_park(struct intel_wakeref *wf) if (engine->park) engine->park(engine); - if (engine->pinned_default_state) { - shmem_unpin_map(engine->default_state, - engine->pinned_default_state); - engine->pinned_default_state = NULL; - } - engine->execlists.no_priolist = false; /* While gt calls i915_vma_parked(), we have to break the lock cycle */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h deleted file mode 100644 index 1bd89cadc3b7..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#ifndef INTEL_ENGINE_POOL_H -#define INTEL_ENGINE_POOL_H - -#include "intel_engine_pool_types.h" -#include "i915_active.h" -#include "i915_request.h" - -struct intel_engine_pool_node * -intel_engine_get_pool(struct intel_engine_cs *engine, size_t size); - -static inline int -intel_engine_pool_mark_active(struct intel_engine_pool_node *node, - struct i915_request *rq) -{ - return i915_active_add_request(&node->active, rq); -} - -static inline void -intel_engine_pool_put(struct intel_engine_pool_node *node) -{ - i915_active_release(&node->active); -} - -void intel_engine_pool_init(struct intel_engine_pool *pool); -void intel_engine_pool_park(struct intel_engine_pool *pool); -void intel_engine_pool_fini(struct intel_engine_pool *pool); - -#endif /* INTEL_ENGINE_POOL_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index f760e2ef285b..2b6cdf47d428 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -22,7 +22,6 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "intel_engine_pool_types.h" #include "intel_sseu.h" #include "intel_timeline_types.h" #include "intel_wakeref.h" @@ -181,6 +180,11 @@ struct intel_engine_execlists { u32 error_interrupt; /** + * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset + */ + u32 reset_ccid; + + /** * @no_priolist: priority lists disabled */ bool no_priolist; @@ -340,7 +344,6 @@ struct intel_engine_cs { unsigned long wakeref_serial; struct intel_wakeref wakeref; struct file *default_state; - void *pinned_default_state; struct { struct intel_ring *ring; @@ -374,6 +377,8 @@ struct intel_engine_cs { spinlock_t irq_lock; struct list_head signalers; + struct list_head signaled_requests; + struct irq_work irq_work; /* for use from inside irq_lock */ unsigned int irq_enabled; @@ -405,13 +410,6 @@ struct intel_engine_cs { struct i915_pmu_sample 
sample[I915_ENGINE_SAMPLE_COUNT]; } pmu; - /* - * A pool of objects to use as shadow copies of client batch buffers - * when the command parser is enabled. Prevents the client from - * modifying the batch contents after software parsing. - */ - struct intel_engine_pool pool; - struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; struct i915_wa_list ctx_wa_list; @@ -500,10 +498,11 @@ struct intel_engine_cs { #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) -#define I915_ENGINE_IS_VIRTUAL BIT(5) -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) +#define I915_ENGINE_HAS_TIMESLICES BIT(4) +#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5) +#define I915_ENGINE_IS_VIRTUAL BIT(6) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8) unsigned int flags; /* @@ -568,7 +567,7 @@ struct intel_engine_cs { unsigned long preempt_timeout_ms; unsigned long stop_timeout_ms; unsigned long timeslice_duration_ms; - } props; + } props, defaults; }; static inline bool @@ -602,6 +601,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine) } static inline bool +intel_engine_has_timeslices(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return false; + + return engine->flags & I915_ENGINE_HAS_TIMESLICES; +} + +static inline bool intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) { return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index ee10122a511e..534e435f20bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -236,9 +236,8 @@ #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ -#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) -#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ +#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ #define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 52593edf8aa0..f069551e412f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_gt.h" +#include "intel_gt_buffer_pool.h" #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" @@ -28,6 +29,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); + intel_gt_init_buffer_pool(gt); intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_init_timelines(gt); @@ -621,6 +623,7 @@ void intel_gt_driver_release(struct intel_gt *gt) intel_gt_pm_fini(gt); intel_gt_fini_scratch(gt); + intel_gt_fini_buffer_pool(gt); } void intel_gt_driver_late_release(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 
397186818305..1495054a4305 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2018 Intel Corporation */ @@ -8,15 +7,15 @@ #include "i915_drv.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" +#include "intel_gt_buffer_pool.h" -static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool) +static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool) { - return container_of(pool, struct intel_engine_cs, pool); + return container_of(pool, struct intel_gt, buffer_pool); } static struct list_head * -bucket_for_size(struct intel_engine_pool *pool, size_t sz) +bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz) { int n; @@ -32,16 +31,50 @@ bucket_for_size(struct intel_engine_pool *pool, size_t sz) return &pool->cache_list[n]; } -static void node_free(struct intel_engine_pool_node *node) +static void node_free(struct intel_gt_buffer_pool_node *node) { i915_gem_object_put(node->obj); i915_active_fini(&node->active); kfree(node); } +static void pool_free_work(struct work_struct *wrk) +{ + struct intel_gt_buffer_pool *pool = + container_of(wrk, typeof(*pool), work.work); + struct intel_gt_buffer_pool_node *node, *next; + unsigned long old = jiffies - HZ; + bool active = false; + LIST_HEAD(stale); + int n; + + /* Free buffers that have not been used in the past second */ + spin_lock_irq(&pool->lock); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + struct list_head *list = &pool->cache_list[n]; + + /* Most recent at head; oldest at tail */ + list_for_each_entry_safe_reverse(node, next, list, link) { + if (time_before(node->age, old)) + break; + + list_move(&node->link, &stale); + } + active |= !list_empty(list); + } + spin_unlock_irq(&pool->lock); + + list_for_each_entry_safe(node, next, &stale, link) + node_free(node); + + if (active) + schedule_delayed_work(&pool->work, + round_jiffies_up_relative(HZ)); +} + static int pool_active(struct i915_active *ref) { - struct intel_engine_pool_node *node = + struct intel_gt_buffer_pool_node *node = container_of(ref, typeof(*node), active); struct dma_resv *resv = node->obj->base.resv; int err; @@ -64,29 +97,31 @@ static int pool_active(struct i915_active *ref) __i915_active_call static void pool_retire(struct i915_active *ref) { - struct intel_engine_pool_node *node = + struct intel_gt_buffer_pool_node *node = container_of(ref, typeof(*node), active); - struct intel_engine_pool *pool = node->pool; + struct intel_gt_buffer_pool *pool = node->pool; struct list_head *list = bucket_for_size(pool, node->obj->base.size); unsigned long flags; - GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); - i915_gem_object_unpin_pages(node->obj); /* Return this object to the shrinker pool */ i915_gem_object_make_purgeable(node->obj); spin_lock_irqsave(&pool->lock, flags); + node->age = jiffies; list_add(&node->link, list); spin_unlock_irqrestore(&pool->lock, flags); + + schedule_delayed_work(&pool->work, + round_jiffies_up_relative(HZ)); } -static struct intel_engine_pool_node * -node_create(struct intel_engine_pool *pool, size_t sz) +static struct intel_gt_buffer_pool_node * +node_create(struct intel_gt_buffer_pool *pool, size_t sz) { - struct intel_engine_cs *engine = to_engine(pool); - struct intel_engine_pool_node *node; + struct intel_gt *gt = to_gt(pool); + struct intel_gt_buffer_pool_node *node; struct drm_i915_gem_object *obj; node = 
kmalloc(sizeof(*node), @@ -97,7 +132,7 @@ node_create(struct intel_engine_pool *pool, size_t sz) node->pool = pool; i915_active_init(&node->active, pool_active, pool_retire); - obj = i915_gem_object_create_internal(engine->i915, sz); + obj = i915_gem_object_create_internal(gt->i915, sz); if (IS_ERR(obj)) { i915_active_fini(&node->active); kfree(node); @@ -110,26 +145,15 @@ node_create(struct intel_engine_pool *pool, size_t sz) return node; } -static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine) +struct intel_gt_buffer_pool_node * +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) { - if (intel_engine_is_virtual(engine)) - engine = intel_virtual_engine_get_sibling(engine, 0); - - GEM_BUG_ON(!engine); - return &engine->pool; -} - -struct intel_engine_pool_node * -intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) -{ - struct intel_engine_pool *pool = lookup_pool(engine); - struct intel_engine_pool_node *node; + struct intel_gt_buffer_pool *pool = >->buffer_pool; + struct intel_gt_buffer_pool_node *node; struct list_head *list; unsigned long flags; int ret; - GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); - size = PAGE_ALIGN(size); list = bucket_for_size(pool, size); @@ -157,34 +181,48 @@ intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) return node; } -void intel_engine_pool_init(struct intel_engine_pool *pool) +void intel_gt_init_buffer_pool(struct intel_gt *gt) { + struct intel_gt_buffer_pool *pool = >->buffer_pool; int n; spin_lock_init(&pool->lock); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) INIT_LIST_HEAD(&pool->cache_list[n]); + INIT_DELAYED_WORK(&pool->work, pool_free_work); } -void intel_engine_pool_park(struct intel_engine_pool *pool) +static void pool_free_imm(struct intel_gt_buffer_pool *pool) { int n; + spin_lock_irq(&pool->lock); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + struct intel_gt_buffer_pool_node *node, *next; struct list_head *list = &pool->cache_list[n]; - struct intel_engine_pool_node *node, *nn; - list_for_each_entry_safe(node, nn, list, link) + list_for_each_entry_safe(node, next, list, link) node_free(node); - INIT_LIST_HEAD(list); } + spin_unlock_irq(&pool->lock); +} + +void intel_gt_flush_buffer_pool(struct intel_gt *gt) +{ + struct intel_gt_buffer_pool *pool = >->buffer_pool; + + if (cancel_delayed_work_sync(&pool->work)) + pool_free_imm(pool); } -void intel_engine_pool_fini(struct intel_engine_pool *pool) +void intel_gt_fini_buffer_pool(struct intel_gt *gt) { + struct intel_gt_buffer_pool *pool = >->buffer_pool; int n; + intel_gt_flush_buffer_pool(gt); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) GEM_BUG_ON(!list_empty(&pool->cache_list[n])); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h new file mode 100644 index 000000000000..42cbac003e8a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef INTEL_GT_BUFFER_POOL_H +#define INTEL_GT_BUFFER_POOL_H + +#include <linux/types.h> + +#include "i915_active.h" +#include "intel_gt_buffer_pool_types.h" + +struct intel_gt; +struct i915_request; + +struct intel_gt_buffer_pool_node * +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size); + +static inline int +intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node, + struct i915_request *rq) +{ + return i915_active_add_request(&node->active, rq); +} + +static 
inline void +intel_gt_buffer_pool_put(struct intel_gt_buffer_pool_node *node) +{ + i915_active_release(&node->active); +} + +void intel_gt_init_buffer_pool(struct intel_gt *gt); +void intel_gt_flush_buffer_pool(struct intel_gt *gt); +void intel_gt_fini_buffer_pool(struct intel_gt *gt); + +#endif /* INTEL_GT_BUFFER_POOL_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h index e31ee361b76f..e28bdda771ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h @@ -4,26 +4,29 @@ * Copyright © 2014-2018 Intel Corporation */ -#ifndef INTEL_ENGINE_POOL_TYPES_H -#define INTEL_ENGINE_POOL_TYPES_H +#ifndef INTEL_GT_BUFFER_POOL_TYPES_H +#define INTEL_GT_BUFFER_POOL_TYPES_H #include <linux/list.h> #include <linux/spinlock.h> +#include <linux/workqueue.h> #include "i915_active_types.h" struct drm_i915_gem_object; -struct intel_engine_pool { +struct intel_gt_buffer_pool { spinlock_t lock; struct list_head cache_list[4]; + struct delayed_work work; }; -struct intel_engine_pool_node { +struct intel_gt_buffer_pool_node { struct i915_active active; struct drm_i915_gem_object *obj; struct list_head link; - struct intel_engine_pool *pool; + struct intel_gt_buffer_pool *pool; + unsigned long age; }; -#endif /* INTEL_ENGINE_POOL_TYPES_H */ +#endif /* INTEL_GT_BUFFER_POOL_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 5097786f4375..6bdb434a442d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -150,10 +150,6 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (intel_gt_is_wedged(gt)) intel_gt_unset_wedged(gt); - for_each_engine(engine, gt, id) - if (engine->sanitize) - engine->sanitize(engine); - intel_uc_sanitize(>->uc); for_each_engine(engine, gt, id) @@ -162,6 +158,10 @@ static void gt_sanitize(struct intel_gt *gt, bool force) intel_uc_reset_prepare(>->uc); + for_each_engine(engine, gt, id) + if (engine->sanitize) + engine->sanitize(engine); + if (reset_engines(gt) || force) { for_each_engine(engine, gt, id) __intel_engine_reset(engine, false); @@ -171,6 +171,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (engine->reset.finish) engine->reset.finish(engine); + intel_rps_sanitize(>->rps); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(gt->uncore->rpm, wakeref); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index d02ccb735e24..0cc1d6b185dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -17,6 +17,7 @@ #include "i915_vma.h" #include "intel_engine_types.h" +#include "intel_gt_buffer_pool_types.h" #include "intel_llc_types.h" #include "intel_reset_types.h" #include "intel_rc6_types.h" @@ -97,6 +98,16 @@ struct intel_gt { */ struct i915_address_space *vm; + /* + * A pool of objects to use as shadow copies of client batch buffers + * when the command parser is enabled. Prevents the client from + * modifying the batch contents after software parsing. + * + * Buffers older than 1s are periodically reaped from the pool, + * or may be reclaimed by the shrinker before then. 
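[Editor's aside on the reaping scheme described in the comment above: the new intel_gt_buffer_pool keeps per-size lists with the most recently returned buffer at the head, stamps each node with the time it was returned, and lets a delayed worker free whatever has sat unused for roughly a second. The userspace C sketch below only models that idea; the power-of-two bucket rule is an assumption (the hunk elides bucket_for_size()), time(2) stands in for jiffies, and locking is omitted.]

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NUM_BUCKETS 4

struct pool_node {
        struct pool_node *next;
        size_t size;
        time_t age;             /* stand-in for the jiffies stamp */
};

static struct pool_node *buckets[NUM_BUCKETS];

/* Pick a bucket by rough power-of-two size class (assumed rule). */
static int bucket_for_size(size_t sz)
{
        int n = 0;

        while (sz > 4096 && n < NUM_BUCKETS - 1) {
                sz >>= 1;
                n++;
        }
        return n;
}

/* Return a buffer to its bucket, most recently used at the head. */
static void pool_put(struct pool_node *node)
{
        int n = bucket_for_size(node->size);

        node->age = time(NULL);
        node->next = buckets[n];
        buckets[n] = node;
}

/* Free everything that has been idle for more than a second. */
static void pool_reap(void)
{
        time_t cutoff = time(NULL) - 1;
        int n;

        for (n = 0; n < NUM_BUCKETS; n++) {
                struct pool_node **p = &buckets[n];

                /* newest at the head: skip fresh nodes, then drop the rest */
                while (*p && (*p)->age >= cutoff)
                        p = &(*p)->next;

                while (*p) {
                        struct pool_node *stale = *p;

                        *p = stale->next;
                        free(stale);
                }
        }
}

int main(void)
{
        struct pool_node *node = calloc(1, sizeof(*node));

        if (!node)
                return 1;

        node->size = 2 * 4096;
        printf("an 8 KiB buffer lands in bucket %d\n", bucket_for_size(node->size));

        pool_put(node);
        pool_reap();            /* nothing freed: the node is still fresh */
        return 0;
}

[Keeping each list ordered newest first means a reap pass can stop at the first entry that is still fresh; everything behind it is at least as old.]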
+ */ + struct intel_gt_buffer_pool buffer_pool; + struct i915_vma *scratch; }; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 4311b12542fb..87e6c5bdd2dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -217,7 +217,7 @@ struct virtual_engine { /* And finally, which physical engines this virtual engine maps onto. */ unsigned int num_siblings; - struct intel_engine_cs *siblings[0]; + struct intel_engine_cs *siblings[]; }; static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) @@ -429,18 +429,7 @@ static int effective_prio(const struct i915_request *rq) if (i915_request_has_nopreempt(rq)) prio = I915_PRIORITY_UNPREEMPTABLE; - /* - * On unwinding the active request, we give it a priority bump - * if it has completed waiting on any semaphore. If we know that - * the request has already started, we can prevent an unwanted - * preempt-to-idle cycle by taking that into account now. - */ - if (__i915_request_has_started(rq)) - prio |= I915_PRIORITY_NOSEMAPHORE; - - /* Restrict mere WAIT boosts from triggering preemption */ - BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ - return prio | __NO_PREEMPTION; + return prio; } static int queue_prio(const struct intel_engine_execlists *execlists) @@ -1271,14 +1260,11 @@ execlists_check_context(const struct intel_context *ce, static void restore_default_state(struct intel_context *ce, struct intel_engine_cs *engine) { - u32 *regs = ce->lrc_reg_state; + u32 *regs; - if (engine->pinned_default_state) - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_OFFSET, - engine->context_size - PAGE_SIZE); + regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE); + execlists_init_reg_state(regs, ce, engine, ce->ring, true); - execlists_init_reg_state(regs, ce, engine, ce->ring, false); ce->runtime.last = intel_context_get_runtime(ce); } @@ -1372,7 +1358,7 @@ __execlists_schedule_in(struct i915_request *rq) ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - unsigned int tag = ffs(engine->context_tag); + unsigned int tag = ffs(READ_ONCE(engine->context_tag)); GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); clear_bit(tag - 1, &engine->context_tag); @@ -1826,30 +1812,16 @@ static bool virtual_matches(const struct virtual_engine *ve, return true; } -static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, - struct i915_request *rq) +static void virtual_xfer_breadcrumbs(struct virtual_engine *ve) { - struct intel_engine_cs *old = ve->siblings[0]; - - /* All unattached (rq->engine == old) must already be completed */ - - spin_lock(&old->breadcrumbs.irq_lock); - if (!list_empty(&ve->context.signal_link)) { - list_del_init(&ve->context.signal_link); - - /* - * We cannot acquire the new engine->breadcrumbs.irq_lock - * (as we are holding a breadcrumbs.irq_lock already), - * so attach this request to the signaler on submission. - * The queued irq_work will occur when we finally drop - * the engine->active.lock after dequeue. - */ - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags); - - /* Also transfer the pending irq_work for the old breadcrumb. */ - intel_engine_signal_breadcrumbs(rq->engine); - } - spin_unlock(&old->breadcrumbs.irq_lock); + /* + * All the outstanding signals on ve->siblings[0] must have + * been completed, just pending the interrupt handler. 
As those + * signals still refer to the old sibling (via rq->engine), we must + * transfer those to the old irq_worker to keep our locking + * consistent. + */ + intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context); } #define for_each_waiter(p__, rq__) \ @@ -1883,12 +1855,16 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; /* No waiter should start before its signaler */ - GEM_BUG_ON(i915_request_started(w) && + GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && + i915_request_started(w) && !i915_request_completed(rq)); GEM_BUG_ON(i915_request_is_active(w)); @@ -2280,7 +2256,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) engine); if (!list_empty(&ve->context.signals)) - virtual_xfer_breadcrumbs(ve, rq); + virtual_xfer_breadcrumbs(ve); /* * Move the bound engine to the top of the list @@ -3494,6 +3470,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; + GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq)); if (!i915_request_timeline(rq)->has_initial_breadcrumb) return 0; @@ -3520,6 +3497,56 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) /* Record the updated position of the request's payload */ rq->infix = intel_ring_offset(rq, cs); + __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); + + return 0; +} + +static int emit_pdps(struct i915_request *rq) +{ + const struct intel_engine_cs * const engine = rq->engine; + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); + int err, i; + u32 *cs; + + GEM_BUG_ON(intel_vgpu_active(rq->i915)); + + /* + * Beware ye of the dragons, this sequence is magic! + * + * Small changes to this sequence can cause anything from + * GPU hangs to forcewake errors and machine lockups! + */ + + /* Flush any residual operations from the context load */ + err = engine->emit_flush(rq, EMIT_FLUSH); + if (err) + return err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; + + cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Ensure the LRI have landed before we invalidate & continue */ + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; + for (i = GEN8_3LVL_PDPES; i--; ) { + const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); + u32 base = engine->mmio_base; + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); + *cs++ = lower_32_bits(pd_daddr); + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + return 0; } @@ -3544,6 +3571,12 @@ static int execlists_request_alloc(struct i915_request *request) * to cancel/unwind this request now. */ + if (!i915_vm_is_4lvl(request->context->vm)) { + ret = emit_pdps(request); + if (ret) + return ret; + } + /* Unconditionally invalidate GPU caches and TLBs. */ ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) @@ -3886,6 +3919,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) ring_set_paused(engine, 0); /* + * Sometimes Icelake forgets to reset its pointers on a GPU reset. + * Bludgeon them with a mmio update to be sure. 
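[Editor's aside on the "bludgeon" write that follows: the 0xffff placed in the upper half of the value acts as a write-enable mask for the lower half, a common masked-register convention on this hardware, while reset_value is packed into both CSB pointer fields. The standalone sketch below only illustrates how such a value is assembled; the exact field layout of RING_CONTEXT_STATUS_PTR is not spelled out here and 7 is just an example reset index.]

#include <stdio.h>
#include <stdint.h>

/* Pack a masked register write: the upper 16 bits name which of the
 * lower 16 bits the hardware should actually latch. */
static uint32_t masked_write(uint16_t enable, uint16_t value)
{
        return (uint32_t)enable << 16 | value;
}

int main(void)
{
        unsigned int reset_value = 7;   /* example: last CSB entry index */
        uint32_t v = masked_write(0xffff, reset_value << 8 | reset_value);

        printf("CSB pointer reset word: 0x%08x\n", (unsigned int)v);   /* 0xffff0707 */
        return 0;
}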
+ */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + 0xffff << 16 | reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + + /* * After a reset, the HW starts writing into CSB entry [0]. We * therefore have to set our HEAD pointer back one entry so that * the *first* entry we check is entry 0. To complicate this further, @@ -3898,16 +3939,15 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) WRITE_ONCE(*execlists->csb_write, reset_value); wmb(); /* Make sure this is visible to HW (paranoia?) */ - /* - * Sometimes Icelake forgets to reset its pointers on a GPU reset. - * Bludgeon them with a mmio update to be sure. - */ + invalidate_csb_entries(&execlists->csb_status[0], + &execlists->csb_status[reset_value]); + + /* Once more for luck and our trusty paranoia */ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, - reset_value << 8 | reset_value); + 0xffff << 16 | reset_value << 8 | reset_value); ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); - invalidate_csb_entries(&execlists->csb_status[0], - &execlists->csb_status[reset_value]); + GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); } static void execlists_sanitize(struct intel_engine_cs *engine) @@ -4074,6 +4114,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) */ ring_set_paused(engine, 1); intel_engine_stop_cs(engine); + + engine->execlists.reset_ccid = active_ccid(engine); } static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) @@ -4116,7 +4158,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * its request, it was still running at the time of the * reset and will have been clobbered. */ - rq = execlists_active(execlists); + rq = active_context(engine, engine->execlists.reset_ccid); if (!rq) goto unwind; @@ -4166,8 +4208,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * image back to the expected values to skip over the guilty request. */ __i915_request_reset(rq, stalled); - if (!stalled) - goto out_replay; /* * We want a simple context + ring to execute the breadcrumb update. @@ -4177,9 +4217,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * future request will be after userspace has had the opportunity * to recreate its own state. 
*/ - GEM_BUG_ON(!intel_context_is_pinned(ce)); - restore_default_state(ce, engine); - out_replay: ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", head, ce->ring->tail); @@ -4545,6 +4582,42 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } +static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) +{ + static const i915_reg_t vd[] = { + GEN12_VD0_AUX_NV, + GEN12_VD1_AUX_NV, + GEN12_VD2_AUX_NV, + GEN12_VD3_AUX_NV, + }; + + static const i915_reg_t ve[] = { + GEN12_VE0_AUX_NV, + GEN12_VE1_AUX_NV, + }; + + if (engine->class == VIDEO_DECODE_CLASS) + return vd[engine->instance]; + + if (engine->class == VIDEO_ENHANCEMENT_CLASS) + return ve[engine->instance]; + + GEM_BUG_ON("unknown aux_inv_reg\n"); + + return INVALID_MMIO_REG; +} + +static u32 * +gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(inv_reg); + *cs++ = AUX_INV; + *cs++ = MI_NOOP; + + return cs; +} + static int gen12_emit_flush_render(struct i915_request *request, u32 mode) { @@ -4553,13 +4626,13 @@ static int gen12_emit_flush_render(struct i915_request *request, u32 *cs; flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_L3; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl */ flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; - flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; @@ -4570,7 +4643,9 @@ static int gen12_emit_flush_render(struct i915_request *request, if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + cs = gen12_emit_pipe_control(cs, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } @@ -4585,14 +4660,13 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_CS_STALL; - cs = intel_ring_begin(request, 8); + cs = intel_ring_begin(request, 8 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -4605,6 +4679,9 @@ static int gen12_emit_flush_render(struct i915_request *request, cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + /* hsdes: 1809175790 */ + cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs); + *cs++ = preparser_disable(false); intel_ring_advance(request, cs); } @@ -4612,6 +4689,56 @@ static int gen12_emit_flush_render(struct i915_request *request, return 0; } +static int gen12_emit_flush(struct i915_request *request, u32 mode) +{ + intel_engine_mask_t aux_inv = 0; + u32 cmd, *cs; + + if (mode & EMIT_INVALIDATE) + aux_inv = request->engine->mask & ~BIT(BCS0); + + cs = intel_ring_begin(request, + 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_FLUSH_DW + 1; + + /* We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). 
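[Editor's aside: the dword budget requested from intel_ring_begin() at the top of gen12_emit_flush() above works out to four dwords for the MI_FLUSH_DW packet, plus, when any engines need the auxiliary table invalidation, one LRI header, two dwords per engine and a closing NOOP. A tiny standalone sketch of that arithmetic, using __builtin_popcount() in place of hweight8():]

#include <stdio.h>

/* Dword budget for the flush sequence: 4 dwords of MI_FLUSH_DW, plus
 * one LRI header, two dwords per engine and a trailing NOOP when any
 * engines need the AUX table invalidation. */
static unsigned int flush_ring_space(unsigned int aux_inv_mask)
{
        unsigned int engines = __builtin_popcount(aux_inv_mask);

        return 4 + (engines ? 2 * engines + 2 : 0);
}

int main(void)
{
        printf("no invalidation:  %u dwords\n", flush_ring_space(0));
        printf("two engines:      %u dwords\n", flush_ring_space(0x3));
        return 0;
}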
+ */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; + + if (mode & EMIT_INVALIDATE) { + cmd |= MI_INVALIDATE_TLB; + if (request->engine->class == VIDEO_DECODE_CLASS) + cmd |= MI_INVALIDATE_BSD; + } + + *cs++ = cmd; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + + if (aux_inv) { /* hsdes: 1809175790 */ + struct intel_engine_cs *engine; + unsigned int tmp; + + *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); + for_each_engine_masked(engine, request->engine->gt, + aux_inv, tmp) { + *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); + *cs++ = AUX_INV; + } + *cs++ = MI_NOOP; + } + intel_ring_advance(request, cs); + + return 0; +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -4641,8 +4768,7 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) } static __always_inline u32* -gen8_emit_fini_breadcrumb_footer(struct i915_request *request, - u32 *cs) +gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) { *cs++ = MI_USER_INTERRUPT; @@ -4656,14 +4782,16 @@ gen8_emit_fini_breadcrumb_footer(struct i915_request *request, return gen8_emit_wa_tail(request, cs); } -static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - 0); + u32 addr = i915_request_active_timeline(request)->hwsp_offset; + + return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0); +} - return gen8_emit_fini_breadcrumb_footer(request, cs); +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) +{ + return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); } static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) @@ -4681,7 +4809,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_fini_breadcrumb_tail(request, cs); } static u32 * @@ -4697,7 +4825,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_fini_breadcrumb_tail(request, cs); } /* @@ -4735,7 +4863,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) } static __always_inline u32* -gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) +gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) { *cs++ = MI_USER_INTERRUPT; @@ -4749,33 +4877,29 @@ gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - 0); - - return gen12_emit_fini_breadcrumb_footer(request, cs); + return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); } static u32 * gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write_rcs(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - PIPE_CONTROL_CS_STALL | 
- PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - /* Wa_1409600907:tgl */ - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_HDC_PIPELINE_FLUSH); + cs = gen12_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); - return gen12_emit_fini_breadcrumb_footer(request, cs); + return gen12_emit_fini_breadcrumb_tail(request, cs); } static void execlists_park(struct intel_engine_cs *engine) @@ -4801,8 +4925,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + engine->flags |= I915_ENGINE_HAS_TIMESLICES; + } } if (INTEL_GEN(engine->i915) >= 12) @@ -4845,9 +4972,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; - if (INTEL_GEN(engine->i915) >= 12) + if (INTEL_GEN(engine->i915) >= 12) { engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; - + engine->emit_flush = gen12_emit_flush; + } engine->set_default_submission = intel_execlists_set_default_submission; if (INTEL_GEN(engine->i915) < 11) { diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 1c1923ec8be7..ab675d35030d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -113,7 +113,6 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) struct intel_uncore *uncore = rc6_to_uncore(rc6); struct intel_engine_cs *engine; enum intel_engine_id id; - u32 rc6_mode; /* 2b: Program RC6 thresholds.*/ if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) { @@ -165,16 +164,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) /* 3a: Enable RC6 */ set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - /* WaRsUseTimeoutMode:cnl (pre-prod) */ - if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0)) - rc6_mode = GEN7_RC_CTL_TO_MODE; - else - rc6_mode = GEN6_RC_CTL_EI_MODE(1); rc6->ctl_enable = GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_RC6_ENABLE | - rc6_mode; + GEN6_RC_CTL_EI_MODE(1); /* * WaRsDisableCoarsePowerGating:skl,cnl diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 708cb7808865..f59e7875cc5e 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -219,6 +219,14 @@ int intel_renderstate_emit(struct intel_renderstate *so, if (!so->vma) return 0; + i915_vma_lock(so->vma); + err = i915_request_await_object(rq, so->vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(so->vma, rq, 0); + i915_vma_unlock(so->vma); + if (err) + return err; + err = engine->emit_bb_start(rq, so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); @@ -233,13 +241,7 @@ int 
intel_renderstate_emit(struct intel_renderstate *so, return err; } - i915_vma_lock(so->vma); - err = i915_request_await_object(rq, so->vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(so->vma, rq, 0); - i915_vma_unlock(so->vma); - - return err; + return 0; } void intel_renderstate_fini(struct intel_renderstate *so) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index c682355ec79e..2f59fc6df3c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1844,8 +1844,11 @@ void intel_rps_init(struct intel_rps *rps) if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; +} - if (INTEL_GEN(i915) >= 6) +void intel_rps_sanitize(struct intel_rps *rps) +{ + if (INTEL_GEN(rps_to_i915(rps)) >= 6) rps_disable_interrupts(rps); } diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index af07fa5b7584..8d3c9d663662 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -13,6 +13,7 @@ struct i915_request; void intel_rps_init_early(struct intel_rps *rps); void intel_rps_init(struct intel_rps *rps); +void intel_rps_sanitize(struct intel_rps *rps); void intel_rps_driver_register(struct intel_rps *rps); void intel_rps_driver_unregister(struct intel_rps *rps); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index e1fac1b38f27..4546284fede1 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -211,9 +211,9 @@ static void cacheline_free(struct intel_timeline_cacheline *cl) i915_active_release(&cl->active); } -int intel_timeline_init(struct intel_timeline *timeline, - struct intel_gt *gt, - struct i915_vma *hwsp) +static int intel_timeline_init(struct intel_timeline *timeline, + struct intel_gt *gt, + struct i915_vma *hwsp) { void *vaddr; @@ -280,7 +280,7 @@ void intel_gt_init_timelines(struct intel_gt *gt) INIT_LIST_HEAD(&timelines->hwsp_free_list); } -void intel_timeline_fini(struct intel_timeline *timeline) +static void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(atomic_read(&timeline->pin_count)); GEM_BUG_ON(!list_empty(&timeline->requests)); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index c8e59a333182..4298b9ac7327 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -31,11 +31,6 @@ #include "i915_syncmap.h" #include "gt/intel_timeline_types.h" -int intel_timeline_init(struct intel_timeline *tl, - struct intel_gt *gt, - struct i915_vma *hwsp); -void intel_timeline_fini(struct intel_timeline *tl); - struct intel_timeline * intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index adddc5c93b48..90a2b9e399b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -485,25 +485,14 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; - /* WaForceContextSaveRestoreNonCoherent:cnl */ WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ - if 
(IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ - if (IS_CNL_REVID(i915, 0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); - /* WaPushConstantDereferenceHoldDisable:cnl */ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); @@ -872,12 +861,6 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { wa_init_mcr(i915, wal); - /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ - if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) - wa_write_or(wal, - GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT); - /* WaInPlaceDecompressionHang:cnl */ wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, @@ -934,10 +917,13 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); - /* Wa_1607087056:icl */ - wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, - L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); + /* Wa_1607087056:icl,ehl,jsl */ + if (IS_ICELAKE(i915) || + IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) { + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); + } } static void diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 4a53ded7c2dd..b8dd3cbc8696 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -28,7 +28,6 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" #include "mock_engine.h" #include "selftests/mock_request.h" @@ -328,7 +327,6 @@ int mock_engine_init(struct intel_engine_cs *engine) intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); - intel_engine_pool_init(&engine->pool); ce = create_kernel_context(engine); if (IS_ERR(ce)) diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index b8ed3cbe1277..52af1cee9a94 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq) /* Opencode i915_request_add() so we can keep the timeline locked. */ __i915_request_commit(rq); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_queue(rq, NULL); timeout = i915_request_wait(rq, 0, HZ / 10); @@ -154,10 +155,7 @@ static int live_context_size(void *arg) */ for_each_engine(engine, gt, id) { - struct { - struct file *state; - void *pinned; - } saved; + struct file *saved; if (!engine->context_size) continue; @@ -171,8 +169,7 @@ static int live_context_size(void *arg) * active state is sufficient, we are only checking that we * don't use more than we planned. 
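[Editor's aside for the selftest below: fetch_and_zero() is the driver's helper for reading a location and clearing it in one expression, which live_context_size() uses to temporarily hide the engine's default state while it probes the context with an extra redzone page. A plain C sketch of that save, probe and restore pattern, with hypothetical names and a simplified pointer-only macro:]

#include <stdio.h>
#include <stddef.h>

/* Read a pointer and clear it in one expression, a simplified,
 * pointer-only stand-in for the driver's fetch_and_zero() helper. */
#define fetch_and_zero(ptr) ({                  \
        __typeof__(*(ptr)) __val = *(ptr);      \
        *(ptr) = NULL;                          \
        __val;                                  \
})

struct fake_engine {
        const char *default_state;
        size_t context_size;
};

int main(void)
{
        struct fake_engine engine = { "default-state", 4096 };
        const char *saved;

        /* save: run the probe without any preloaded default state */
        saved = fetch_and_zero(&engine.default_state);
        engine.context_size += 4096;            /* pretend redzone page */

        printf("probing with default_state=%s, size=%zu\n",
               engine.default_state ? engine.default_state : "(null)",
               engine.context_size);

        /* restore */
        engine.context_size -= 4096;
        engine.default_state = saved;

        printf("restored default_state=%s, size=%zu\n",
               engine.default_state, engine.context_size);
        return 0;
}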
*/ - saved.state = fetch_and_zero(&engine->default_state); - saved.pinned = fetch_and_zero(&engine->pinned_default_state); + saved = fetch_and_zero(&engine->default_state); /* Overlaps with the execlists redzone */ engine->context_size += I915_GTT_PAGE_SIZE; @@ -181,8 +178,7 @@ static int live_context_size(void *arg) engine->context_size -= I915_GTT_PAGE_SIZE; - engine->pinned_default_state = saved.pinned; - engine->default_state = saved.state; + engine->default_state = saved; intel_engine_pm_put(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 7529df92f6a2..824f99c4cc7c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -4342,35 +4342,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) return intel_gt_live_subtests(tests, &i915->gt); } -static void hexdump(const void *buf, size_t len) -{ - const size_t rowsize = 8 * sizeof(u32); - const void *prev = NULL; - bool skip = false; - size_t pos; - - for (pos = 0; pos < len; pos += rowsize) { - char line[128]; - - if (prev && !memcmp(prev, buf + pos, rowsize)) { - if (!skip) { - pr_info("*\n"); - skip = true; - } - continue; - } - - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, - rowsize, sizeof(u32), - line, sizeof(line), - false) >= sizeof(line)); - pr_info("[%04zx] %s\n", pos, line); - - prev = buf + pos; - skip = false; - } -} - static int emit_semaphore_signal(struct intel_context *ce, void *slot) { const u32 offset = @@ -4518,10 +4489,10 @@ static int live_lrc_layout(void *arg) if (err) { pr_info("%s: HW register image:\n", engine->name); - hexdump(hw, PAGE_SIZE); + igt_hexdump(hw, PAGE_SIZE); pr_info("%s: SW register image:\n", engine->name); - hexdump(lrc, PAGE_SIZE); + igt_hexdump(lrc, PAGE_SIZE); } shmem_unpin_map(engine->default_state, hw); @@ -5206,6 +5177,7 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) { struct i915_vma *batch; u32 dw, x, *cs, *hw; + u32 *defaults; batch = create_user_vma(ce->vm, SZ_64K); if (IS_ERR(batch)) @@ -5217,9 +5189,16 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) return ERR_CAST(cs); } + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + i915_gem_object_unpin_map(batch->obj); + i915_vma_put(batch); + return ERR_PTR(-ENOMEM); + } + x = 0; dw = 0; - hw = ce->engine->pinned_default_state; + hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; @@ -5250,6 +5229,8 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) *cs++ = MI_BATCH_BUFFER_END; + shmem_unpin_map(ce->engine->default_state, defaults); + i915_gem_object_flush_map(batch->obj); i915_gem_object_unpin_map(batch->obj); @@ -5360,6 +5341,7 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) { struct i915_vma *batch; u32 dw, *cs, *hw; + u32 *defaults; batch = create_user_vma(ce->vm, SZ_64K); if (IS_ERR(batch)) @@ -5371,8 +5353,15 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) return ERR_CAST(cs); } + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + i915_gem_object_unpin_map(batch->obj); + i915_vma_put(batch); + return ERR_PTR(-ENOMEM); + } + dw = 0; - hw = ce->engine->pinned_default_state; + hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; @@ -5400,6 +5389,8 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) *cs++ = MI_BATCH_BUFFER_END; + 
shmem_unpin_map(ce->engine->default_state, defaults); + i915_gem_object_flush_map(batch->obj); i915_gem_object_unpin_map(batch->obj); @@ -5467,6 +5458,7 @@ static int compare_isolation(struct intel_engine_cs *engine, { u32 x, dw, *hw, *lrc; u32 *A[2], *B[2]; + u32 *defaults; int err = 0; A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); @@ -5499,9 +5491,15 @@ static int compare_isolation(struct intel_engine_cs *engine, } lrc += LRC_STATE_OFFSET / sizeof(*hw); + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + err = -ENOMEM; + goto err_lrc; + } + x = 0; dw = 0; - hw = engine->pinned_default_state; + hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; @@ -5541,6 +5539,8 @@ static int compare_isolation(struct intel_engine_cs *engine, } while (dw < PAGE_SIZE / sizeof(u32) && (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + shmem_unpin_map(ce->engine->default_state, defaults); +err_lrc: i915_gem_object_unpin_map(ce->state->obj); err_B1: i915_gem_object_unpin_map(result[1]->obj); @@ -5690,18 +5690,16 @@ static int live_lrc_isolation(void *arg) continue; intel_engine_pm_get(engine); - if (engine->pinned_default_state) { - for (i = 0; i < ARRAY_SIZE(poison); i++) { - int result; + for (i = 0; i < ARRAY_SIZE(poison); i++) { + int result; - result = __lrc_isolation(engine, poison[i]); - if (result && !err) - err = result; + result = __lrc_isolation(engine, poison[i]); + if (result && !err) + err = result; - result = __lrc_isolation(engine, ~poison[i]); - if (result && !err) - err = result; - } + result = __lrc_isolation(engine, ~poison[i]); + if (result && !err) + err = result; } intel_engine_pm_put(engine); if (igt_flush_test(gt->i915)) { diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c index 9995faadd7e8..3350e7c995bc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -54,6 +54,8 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine) *cs++ = STACK_MAGIC; *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); vma->private = intel_context_create(engine); /* dummy residuals */ diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index b89a7d7611f6..6275d69aa9cc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -56,6 +56,18 @@ static int cmp_u64(const void *A, const void *B) return 0; } +static int cmp_u32(const void *A, const void *B) +{ + const u32 *a = A, *b = B; + + if (a < b) + return -1; + else if (a > b) + return 1; + else + return 0; +} + static struct i915_vma * create_spin_counter(struct intel_engine_cs *engine, struct i915_address_space *vm, @@ -236,8 +248,8 @@ int live_rps_clock_interval(void *arg) for_each_engine(engine, gt, id) { unsigned long saved_heartbeat; struct i915_request *rq; - ktime_t dt; u32 cycles; + u64 dt; if (!intel_engine_can_store_dword(engine)) continue; @@ -286,15 +298,29 @@ int live_rps_clock_interval(void *arg) engine->name); err = -ENODEV; } else { - preempt_disable(); - dt = ktime_get(); - cycles = -intel_uncore_read_fw(gt->uncore, - GEN6_RP_CUR_UP_EI); - udelay(1000); - dt = ktime_sub(ktime_get(), dt); - cycles += intel_uncore_read_fw(gt->uncore, - GEN6_RP_CUR_UP_EI); - preempt_enable(); + ktime_t dt_[5]; + u32 cycles_[5]; + int i; + + for (i = 0; i < 5; i++) { + preempt_disable(); + + dt_[i] = ktime_get(); + cycles_[i] 
= -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); + + udelay(1000); + + dt_[i] = ktime_sub(ktime_get(), dt_[i]); + cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); + + preempt_enable(); + } + + /* Use the median of both cycle/dt; close enough */ + sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL); + cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4; + sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL); + dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4); } intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0); @@ -306,14 +332,14 @@ int live_rps_clock_interval(void *arg) if (err == 0) { u64 time = intel_gt_pm_interval_to_ns(gt, cycles); u32 expected = - intel_gt_ns_to_pm_interval(gt, ktime_to_ns(dt)); + intel_gt_ns_to_pm_interval(gt, dt); pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n", - engine->name, cycles, time, ktime_to_ns(dt), expected, + engine->name, cycles, time, dt, expected, gt->clock_frequency / 1000); - if (10 * time < 8 * ktime_to_ns(dt) || - 8 * time > 10 * ktime_to_ns(dt)) { + if (10 * time < 8 * dt || + 8 * time > 10 * dt) { pr_err("%s: rps clock time does not match walltime!\n", engine->name); err = -EINVAL; @@ -701,6 +727,7 @@ int live_rps_frequency_cs(void *arg) err_vma: *cancel = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(vma->obj); i915_gem_object_unpin_map(vma->obj); i915_vma_unpin(vma); i915_vma_put(vma); @@ -842,6 +869,7 @@ int live_rps_frequency_srm(void *arg) err_vma: *cancel = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(vma->obj); i915_gem_object_unpin_map(vma->obj); i915_vma_unpin(vma); i915_vma_put(vma); diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index 8f9b2f33dbaf..535cc1169e54 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -192,6 +192,17 @@ static struct kobj_attribute max_spin_attr = __ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store); static ssize_t +max_spin_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.max_busywait_duration_ns); +} + +static struct kobj_attribute max_spin_def = +__ATTR(max_busywait_duration_ns, 0444, max_spin_default, NULL); + +static ssize_t timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { @@ -234,6 +245,17 @@ static struct kobj_attribute timeslice_duration_attr = __ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store); static ssize_t +timeslice_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.timeslice_duration_ms); +} + +static struct kobj_attribute timeslice_duration_def = +__ATTR(timeslice_duration_ms, 0444, timeslice_default, NULL); + +static ssize_t stop_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { @@ -273,6 +295,17 @@ static struct kobj_attribute stop_timeout_attr = __ATTR(stop_timeout_ms, 0644, stop_show, stop_store); static ssize_t +stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.stop_timeout_ms); +} + +static struct kobj_attribute stop_timeout_def = +__ATTR(stop_timeout_ms, 0444, stop_default, NULL); + +static ssize_t 
preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { @@ -317,6 +350,18 @@ static struct kobj_attribute preempt_timeout_attr = __ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store); static ssize_t +preempt_timeout_default(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.preempt_timeout_ms); +} + +static struct kobj_attribute preempt_timeout_def = +__ATTR(preempt_timeout_ms, 0444, preempt_timeout_default, NULL); + +static ssize_t heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { @@ -359,6 +404,17 @@ heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) static struct kobj_attribute heartbeat_interval_attr = __ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store); +static ssize_t +heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.heartbeat_interval_ms); +} + +static struct kobj_attribute heartbeat_interval_def = +__ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL); + static void kobj_engine_release(struct kobject *kobj) { kfree(kobj); @@ -390,6 +446,42 @@ kobj_engine(struct kobject *dir, struct intel_engine_cs *engine) return &ke->base; } +static void add_defaults(struct kobj_engine *parent) +{ + static const struct attribute *files[] = { + &max_spin_def.attr, + &stop_timeout_def.attr, +#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL + &heartbeat_interval_def.attr, +#endif + NULL + }; + struct kobj_engine *ke; + + ke = kzalloc(sizeof(*ke), GFP_KERNEL); + if (!ke) + return; + + kobject_init(&ke->base, &kobj_engine_type); + ke->engine = parent->engine; + + if (kobject_add(&ke->base, &parent->base, "%s", ".defaults")) { + kobject_put(&ke->base); + return; + } + + if (sysfs_create_files(&ke->base, files)) + return; + + if (intel_engine_has_timeslices(ke->engine) && + sysfs_create_file(&ke->base, ×lice_duration_def.attr)) + return; + + if (intel_engine_has_preempt_reset(ke->engine) && + sysfs_create_file(&ke->base, &preempt_timeout_def.attr)) + return; +} + void intel_engines_add_sysfs(struct drm_i915_private *i915) { static const struct attribute *files[] = { @@ -433,6 +525,8 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915) sysfs_create_file(kobj, &preempt_timeout_attr.attr)) goto err_engine; + add_defaults(container_of(kobj, struct kobj_engine, base)); + if (0) { err_object: kobject_put(kobj); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index aa6d56e25a10..94eb63f309ce 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -258,7 +258,7 @@ static void guc_submit(struct intel_engine_cs *engine, static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.attr.priority | __NO_PREEMPTION; + return rq->sched.attr.priority; } static struct i915_request *schedule_in(struct i915_request *rq, int idx) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index a3cc080a46c6..8b87f130f7f1 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -882,6 +882,47 @@ static int mocs_cmd_reg_handler(struct parser_exec_state *s, return 0; } +static 
int is_cmd_update_pdps(unsigned int offset, + struct parser_exec_state *s) +{ + u32 base = s->workload->engine->mmio_base; + return i915_mmio_reg_equal(_MMIO(offset), GEN8_RING_PDP_UDW(base, 0)); +} + +static int cmd_pdp_mmio_update_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) +{ + struct intel_vgpu *vgpu = s->vgpu; + struct intel_vgpu_mm *shadow_mm = s->workload->shadow_mm; + struct intel_vgpu_mm *mm; + u64 pdps[GEN8_3LVL_PDPES]; + + if (shadow_mm->ppgtt_mm.root_entry_type == + GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { + pdps[0] = (u64)cmd_val(s, 2) << 32; + pdps[0] |= cmd_val(s, 4); + + mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); + if (!mm) { + gvt_vgpu_err("failed to get the 4-level shadow vm\n"); + return -EINVAL; + } + intel_vgpu_mm_get(mm); + list_add_tail(&mm->ppgtt_mm.link, + &s->workload->lri_shadow_mm); + *cmd_ptr(s, 2) = upper_32_bits(mm->ppgtt_mm.shadow_pdps[0]); + *cmd_ptr(s, 4) = lower_32_bits(mm->ppgtt_mm.shadow_pdps[0]); + } else { + /* Currently all guests use PML4 table and now can't + * have a guest with 3-level table but uses LRI for + * PPGTT update. So this is simply un-testable. */ + GEM_BUG_ON(1); + gvt_vgpu_err("invalid shared shadow vm type\n"); + return -EINVAL; + } + return 0; +} + static int cmd_reg_handler(struct parser_exec_state *s, unsigned int offset, unsigned int index, char *cmd) { @@ -920,6 +961,10 @@ static int cmd_reg_handler(struct parser_exec_state *s, patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE); } + if (is_cmd_update_pdps(offset, s) && + cmd_pdp_mmio_update_handler(s, offset, index)) + return -EINVAL; + /* TODO * In order to let workload with inhibit context to generate * correct image data into memory, vregs values will be loaded to diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index dd25c3024370..158873f269b1 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -424,8 +424,6 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc); out: - intel_vgpu_unpin_mm(workload->shadow_mm); - intel_vgpu_destroy_workload(workload); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index d2b0d85b39bc..210016192ce7 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1900,6 +1900,7 @@ struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, INIT_LIST_HEAD(&mm->ppgtt_mm.list); INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list); + INIT_LIST_HEAD(&mm->ppgtt_mm.link); if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) mm->ppgtt_mm.guest_pdps[0] = pdps[0]; diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 88789316807d..320b8d6ad92f 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -160,6 +160,7 @@ struct intel_vgpu_mm { struct list_head list; struct list_head lru_list; + struct list_head link; /* possible LRI shadow mm list */ } ppgtt_mm; struct { void *virtual_ggtt; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 2faf50e1b051..3e88e3b5c43a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2812,7 +2812,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_D(GAMTARBMODE, D_BDW_PLUS); #define RING_REG(base) _MMIO((base) + 0x270) - MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); + MMIO_RING_F(RING_REG, 32, 
F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 35ad540622ac..c00189432b58 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -58,10 +58,8 @@ static void set_context_pdp_root_pointer( static void update_shadow_pdps(struct intel_vgpu_workload *workload) { - struct drm_i915_gem_object *ctx_obj = - workload->req->context->state->obj; struct execlist_ring_context *shadow_ring_context; - struct page *page; + struct intel_context *ctx = workload->req->context; if (WARN_ON(!workload->shadow_mm)) return; @@ -69,11 +67,9 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload) if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount))) return; - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap(page); + shadow_ring_context = (struct execlist_ring_context *)ctx->lrc_reg_state; set_context_pdp_root_pointer(shadow_ring_context, (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps); - kunmap(page); } /* @@ -646,10 +642,11 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) } } -static int prepare_workload(struct intel_vgpu_workload *workload) +static int +intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_mm *m; int ret = 0; ret = intel_vgpu_pin_mm(workload->shadow_mm); @@ -664,6 +661,52 @@ static int prepare_workload(struct intel_vgpu_workload *workload) return -EINVAL; } + if (!list_empty(&workload->lri_shadow_mm)) { + list_for_each_entry(m, &workload->lri_shadow_mm, + ppgtt_mm.link) { + ret = intel_vgpu_pin_mm(m); + if (ret) { + list_for_each_entry_from_reverse(m, + &workload->lri_shadow_mm, + ppgtt_mm.link) + intel_vgpu_unpin_mm(m); + gvt_vgpu_err("LRI shadow ppgtt fail to pin\n"); + break; + } + } + } + + if (ret) + intel_vgpu_unpin_mm(workload->shadow_mm); + + return ret; +} + +static void +intel_vgpu_shadow_mm_unpin(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu_mm *m; + + if (!list_empty(&workload->lri_shadow_mm)) { + list_for_each_entry(m, &workload->lri_shadow_mm, + ppgtt_mm.link) + intel_vgpu_unpin_mm(m); + } + intel_vgpu_unpin_mm(workload->shadow_mm); +} + +static int prepare_workload(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + int ret = 0; + + ret = intel_vgpu_shadow_mm_pin(workload); + if (ret) { + gvt_vgpu_err("fail to pin shadow mm\n"); + return ret; + } + update_shadow_pdps(workload); set_context_ppgtt_from_shadow(workload, s->shadow[workload->engine->id]); @@ -710,7 +753,7 @@ err_shadow_wa_ctx: err_shadow_batch: release_shadow_batch_buffer(workload); err_unpin_mm: - intel_vgpu_unpin_mm(workload->shadow_mm); + intel_vgpu_shadow_mm_unpin(workload); return ret; } @@ -820,6 +863,37 @@ out: return workload; } +static void update_guest_pdps(struct intel_vgpu *vgpu, + u64 ring_context_gpa, u32 pdp[8]) +{ + u64 gpa; + int i; + + gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val); + + for (i = 0; i < 8; i++) + intel_gvt_hypervisor_write_gpa(vgpu, + gpa + i * 8, &pdp[7 - i], 4); +} + +static bool +check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) +{ + if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { + u64 shadow_pdp 
= c->pdps[7].val | (u64) c->pdps[6].val << 32; + + if (shadow_pdp != m->ppgtt_mm.shadow_pdps[0]) { + gvt_dbg_mm("4-level context ppgtt not match LRI command\n"); + return false; + } + return true; + } else { + /* see comment in LRI handler in cmd_parser.c */ + gvt_dbg_mm("invalid shadow mm type\n"); + return false; + } +} + static void update_guest_context(struct intel_vgpu_workload *workload) { struct i915_request *rq = workload->req; @@ -905,6 +979,15 @@ write: shadow_ring_context = (void *) ctx->lrc_reg_state; + if (!list_empty(&workload->lri_shadow_mm)) { + struct intel_vgpu_mm *m = list_last_entry(&workload->lri_shadow_mm, + struct intel_vgpu_mm, + ppgtt_mm.link); + GEM_BUG_ON(!check_shadow_context_ppgtt(shadow_ring_context, m)); + update_guest_pdps(vgpu, workload->ring_context_gpa, + (void *)m->ppgtt_mm.guest_pdps); + } + #define COPY_REG(name) \ intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) @@ -1013,6 +1096,9 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->complete(workload); + intel_vgpu_shadow_mm_unpin(workload); + intel_vgpu_destroy_workload(workload); + atomic_dec(&s->running_workload_num); wake_up(&scheduler->workload_complete_wq); @@ -1406,6 +1492,16 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) release_shadow_batch_buffer(workload); release_shadow_wa_ctx(&workload->wa_ctx); + if (!list_empty(&workload->lri_shadow_mm)) { + struct intel_vgpu_mm *m, *mm; + list_for_each_entry_safe(m, mm, &workload->lri_shadow_mm, + ppgtt_mm.link) { + list_del(&m->ppgtt_mm.link); + intel_vgpu_mm_put(m); + } + } + + GEM_BUG_ON(!list_empty(&workload->lri_shadow_mm)); if (workload->shadow_mm) intel_vgpu_mm_put(workload->shadow_mm); @@ -1424,6 +1520,7 @@ alloc_workload(struct intel_vgpu *vgpu) INIT_LIST_HEAD(&workload->list); INIT_LIST_HEAD(&workload->shadow_bb); + INIT_LIST_HEAD(&workload->lri_shadow_mm); init_waitqueue_head(&workload->shadow_ctx_status_wq); atomic_set(&workload->shadow_ctx_active, 0); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index bf7fc0ca4cb1..15d317f2a4a4 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -87,6 +87,7 @@ struct intel_vgpu_workload { int status; struct intel_vgpu_mm *shadow_mm; + struct list_head lri_shadow_mm; /* For PPGTT load cmd */ /* different submission model may need different handler */ int (*prepare)(struct intel_vgpu_workload *); diff --git a/drivers/gpu/drm/i915/i915_config.c b/drivers/gpu/drm/i915/i915_config.c new file mode 100644 index 000000000000..b79b5f6d2cfa --- /dev/null +++ b/drivers/gpu/drm/i915/i915_config.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_drv.h" + +unsigned long +i915_fence_context_timeout(const struct drm_i915_private *i915, u64 context) +{ + if (context && IS_ACTIVE(CONFIG_DRM_I915_FENCE_TIMEOUT)) + return msecs_to_jiffies_timeout(CONFIG_DRM_I915_FENCE_TIMEOUT); + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c09e1afb5f79..bca036ac6621 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,6 +32,7 @@ #include <drm/drm_debugfs.h> #include "gem/i915_gem_context.h" +#include "gt/intel_gt_buffer_pool.h" #include "gt/intel_gt_clock_utils.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" @@ -1303,8 +1304,8 @@ 
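The new i915_config.c above boils the foreign-fence policy down to one lookup: a non-zero fence context with a non-zero configured timeout gets the build-time value converted to jiffies, anything else yields 0 and no timeout timer is armed. A minimal standalone sketch of that mapping (not kernel code), assuming HZ=250, an illustrative 10000 ms setting, and a msecs_to_jiffies_timeout() modelled as round-up plus one guard jiffy:

#include <stdio.h>

#define HZ 250			/* assumed tick rate for this example */
#define FENCE_TIMEOUT_MS 10000	/* illustrative build-time value */

/* Modelled shape of msecs_to_jiffies_timeout(): round up, plus one jiffy. */
static unsigned long msecs_to_jiffies_timeout(unsigned long ms)
{
	return (ms * HZ + 999) / 1000 + 1;
}

/* Mirrors i915_fence_context_timeout(): only a real (non-zero) fence
 * context with a non-zero configured timeout arms the timer. */
static unsigned long fence_context_timeout(unsigned long long context)
{
	if (context && FENCE_TIMEOUT_MS)
		return msecs_to_jiffies_timeout(FENCE_TIMEOUT_MS);

	return 0;
}

int main(void)
{
	printf("foreign fence: %lu jiffies\n", fence_context_timeout(42));
	printf("no context:    %lu jiffies\n", fence_context_timeout(0));
	return 0;
}

The request code later in this patch passes exactly this value as the timeout argument when awaiting an external dma_fence, matching the previous behaviour of using 0 for context-less fences.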
static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "GT awake? %s [%d]\n", yesno(dev_priv->gt.awake), atomic_read(&dev_priv->gt.wakeref.count)); - seq_printf(m, "CS timestamp frequency: %u kHz\n", - RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); + seq_printf(m, "CS timestamp frequency: %u Hz\n", + RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_hz); p = drm_seq_file_printer(m); for_each_uabi_engine(engine, dev_priv) @@ -1403,13 +1404,12 @@ static int i915_perf_noa_delay_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz; /* * This would lead to infinite waits as we're doing timestamp * difference on the CS with only 32bits. */ - if (val > mul_u32_u32(U32_MAX, clk)) + if (i915_cs_timestamp_ns_to_ticks(i915, val) > U32_MAX) return -EINVAL; atomic64_set(&i915->perf.noa_programming_delay, val); @@ -1484,6 +1484,9 @@ gt_drop_caches(struct intel_gt *gt, u64 val) if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt)) intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL); + if (val & DROP_FREED) + intel_gt_flush_buffer_pool(gt); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6af69555733e..adb9bf34cf97 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -108,8 +108,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20200430" -#define DRIVER_TIMESTAMP 1588234401 +#define DRIVER_DATE "20200515" +#define DRIVER_TIMESTAMP 1589543364 struct drm_i915_gem_object; @@ -148,6 +148,8 @@ enum hpd_pin { struct i915_hotplug { struct delayed_work hotplug_work; + const u32 *hpd, *pch_hpd; + struct { unsigned long last_jiffies; int count; @@ -510,6 +512,7 @@ struct i915_psr { u32 dc3co_exit_delay; struct delayed_work dc3co_work; bool force_mode_changed; + struct drm_dp_vsc_sdp vsc; }; #define QUIRK_LVDS_SSC_DISABLE (1<<1) @@ -614,13 +617,14 @@ struct i915_gem_mm { #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */ -#define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ -#define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ - -#define I915_ENGINE_DEAD_TIMEOUT (4 * HZ) /* Seqno, head and subunits dead */ -#define I915_SEQNO_DEAD_TIMEOUT (12 * HZ) /* Seqno dead with active head */ +unsigned long i915_fence_context_timeout(const struct drm_i915_private *i915, + u64 context); -#define I915_ENGINE_WEDGED_TIMEOUT (60 * HZ) /* Reset but no recovery? */ +static inline unsigned long +i915_fence_timeout(const struct drm_i915_private *i915) +{ + return i915_fence_context_timeout(i915, U64_MAX); +} /* Amount of SAGV/QGV points, BSpec precisely defines this */ #define I915_NUM_QGV_POINTS 8 @@ -1507,6 +1511,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_REVID(p, since, until) \ (IS_ICELAKE(p) && IS_REVID(p, since, until)) +#define EHL_REVID_A0 0x0 + +#define IS_EHL_REVID(p, since, until) \ + (IS_ELKHARTLAKE(p) && IS_REVID(p, since, until)) + #define TGL_REVID_A0 0x0 #define TGL_REVID_B0 0x1 #define TGL_REVID_C0 0x2 @@ -1915,4 +1924,16 @@ i915_coherent_map_type(struct drm_i915_private *i915) return HAS_LLC(i915) ? 
I915_MAP_WB : I915_MAP_WC; } +static inline u64 i915_cs_timestamp_ns_to_ticks(struct drm_i915_private *i915, u64 val) +{ + return DIV_ROUND_UP_ULL(val * RUNTIME_INFO(i915)->cs_timestamp_frequency_hz, + 1000000000); +} + +static inline u64 i915_cs_timestamp_ticks_to_ns(struct drm_i915_private *i915, u64 val) +{ + return div_u64(val * 1000000000, + RUNTIME_INFO(i915)->cs_timestamp_frequency_hz); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 0ba7b1e881c0..6501939929d5 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -128,6 +128,13 @@ search_again: active = NULL; INIT_LIST_HEAD(&eviction_list); list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { + if (vma == active) { /* now seen this vma twice */ + if (flags & PIN_NONBLOCK) + break; + + active = ERR_PTR(-EAGAIN); + } + /* * We keep this list in a rough least-recently scanned order * of active elements (inactive elements are cheap to reap). @@ -143,21 +150,12 @@ search_again: * To notice when we complete one full cycle, we record the * first active element seen, before moving it to the tail. */ - if (i915_vma_is_active(vma)) { - if (vma == active) { - if (flags & PIN_NONBLOCK) - break; - - active = ERR_PTR(-EAGAIN); - } - - if (active != ERR_PTR(-EAGAIN)) { - if (!active) - active = vma; + if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) { + if (!active) + active = vma; - list_move_tail(&vma->vm_link, &vm->bound_list); - continue; - } + list_move_tail(&vma->vm_link, &vm->bound_list); + continue; } if (mark_free(&scan, vma, flags, &eviction_list)) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 54fce81d5724..d042644b9cd2 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -153,7 +153,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_CS_TIMESTAMP_FREQUENCY: - value = 1000 * RUNTIME_INFO(i915)->cs_timestamp_frequency_khz; + value = RUNTIME_INFO(i915)->cs_timestamp_frequency_hz; break; case I915_PARAM_MMAP_GTT_COHERENT: value = INTEL_INFO(i915)->has_coherent_ggtt; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index fa2d82a6de04..76b80fbfb7e9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -42,7 +42,7 @@ struct i915_vma_coredump { int num_pages; int page_count; int unused; - u32 *pages[0]; + u32 *pages[]; }; struct i915_request_coredump { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index bd722d0650c8..4dc601dffc08 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -124,7 +124,6 @@ static const u32 hpd_status_i915[HPD_NUM_PINS] = { [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS, }; -/* BXT hpd list */ static const u32 hpd_bxt[HPD_NUM_PINS] = { [HPD_PORT_A] = BXT_DE_PORT_HP_DDIA, [HPD_PORT_B] = BXT_DE_PORT_HP_DDIB, @@ -168,6 +167,49 @@ static const u32 hpd_tgp[HPD_NUM_PINS] = { [HPD_PORT_I] = SDE_TC_HOTPLUG_ICP(PORT_TC6), }; +static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) +{ + struct i915_hotplug *hpd = &dev_priv->hotplug; + + if (HAS_GMCH(dev_priv)) { + if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || + IS_CHERRYVIEW(dev_priv)) + hpd->hpd = hpd_status_g4x; + else + hpd->hpd = hpd_status_i915; + return; + } + + if (INTEL_GEN(dev_priv) >= 12) + hpd->hpd = hpd_gen12; + else if (INTEL_GEN(dev_priv) >= 
11) + hpd->hpd = hpd_gen11; + else if (IS_GEN9_LP(dev_priv)) + hpd->hpd = hpd_bxt; + else if (INTEL_GEN(dev_priv) >= 8) + hpd->hpd = hpd_bdw; + else if (INTEL_GEN(dev_priv) >= 7) + hpd->hpd = hpd_ivb; + else + hpd->hpd = hpd_ilk; + + if (!HAS_PCH_SPLIT(dev_priv) || HAS_PCH_NOP(dev_priv)) + return; + + if (HAS_PCH_TGP(dev_priv) || HAS_PCH_JSP(dev_priv)) + hpd->pch_hpd = hpd_tgp; + else if (HAS_PCH_ICP(dev_priv) || HAS_PCH_MCC(dev_priv)) + hpd->pch_hpd = hpd_icp; + else if (HAS_PCH_CNP(dev_priv) || HAS_PCH_SPT(dev_priv)) + hpd->pch_hpd = hpd_spt; + else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_CPT(dev_priv)) + hpd->pch_hpd = hpd_cpt; + else if (HAS_PCH_IBX(dev_priv)) + hpd->pch_hpd = hpd_ibx; + else + MISSING_CASE(INTEL_PCH_TYPE(dev_priv)); +} + static void intel_handle_vblank(struct drm_i915_private *dev_priv, enum pipe pipe) { @@ -1504,33 +1546,27 @@ static void i9xx_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 hotplug_status) { u32 pin_mask = 0, long_mask = 0; + u32 hotplug_trigger; - if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || - IS_CHERRYVIEW(dev_priv)) { - u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X; - - if (hotplug_trigger) { - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - hotplug_trigger, hotplug_trigger, - hpd_status_g4x, - i9xx_port_hotplug_long_detect); + if (IS_G4X(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X; + else + hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915; - intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); - } + if (hotplug_trigger) { + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, hotplug_trigger, + dev_priv->hotplug.hpd, + i9xx_port_hotplug_long_detect); - if (hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X) - dp_aux_irq_handler(dev_priv); - } else { - u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915; - - if (hotplug_trigger) { - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - hotplug_trigger, hotplug_trigger, - hpd_status_i915, - i9xx_port_hotplug_long_detect); - intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); - } + intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); } + + if ((IS_G4X(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X) + dp_aux_irq_handler(dev_priv); } static irqreturn_t valleyview_irq_handler(int irq, void *arg) @@ -1696,8 +1732,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) } static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv, - u32 hotplug_trigger, - const u32 hpd[HPD_NUM_PINS]) + u32 hotplug_trigger) { u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0; @@ -1720,8 +1755,9 @@ static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv, if (!hotplug_trigger) return; - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger, - dig_hotplug_reg, hpd, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, pch_port_hotplug_long_detect); intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); @@ -1732,7 +1768,7 @@ static void ibx_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) enum pipe pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK; - ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ibx); + ibx_hpd_irq_handler(dev_priv, hotplug_trigger); if (pch_iir & SDE_AUDIO_POWER_MASK) { int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK) >> @@ -1820,7 +1856,7 @@ static void 
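With intel_hpd_init_pins() above choosing the HPD pin tables once at init time and caching them in dev_priv->hotplug, the interrupt handlers that follow can drop their table parameters and simply dereference the cached pointers. A minimal standalone sketch of the same pattern (not kernel code; the platform checks and table contents are illustrative, not the real register bits):

#include <stdio.h>

enum hpd_pin { HPD_PORT_A, HPD_PORT_B, HPD_PORT_C, HPD_NUM_PINS };

/* Stand-in tables; the real ones map pins to hardware trigger bits. */
static const unsigned int hpd_gen11_tbl[HPD_NUM_PINS] = { 0x01, 0x02, 0x04 };
static const unsigned int hpd_bxt_tbl[HPD_NUM_PINS]   = { 0x10, 0x20, 0x40 };
static const unsigned int hpd_ilk_tbl[HPD_NUM_PINS]   = { 0x100, 0x200, 0x400 };

struct hotplug { const unsigned int *hpd; };
struct gpu { int gen; int is_gen9_lp; struct hotplug hotplug; };

/* Choose the table once, up front, instead of in every handler. */
static void hpd_init_pins(struct gpu *gpu)
{
	if (gpu->gen >= 11)
		gpu->hotplug.hpd = hpd_gen11_tbl;
	else if (gpu->is_gen9_lp)
		gpu->hotplug.hpd = hpd_bxt_tbl;
	else
		gpu->hotplug.hpd = hpd_ilk_tbl;
}

/* A handler now just reads the cached table. */
static unsigned int hpd_trigger_bit(const struct gpu *gpu, enum hpd_pin pin)
{
	return gpu->hotplug.hpd[pin];
}

int main(void)
{
	struct gpu gpu = { .gen = 11 };

	hpd_init_pins(&gpu);
	printf("HPD_PORT_B trigger bit: 0x%x\n",
	       hpd_trigger_bit(&gpu, HPD_PORT_B));
	return 0;
}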
cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) enum pipe pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT; - ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_cpt); + ibx_hpd_irq_handler(dev_priv, hotplug_trigger); if (pch_iir & SDE_AUDIO_POWER_MASK_CPT) { int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK_CPT) >> @@ -1857,22 +1893,18 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) u32 ddi_hotplug_trigger, tc_hotplug_trigger; u32 pin_mask = 0, long_mask = 0; bool (*tc_port_hotplug_long_detect)(enum hpd_pin pin, u32 val); - const u32 *pins; if (HAS_PCH_TGP(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; - pins = hpd_tgp; } else if (HAS_PCH_JSP(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = 0; - pins = hpd_tgp; } else if (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; - pins = hpd_icp; } else { drm_WARN(&dev_priv->drm, !HAS_PCH_ICP(dev_priv), "Unrecognized PCH type 0x%x\n", @@ -1881,7 +1913,6 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; - pins = hpd_icp; } if (ddi_hotplug_trigger) { @@ -1891,8 +1922,8 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) I915_WRITE(SHOTPLUG_CTL_DDI, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - ddi_hotplug_trigger, - dig_hotplug_reg, pins, + ddi_hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, icp_ddi_port_hotplug_long_detect); } @@ -1903,8 +1934,8 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) I915_WRITE(SHOTPLUG_CTL_TC, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - tc_hotplug_trigger, - dig_hotplug_reg, pins, + tc_hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, tc_port_hotplug_long_detect); } @@ -1929,7 +1960,8 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - hotplug_trigger, dig_hotplug_reg, hpd_spt, + hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, spt_port_hotplug_long_detect); } @@ -1940,7 +1972,8 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) I915_WRITE(PCH_PORT_HOTPLUG2, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - hotplug2_trigger, dig_hotplug_reg, hpd_spt, + hotplug2_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, spt_port_hotplug2_long_detect); } @@ -1952,16 +1985,16 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) } static void ilk_hpd_irq_handler(struct drm_i915_private *dev_priv, - u32 hotplug_trigger, - const u32 hpd[HPD_NUM_PINS]) + u32 hotplug_trigger) { u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0; dig_hotplug_reg = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, dig_hotplug_reg); - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger, - dig_hotplug_reg, hpd, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, dig_hotplug_reg, + 
dev_priv->hotplug.hpd, ilk_port_hotplug_long_detect); intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); @@ -1974,7 +2007,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG; if (hotplug_trigger) - ilk_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ilk); + ilk_hpd_irq_handler(dev_priv, hotplug_trigger); if (de_iir & DE_AUX_CHANNEL_A) dp_aux_irq_handler(dev_priv); @@ -2020,7 +2053,7 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG_IVB; if (hotplug_trigger) - ilk_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ivb); + ilk_hpd_irq_handler(dev_priv, hotplug_trigger); if (de_iir & DE_ERR_INT_IVB) ivb_err_int_handler(dev_priv); @@ -2130,16 +2163,16 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) } static void bxt_hpd_irq_handler(struct drm_i915_private *dev_priv, - u32 hotplug_trigger, - const u32 hpd[HPD_NUM_PINS]) + u32 hotplug_trigger) { u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0; dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG); I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg); - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger, - dig_hotplug_reg, hpd, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.hpd, bxt_port_hotplug_long_detect); intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); @@ -2151,15 +2184,11 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) u32 trigger_tc = iir & GEN11_DE_TC_HOTPLUG_MASK; u32 trigger_tbt = iir & GEN11_DE_TBT_HOTPLUG_MASK; long_pulse_detect_func long_pulse_detect; - const u32 *hpd; - if (INTEL_GEN(dev_priv) >= 12) { + if (INTEL_GEN(dev_priv) >= 12) long_pulse_detect = gen12_port_hotplug_long_detect; - hpd = hpd_gen12; - } else { + else long_pulse_detect = gen11_port_hotplug_long_detect; - hpd = hpd_gen11; - } if (trigger_tc) { u32 dig_hotplug_reg; @@ -2167,8 +2196,10 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) dig_hotplug_reg = I915_READ(GEN11_TC_HOTPLUG_CTL); I915_WRITE(GEN11_TC_HOTPLUG_CTL, dig_hotplug_reg); - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tc, - dig_hotplug_reg, hpd, long_pulse_detect); + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + trigger_tc, dig_hotplug_reg, + dev_priv->hotplug.hpd, + long_pulse_detect); } if (trigger_tbt) { @@ -2177,8 +2208,10 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) dig_hotplug_reg = I915_READ(GEN11_TBT_HOTPLUG_CTL); I915_WRITE(GEN11_TBT_HOTPLUG_CTL, dig_hotplug_reg); - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tbt, - dig_hotplug_reg, hpd, long_pulse_detect); + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + trigger_tbt, dig_hotplug_reg, + dev_priv->hotplug.hpd, + long_pulse_detect); } if (pin_mask) @@ -2309,15 +2342,13 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (IS_GEN9_LP(dev_priv)) { tmp_mask = iir & BXT_DE_PORT_HOTPLUG_MASK; if (tmp_mask) { - bxt_hpd_irq_handler(dev_priv, tmp_mask, - hpd_bxt); + bxt_hpd_irq_handler(dev_priv, tmp_mask); found = true; } } else if (IS_BROADWELL(dev_priv)) { tmp_mask = iir & GEN8_PORT_DP_A_HOTPLUG; if (tmp_mask) { - ilk_hpd_irq_handler(dev_priv, - tmp_mask, hpd_bdw); + ilk_hpd_irq_handler(dev_priv, tmp_mask); found = true; } } @@ -2870,6 +2901,14 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) 
GEN3_IRQ_RESET(uncore, SDE); + + /* Wa_14010685332:icl */ + if (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) { + intel_uncore_rmw(uncore, SOUTH_CHICKEN1, + SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); + intel_uncore_rmw(uncore, SOUTH_CHICKEN1, + SBCLK_RUN_REFCLK_DIS, 0); + } } static void gen11_irq_reset(struct drm_i915_private *dev_priv) @@ -2989,13 +3028,12 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(dev_priv)) hotplug_irqs = SDE_HOTPLUG_MASK; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); - } else { + else hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); - } + + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); @@ -3021,13 +3059,12 @@ static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv, static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, u32 sde_ddi_mask, u32 sde_tc_mask, - u32 ddi_enable_mask, u32 tc_enable_mask, - const u32 *pins) + u32 ddi_enable_mask, u32 tc_enable_mask) { u32 hotplug_irqs, enabled_irqs; hotplug_irqs = sde_ddi_mask | sde_tc_mask; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, pins); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); @@ -3044,8 +3081,7 @@ static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1), - ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1), - hpd_icp); + ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1)); } /* @@ -3057,8 +3093,7 @@ static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, 0, - TGP_DDI_HPD_ENABLE_MASK, 0, - hpd_tgp); + TGP_DDI_HPD_ENABLE_MASK, 0); } static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3083,11 +3118,9 @@ static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - const u32 *hpd; u32 val; - hpd = INTEL_GEN(dev_priv) >= 12 ? 
hpd_gen12 : hpd_gen11; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); hotplug_irqs = GEN11_DE_TC_HOTPLUG_MASK | GEN11_DE_TBT_HOTPLUG_MASK; val = I915_READ(GEN11_DE_HPD_IMR); @@ -3099,12 +3132,10 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP) icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, SDE_TC_MASK_TGP, - TGP_DDI_HPD_ENABLE_MASK, - TGP_TC_HPD_ENABLE_MASK, hpd_tgp); + TGP_DDI_HPD_ENABLE_MASK, TGP_TC_HPD_ENABLE_MASK); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_MASK_ICP, - ICP_DDI_HPD_ENABLE_MASK, - ICP_TC_HPD_ENABLE_MASK, hpd_icp); + ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE_MASK); } static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3140,7 +3171,7 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); hotplug_irqs = SDE_HOTPLUG_MASK_SPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); @@ -3169,17 +3200,17 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) { hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bdw); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); } else if (INTEL_GEN(dev_priv) >= 7) { hotplug_irqs = DE_DP_A_HOTPLUG_IVB; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ivb); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); } else { hotplug_irqs = DE_DP_A_HOTPLUG; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ilk); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); } @@ -3230,7 +3261,7 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK; bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); @@ -3361,7 +3392,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) | GEN8_PIPE_CDCLK_CRC_DONE; u32 de_pipe_enables; - u32 de_port_masked = GEN8_AUX_CHANNEL_A; + u32 de_port_masked = gen8_de_port_aux_mask(dev_priv); u32 de_port_enables; u32 de_misc_masked = GEN8_DE_EDP_PSR; enum pipe pipe; @@ -3369,18 +3400,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) <= 10) de_misc_masked |= GEN8_DE_MISC_GSE; - if (INTEL_GEN(dev_priv) >= 9) { - de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C | - GEN9_AUX_CHANNEL_D; - if (IS_GEN9_LP(dev_priv)) - de_port_masked |= BXT_DE_PORT_GMBUS; - } - - if (INTEL_GEN(dev_priv) >= 11) - de_port_masked |= ICL_AUX_CHANNEL_E; - - if (IS_CNL_WITH_PORT_F(dev_priv) || INTEL_GEN(dev_priv) >= 11) - de_port_masked |= CNL_AUX_CHANNEL_F; + if (IS_GEN9_LP(dev_priv)) + de_port_masked |= BXT_DE_PORT_GMBUS; de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN; @@ -3934,6 +3955,8 @@ void 
intel_irq_init(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; int i; + intel_hpd_init_pins(dev_priv); + intel_hpd_init_work(dev_priv); INIT_WORK(&dev_priv->l3_parity.error_work, ivb_parity_work); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1faf9d6ec0a4..eb0b5be7c35d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -615,7 +615,7 @@ static const struct intel_device_info chv_info = { .has_logical_ring_contexts = 1, .display.has_gmch = 1, .dma_mask_size = 39, - .ppgtt_type = INTEL_PPGTT_ALIASING, + .ppgtt_type = INTEL_PPGTT_FULL, .ppgtt_size = 32, .has_reset_engine = 1, .has_snoop = true, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c533f569dd42..f35712d04ba4 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1612,10 +1612,7 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) struct drm_i915_gem_object *bo; struct i915_vma *vma; const u64 delay_ticks = 0xffffffffffffffff - - DIV64_U64_ROUND_UP( - atomic64_read(&stream->perf->noa_programming_delay) * - RUNTIME_INFO(i915)->cs_timestamp_frequency_khz, - 1000000ull); + i915_cs_timestamp_ns_to_ticks(i915, atomic64_read(&stream->perf->noa_programming_delay)); const u32 base = stream->engine->mmio_base; #define CS_GPR(x) GEN8_RING_CS_GPR(base, x) u32 *batch, *ts0, *cs, *jump; @@ -3485,8 +3482,7 @@ err: static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { - return div64_u64(1000000000ULL * (2ULL << exponent), - 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz); + return i915_cs_timestamp_ticks_to_ns(perf->i915, 2ULL << exponent); } /** @@ -4344,8 +4340,8 @@ void i915_perf_init(struct drm_i915_private *i915) if (perf->ops.enable_metric_set) { mutex_init(&perf->lock); - oa_sample_rate_hard_limit = 1000 * - (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); + oa_sample_rate_hard_limit = + RUNTIME_INFO(i915)->cs_timestamp_frequency_hz / 2; mutex_init(&perf->metrics_lock); idr_init(&perf->metrics_idr); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 83c6a8ccd2cb..e991a707bdb7 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -442,6 +442,7 @@ static u64 count_interrupts(struct drm_i915_private *i915) static void i915_pmu_event_destroy(struct perf_event *event) { WARN_ON(event->parent); + module_put(THIS_MODULE); } static int @@ -533,8 +534,10 @@ static int i915_pmu_event_init(struct perf_event *event) if (ret) return ret; - if (!event->parent) + if (!event->parent) { + __module_get(THIS_MODULE); event->destroy = i915_pmu_event_destroy; + } return 0; } diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 732aad148881..5003a71113cb 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -24,15 +24,12 @@ enum { I915_PRIORITY_DISPLAY, }; -#define I915_USER_PRIORITY_SHIFT 2 +#define I915_USER_PRIORITY_SHIFT 0 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT) #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1) -#define I915_PRIORITY_WAIT ((u8)BIT(0)) -#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(1)) - /* Smallest priority value that cannot be bumped. 
*/ #define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK) @@ -47,8 +44,6 @@ enum { #define I915_PRIORITY_UNPREEMPTABLE INT_MAX #define I915_PRIORITY_BARRIER INT_MAX -#define __NO_PREEMPTION (I915_PRIORITY_WAIT) - struct i915_priolist { struct list_head requests[I915_PRIORITY_COUNT]; struct rb_node node; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fd9f2904d93c..6c076a24eb82 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2557,6 +2557,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4) #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) #define BSD_HWS_PGA_GEN7 _MMIO(0x04180) +#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208) +#define GEN12_VD0_AUX_NV _MMIO(0x4218) +#define GEN12_VD1_AUX_NV _MMIO(0x4228) +#define GEN12_VD2_AUX_NV _MMIO(0x4298) +#define GEN12_VD3_AUX_NV _MMIO(0x42A8) +#define GEN12_VE0_AUX_NV _MMIO(0x4238) +#define GEN12_VE1_AUX_NV _MMIO(0x42B8) +#define AUX_INV REG_BIT(0) #define BLT_HWS_PGA_GEN7 _MMIO(0x04280) #define VEBOX_HWS_PGA_GEN7 _MMIO(0x04380) #define RING_ACTHD(base) _MMIO((base) + 0x74) @@ -8573,6 +8581,7 @@ enum { #define FDI_BC_BIFURCATION_SELECT (1 << 12) #define CHASSIS_CLK_REQ_DURATION_MASK (0xf << 8) #define CHASSIS_CLK_REQ_DURATION(x) ((x) << 8) +#define SBCLK_RUN_REFCLK_DIS (1 << 7) #define SPT_PWM_GRANULARITY (1 << 0) #define SOUTH_CHICKEN2 _MMIO(0xc2004) #define FDI_MPHY_IOSFSB_RESET_STATUS (1 << 13) @@ -9064,6 +9073,7 @@ enum { #define GEN7_PCODE_ILLEGAL_DATA 0x3 #define GEN11_PCODE_ILLEGAL_SUBCOMMAND 0x4 #define GEN11_PCODE_LOCKED 0x6 +#define GEN11_PCODE_REJECTED 0x11 #define GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10 #define GEN6_PCODE_WRITE_RC6VIDS 0x4 #define GEN6_PCODE_READ_RC6VIDS 0x5 @@ -9085,6 +9095,9 @@ enum { #define ICL_PCODE_MEM_SUBSYSYSTEM_INFO 0xd #define ICL_PCODE_MEM_SS_READ_GLOBAL_INFO (0x0 << 8) #define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8)) +#define ICL_PCODE_SAGV_DE_MEM_SS_CONFIG 0xe +#define ICL_PCODE_POINTS_RESTRICTED 0x0 +#define ICL_PCODE_POINTS_RESTRICTED_MASK 0x1 #define GEN6_PCODE_READ_D_COMP 0x10 #define GEN6_PCODE_WRITE_D_COMP 0x11 #define ICL_PCODE_EXIT_TCCOLD 0x12 diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 22635bbabf06..526c1e9acbd5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -23,6 +23,7 @@ */ #include <linux/dma-fence-array.h> +#include <linux/dma-fence-chain.h> #include <linux/irq_work.h> #include <linux/prefetch.h> #include <linux/sched.h> @@ -367,8 +368,6 @@ __await_execution(struct i915_request *rq, } spin_unlock_irq(&signal->lock); - /* Copy across semaphore status as we need the same behaviour */ - rq->sched.flags |= signal->sched.flags; return 0; } @@ -536,10 +535,8 @@ void __i915_request_unsubmit(struct i915_request *request) spin_unlock(&request->lock); /* We've already spun, don't charge on resubmitting. 
*/ - if (request->sched.semaphores && i915_request_started(request)) { - request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE; + if (request->sched.semaphores && i915_request_started(request)) request->sched.semaphores = 0; - } /* * We don't need to wake_up any waiters on request->execute, they @@ -597,15 +594,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static void irq_semaphore_cb(struct irq_work *wrk) -{ - struct i915_request *rq = - container_of(wrk, typeof(*rq), semaphore_work); - - i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE); - i915_request_put(rq); -} - static int __i915_sw_fence_call semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -613,11 +601,6 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: - if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) { - i915_request_get(rq); - init_irq_work(&rq->semaphore_work, irq_semaphore_cb); - irq_work_queue(&rq->semaphore_work); - } break; case FENCE_FREE: @@ -950,6 +933,7 @@ __emit_semaphore_wait(struct i915_request *to, u32 *cs; GEM_BUG_ON(INTEL_GEN(to->i915) < 8); + GEM_BUG_ON(i915_request_has_initial_breadcrumb(to)); /* We need to pin the signaler's HWSP until we are finished reading. */ err = intel_timeline_read_hwsp(from, to, &hwsp_offset); @@ -995,13 +979,26 @@ emit_semaphore_wait(struct i915_request *to, gfp_t gfp) { const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; + struct i915_sw_fence *wait = &to->submit; if (!intel_context_use_semaphores(to->context)) goto await_fence; + if (i915_request_has_initial_breadcrumb(to)) + goto await_fence; + if (!rcu_access_pointer(from->hwsp_cacheline)) goto await_fence; + /* + * If this or its dependents are waiting on an external fence + * that may fail catastrophically, then we want to avoid using + * sempahores as they bypass the fence signaling metadata, and we + * lose the fence->error propagation. + */ + if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) + goto await_fence; + /* Just emit the first semaphore we see as request space is limited. 
*/ if (already_busywaiting(to) & mask) goto await_fence; @@ -1017,11 +1014,10 @@ emit_semaphore_wait(struct i915_request *to, goto await_fence; to->sched.semaphores |= mask; - to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; - return 0; + wait = &to->semaphore; await_fence: - return i915_sw_fence_await_dma_fence(&to->submit, + return i915_sw_fence_await_dma_fence(wait, &from->fence, 0, I915_FENCE_GFP); } @@ -1034,11 +1030,15 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) GEM_BUG_ON(to == from); GEM_BUG_ON(to->timeline == from->timeline); - if (i915_request_completed(from)) + if (i915_request_completed(from)) { + i915_sw_fence_set_error_once(&to->submit, from->fence.error); return 0; + } if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(&to->sched, &from->sched); + ret = i915_sched_node_add_dependency(&to->sched, + &from->sched, + I915_DEPENDENCY_EXTERNAL); if (ret < 0) return ret; } @@ -1052,15 +1052,56 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) if (ret < 0) return ret; - if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) { - ret = i915_sw_fence_await_dma_fence(&to->semaphore, - &from->fence, 0, - I915_FENCE_GFP); - if (ret < 0) - return ret; + return 0; +} + +static void mark_external(struct i915_request *rq) +{ + /* + * The downside of using semaphores is that we lose metadata passing + * along the signaling chain. This is particularly nasty when we + * need to pass along a fatal error such as EFAULT or EDEADLK. For + * fatal errors we want to scrub the request before it is executed, + * which means that we cannot preload the request onto HW and have + * it wait upon a semaphore. + */ + rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; +} + +static int +__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) +{ + mark_external(rq); + return i915_sw_fence_await_dma_fence(&rq->submit, fence, + i915_fence_context_timeout(rq->i915, + fence->context), + I915_FENCE_GFP); +} + +static int +i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) +{ + struct dma_fence *iter; + int err = 0; + + if (!to_dma_fence_chain(fence)) + return __i915_request_await_external(rq, fence); + + dma_fence_chain_for_each(iter, fence) { + struct dma_fence_chain *chain = to_dma_fence_chain(iter); + + if (!dma_fence_is_i915(chain->fence)) { + err = __i915_request_await_external(rq, iter); + break; + } + + err = i915_request_await_dma_fence(rq, chain->fence); + if (err < 0) + break; } - return 0; + dma_fence_put(iter); + return err; } int @@ -1110,9 +1151,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) if (dma_fence_is_i915(fence)) ret = i915_request_await_request(rq, to_request(fence)); else - ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, - fence->context ? I915_FENCE_TIMEOUT : 0, - I915_FENCE_GFP); + ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; @@ -1192,7 +1231,8 @@ __i915_request_await_execution(struct i915_request *to, * immediate execution, and so we must wait until it reaches the * active slot. 
*/ - if (intel_engine_has_semaphores(to->engine)) { + if (intel_engine_has_semaphores(to->engine) && + !i915_request_has_initial_breadcrumb(to)) { err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); if (err < 0) return err; @@ -1200,7 +1240,9 @@ __i915_request_await_execution(struct i915_request *to, /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - err = i915_sched_node_add_dependency(&to->sched, &from->sched); + err = i915_sched_node_add_dependency(&to->sched, + &from->sched, + I915_DEPENDENCY_WEAK); if (err < 0) return err; } @@ -1236,6 +1278,9 @@ i915_request_await_execution(struct i915_request *rq, continue; } + if (fence->context == rq->fence.context) + continue; + /* * We don't squash repeated fence dependencies here as we * want to run our callback in all cases. @@ -1246,9 +1291,7 @@ i915_request_await_execution(struct i915_request *rq, to_request(fence), hook); else - ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, - I915_FENCE_TIMEOUT, - GFP_KERNEL); + ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; } while (--nchild); @@ -1456,11 +1499,6 @@ void i915_request_add(struct i915_request *rq) attr = ctx->sched; rcu_read_unlock(); - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) - attr.priority |= I915_PRIORITY_NOSEMAPHORE; - if (list_empty(&rq->sched.signalers_list)) - attr.priority |= I915_PRIORITY_WAIT; - __i915_request_queue(rq, &attr); mutex_unlock(&tl->mutex); @@ -1645,7 +1683,6 @@ long i915_request_wait(struct i915_request *rq, if (flags & I915_WAIT_PRIORITY) { if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) intel_rps_boost(rq); - i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); } wait.tsk = current; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index d8ce908e1346..8ec7ee4dbadc 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -84,19 +84,26 @@ enum { I915_FENCE_FLAG_PQUEUE, /* - * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list + * I915_FENCE_FLAG_HOLD - this request is currently on hold * - * Internal bookkeeping used by the breadcrumb code to track when - * a request is on the various signal_list. + * This request has been suspended, pending an ongoing investigation. */ - I915_FENCE_FLAG_SIGNAL, + I915_FENCE_FLAG_HOLD, /* - * I915_FENCE_FLAG_HOLD - this request is currently on hold + * I915_FENCE_FLAG_INITIAL_BREADCRUMB - this request has the initial + * breadcrumb that marks the end of semaphore waits and start of the + * user payload. + */ + I915_FENCE_FLAG_INITIAL_BREADCRUMB, + + /* + * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list * - * This request has been suspended, pending an ongoing investigation. + * Internal bookkeeping used by the breadcrumb code to track when + * a request is on the various signal_list. */ - I915_FENCE_FLAG_HOLD, + I915_FENCE_FLAG_SIGNAL, /* * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted @@ -209,7 +216,6 @@ struct i915_request { }; struct list_head execute_cb; struct i915_sw_fence semaphore; - struct irq_work semaphore_work; /* * A list of everyone we wait upon, and everyone who waits upon us. 
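The request changes above only allow a hardware-semaphore busywait when nothing in the signaling chain involves a foreign fence: an external fence forces the wait back onto the CPU-side submit fence so that fence->error and the timeout installed for external fences still apply, and (per the scheduler hunk later in this patch) the I915_SCHED_HAS_EXTERNAL_CHAIN taint propagates to every waiter. A standalone sketch of those checks (not kernel code), with simplified stand-in fields rather than the driver's real structures:

#include <stdbool.h>
#include <stdio.h>

#define SCHED_HAS_EXTERNAL_CHAIN 0x1u

struct sched_node { unsigned int flags; };

struct request {
	struct sched_node sched;
	bool uses_semaphores;	 /* context allows semaphore busywaits */
	bool initial_breadcrumb; /* semaphore section of the ring already closed */
	bool signaler_has_hwsp;	 /* signaler's seqno page still readable */
};

/* Mirrors "node->flags |= signal->flags": external taint spreads to waiters. */
static void add_dependency(struct request *waiter, const struct request *signal)
{
	waiter->sched.flags |= signal->sched.flags;
}

static bool can_emit_semaphore_wait(const struct request *to,
				    const struct request *from)
{
	if (!to->uses_semaphores)
		return false;
	if (to->initial_breadcrumb)
		return false;
	if (!from->signaler_has_hwsp)
		return false;
	if (from->sched.flags & SCHED_HAS_EXTERNAL_CHAIN)
		return false;	/* keep error and timeout propagation intact */
	/* (the real code also limits itself to one busywait per signaling engine) */
	return true;
}

int main(void)
{
	struct request ext_signal = {
		.sched.flags = SCHED_HAS_EXTERNAL_CHAIN,
		.signaler_has_hwsp = true,
	};
	struct request waiter = {
		.uses_semaphores = true,
		.signaler_has_hwsp = true,
	};

	add_dependency(&waiter, &ext_signal);
	printf("semaphore busywait allowed: %d\n",
	       can_emit_semaphore_wait(&waiter, &ext_signal));
	return 0;
}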
@@ -390,6 +396,12 @@ static inline bool i915_request_in_priority_queue(const struct i915_request *rq) return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } +static inline bool +i915_request_has_initial_breadcrumb(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); +} + /** * Returns true if seq1 is later than seq2. */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 37cfcf5b321b..f4ea318781f0 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -51,11 +51,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) GEM_BUG_ON(rb_first_cached(&execlists->queue) != rb_first(&execlists->queue.rb_root)); - last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1; + last_prio = INT_MAX; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); - GEM_BUG_ON(p->priority >= last_prio); + GEM_BUG_ON(p->priority > last_prio); last_prio = p->priority; GEM_BUG_ON(!p->used); @@ -174,7 +174,7 @@ sched_lock_engine(const struct i915_sched_node *node, static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.attr.priority | __NO_PREEMPTION; + return rq->sched.attr.priority; } static inline bool need_preempt(int prio, int active) @@ -434,25 +434,12 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, dep->waiter = node; dep->flags = flags; - /* Keep track of whether anyone on this chain has a semaphore */ - if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN && - !node_started(signal)) - node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; - /* All set, now publish. Beware the lockless walkers. */ list_add_rcu(&dep->signal_link, &node->signalers_list); list_add_rcu(&dep->wait_link, &signal->waiters_list); - /* - * As we do not allow WAIT to preempt inflight requests, - * once we have executed a request, along with triggering - * any execution callbacks, we must preserve its ordering - * within the non-preemptible FIFO. 
- */ - BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); - if (flags & I915_DEPENDENCY_EXTERNAL) - __bump_priority(signal, __NO_PREEMPTION); - + /* Propagate the chains */ + node->flags |= signal->flags; ret = true; } @@ -462,7 +449,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, } int i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal) + struct i915_sched_node *signal, + unsigned long flags) { struct i915_dependency *dep; @@ -473,8 +461,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, local_bh_disable(); if (!__i915_sched_node_add_dependency(node, signal, dep, - I915_DEPENDENCY_EXTERNAL | - I915_DEPENDENCY_ALLOC)) + flags | I915_DEPENDENCY_ALLOC)) i915_dependency_free(dep); local_bh_enable(); /* kick submission tasklet */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index d1dc4efef77b..6f0bf00fc569 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -34,7 +34,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, unsigned long flags); int i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal); + struct i915_sched_node *signal, + unsigned long flags); void i915_sched_node_fini(struct i915_sched_node *node); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index d18e70550054..f72e6c397b08 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -65,7 +65,7 @@ struct i915_sched_node { struct list_head link; struct i915_sched_attr attr; unsigned int flags; -#define I915_SCHED_HAS_SEMAPHORE_CHAIN BIT(0) +#define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0) intel_engine_mask_t semaphores; }; @@ -78,6 +78,7 @@ struct i915_dependency { unsigned long flags; #define I915_DEPENDENCY_ALLOC BIT(0) #define I915_DEPENDENCY_EXTERNAL BIT(1) +#define I915_DEPENDENCY_WEAK BIT(2) }; #endif /* _I915_SCHEDULER_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index 98bcb6fa0ab4..d53d207ab6eb 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -133,4 +133,6 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...); #define igt_timeout(t, fmt, ...) 
\ __igt_timeout((t), KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +void igt_hexdump(const void *buf, size_t len); + #endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 7daf81f55c90..295b9829e2da 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -546,13 +546,11 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, cb->fence = fence; i915_sw_fence_await(fence); - ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake); - if (ret == 0) { - ret = 1; - } else { + ret = 1; + if (dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake)) { + /* fence already signaled */ __dma_i915_sw_fence_wake(dma, &cb->base); - if (ret == -ENOENT) /* fence already signaled */ - ret = 0; + ret = 0; } return ret; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 91bb7891c70c..8a635bd4d5d8 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -136,8 +136,8 @@ void intel_device_info_print_runtime(const struct intel_runtime_info *info, sseu_dump(&info->sseu, p); drm_printf(p, "rawclk rate: %u kHz\n", info->rawclk_freq); - drm_printf(p, "CS timestamp frequency: %u kHz\n", - info->cs_timestamp_frequency_khz); + drm_printf(p, "CS timestamp frequency: %u Hz\n", + info->cs_timestamp_frequency_hz); } static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, @@ -678,12 +678,12 @@ static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; - base_freq *= 1000; + base_freq *= 1000000; frac_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); - frac_freq = 1000 / (frac_freq + 1); + frac_freq = 1000000 / (frac_freq + 1); return base_freq + frac_freq; } @@ -691,8 +691,8 @@ static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv, u32 rpm_config_reg) { - u32 f19_2_mhz = 19200; - u32 f24_mhz = 24000; + u32 f19_2_mhz = 19200000; + u32 f24_mhz = 24000000; u32 crystal_clock = (rpm_config_reg & GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; @@ -711,10 +711,10 @@ static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv, static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv, u32 rpm_config_reg) { - u32 f19_2_mhz = 19200; - u32 f24_mhz = 24000; - u32 f25_mhz = 25000; - u32 f38_4_mhz = 38400; + u32 f19_2_mhz = 19200000; + u32 f24_mhz = 24000000; + u32 f25_mhz = 25000000; + u32 f38_4_mhz = 38400000; u32 crystal_clock = (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; @@ -736,9 +736,9 @@ static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv, static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) { - u32 f12_5_mhz = 12500; - u32 f19_2_mhz = 19200; - u32 f24_mhz = 24000; + u32 f12_5_mhz = 12500000; + u32 f19_2_mhz = 19200000; + u32 f24_mhz = 24000000; if (INTEL_GEN(dev_priv) <= 4) { /* PRMs say: @@ -747,7 +747,7 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) * hclks." 
(through the “Clocking Configuration” * (“CLKCFG”) MCHBAR register) */ - return RUNTIME_INFO(dev_priv)->rawclk_freq / 16; + return RUNTIME_INFO(dev_priv)->rawclk_freq * 1000 / 16; } else if (INTEL_GEN(dev_priv) <= 8) { /* PRMs say: * @@ -1048,11 +1048,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) drm_dbg(&dev_priv->drm, "rawclk rate: %d kHz\n", runtime->rawclk_freq); /* Initialize command stream timestamp frequency */ - runtime->cs_timestamp_frequency_khz = + runtime->cs_timestamp_frequency_hz = read_timestamp_frequency(dev_priv); - if (runtime->cs_timestamp_frequency_khz) { + if (runtime->cs_timestamp_frequency_hz) { runtime->cs_timestamp_period_ns = - div_u64(1e6, runtime->cs_timestamp_frequency_khz); + i915_cs_timestamp_ticks_to_ns(dev_priv, 1); drm_dbg(&dev_priv->drm, "CS timestamp wraparound in %lldms\n", div_u64(mul_u32_u32(runtime->cs_timestamp_period_ns, diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 69c9257c6c6a..62e03ffa377e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -221,7 +221,7 @@ struct intel_runtime_info { u32 rawclk_freq; - u32 cs_timestamp_frequency_khz; + u32 cs_timestamp_frequency_hz; u32 cs_timestamp_period_ns; /* Media engine access to SFC per instance */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bfb180fe8047..696491d71a1d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -43,6 +43,7 @@ #include "i915_fixed.h" #include "i915_irq.h" #include "i915_trace.h" +#include "display/intel_bw.h" #include "intel_pm.h" #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" @@ -3637,10 +3638,6 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) static bool intel_has_sagv(struct drm_i915_private *dev_priv) { - /* HACK! */ - if (IS_GEN(dev_priv, 12)) - return false; - return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) && dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } @@ -3760,34 +3757,116 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) void intel_sagv_pre_plane_update(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_bw_state *new_bw_state; + const struct intel_bw_state *old_bw_state; + u32 new_mask = 0; + + /* + * Just return if we can't control SAGV or don't have it. + * This is different from situation when we have SAGV but just can't + * afford it due to DBuf limitation - in case if SAGV is completely + * disabled in a BIOS, we are not even allowed to send a PCode request, + * as it will throw an error. So have to check it here. + */ + if (!intel_has_sagv(dev_priv)) + return; + + new_bw_state = intel_atomic_get_new_bw_state(state); + if (!new_bw_state) + return; - if (!intel_can_enable_sagv(state)) + if (INTEL_GEN(dev_priv) < 11 && !intel_can_enable_sagv(dev_priv, new_bw_state)) { intel_disable_sagv(dev_priv); + return; + } + + old_bw_state = intel_atomic_get_old_bw_state(state); + /* + * Nothing to mask + */ + if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask) + return; + + new_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask; + + /* + * If new mask is zero - means there is nothing to mask, + * we can only unmask, which should be done in unmask. + */ + if (!new_mask) + return; + + /* + * Restrict required qgv points before updating the configuration. 
+ * According to BSpec we can't mask and unmask qgv points at the same + * time. Also masking should be done before updating the configuration + * and unmasking afterwards. + */ + icl_pcode_restrict_qgv_points(dev_priv, new_mask); } void intel_sagv_post_plane_update(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_bw_state *new_bw_state; + const struct intel_bw_state *old_bw_state; + u32 new_mask = 0; + + /* + * Just return if we can't control SAGV or don't have it. + * This is different from situation when we have SAGV but just can't + * afford it due to DBuf limitation - in case if SAGV is completely + * disabled in a BIOS, we are not even allowed to send a PCode request, + * as it will throw an error. So have to check it here. + */ + if (!intel_has_sagv(dev_priv)) + return; + + new_bw_state = intel_atomic_get_new_bw_state(state); + if (!new_bw_state) + return; - if (intel_can_enable_sagv(state)) + if (INTEL_GEN(dev_priv) < 11 && intel_can_enable_sagv(dev_priv, new_bw_state)) { intel_enable_sagv(dev_priv); + return; + } + + old_bw_state = intel_atomic_get_old_bw_state(state); + /* + * Nothing to unmask + */ + if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask) + return; + + new_mask = new_bw_state->qgv_points_mask; + + /* + * Allow required qgv points after updating the configuration. + * According to BSpec we can't mask and unmask qgv points at the same + * time. Also masking should be done before updating the configuration + * and unmasking afterwards. + */ + icl_pcode_restrict_qgv_points(dev_priv, new_mask); } -static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->uapi.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_plane *plane; + const struct intel_plane_state *plane_state; int level, latency; + if (!intel_has_sagv(dev_priv)) + return false; + if (!crtc_state->hw.active) return true; if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) return false; - for_each_intel_plane_on_crtc(dev, crtc, plane) { + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { const struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane->id]; @@ -3803,7 +3882,7 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state latency = dev_priv->wm.skl_latency[level]; if (skl_needs_memory_bw_wa(dev_priv) && - plane->base.state->fb->modifier == + plane_state->uapi.fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -3819,35 +3898,110 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state return true; } -bool intel_can_enable_sagv(struct intel_atomic_state *state) +static bool tgl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum plane_id plane_id; + + if (!crtc_state->hw.active) + return true; + + for_each_plane_id_on_crtc(crtc, plane_id) { + const struct skl_ddb_entry *plane_alloc = + &crtc_state->wm.skl.plane_ddb_y[plane_id]; + const struct skl_plane_wm *wm = + &crtc_state->wm.skl.optimal.planes[plane_id]; + + if (skl_ddb_entry_size(plane_alloc) < wm->sagv_wm0.min_ddb_alloc) + return false; + } + + return true; +} + +static bool 
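Because QGV points cannot be masked and unmasked in the same PCode request, the two hooks above split the update: before the new configuration is committed, the union of the old and new restriction masks is sent (masking only), and once the commit is done only the new mask is sent (unmasking only). A standalone sketch of that ordering (not kernel code), with icl_pcode_restrict_qgv_points() modelled as a printf and illustrative mask values:

#include <stdio.h>

static void restrict_qgv_points(unsigned int mask)
{
	printf("PCode: restricted QGV point mask = 0x%x\n", mask);
}

/* Mask before the configuration change: old | new, so nothing is
 * unmasked yet. Mirrors intel_sagv_pre_plane_update() above. */
static void sagv_pre_plane_update(unsigned int old_mask, unsigned int new_mask)
{
	unsigned int combined;

	if (new_mask == old_mask)	/* nothing to mask */
		return;

	combined = old_mask | new_mask;
	if (!combined)			/* nothing restricted at all */
		return;

	restrict_qgv_points(combined);
}

/* Unmask after the configuration change: only the new restrictions
 * remain. Mirrors intel_sagv_post_plane_update() above. */
static void sagv_post_plane_update(unsigned int old_mask, unsigned int new_mask)
{
	if (new_mask == old_mask)	/* nothing to unmask */
		return;

	restrict_qgv_points(new_mask);
}

int main(void)
{
	unsigned int old_mask = 0x3, new_mask = 0x5;

	sagv_pre_plane_update(old_mask, new_mask);  /* sends 0x7 */
	/* ... the new display configuration is committed here ... */
	sagv_post_plane_update(old_mask, new_mask); /* sends 0x5 */
	return 0;
}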
intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (INTEL_GEN(dev_priv) >= 12) + return tgl_crtc_can_enable_sagv(crtc_state); + else + return skl_crtc_can_enable_sagv(crtc_state); +} + +bool intel_can_enable_sagv(struct drm_i915_private *dev_priv, + const struct intel_bw_state *bw_state) +{ + if (INTEL_GEN(dev_priv) < 11 && + bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes)) + return false; + + return bw_state->pipe_sagv_reject == 0; +} + +static int intel_compute_sagv_mask(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); + int ret; struct intel_crtc *crtc; - const struct intel_crtc_state *crtc_state; - enum pipe pipe; + struct intel_crtc_state *new_crtc_state; + struct intel_bw_state *new_bw_state = NULL; + const struct intel_bw_state *old_bw_state = NULL; + int i; - if (!intel_has_sagv(dev_priv)) - return false; + for_each_new_intel_crtc_in_state(state, crtc, + new_crtc_state, i) { + new_bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(new_bw_state)) + return PTR_ERR(new_bw_state); - /* - * If there are no active CRTCs, no additional checks need be performed - */ - if (hweight8(state->active_pipes) == 0) - return true; + old_bw_state = intel_atomic_get_old_bw_state(state); - /* - * SKL+ workaround: bspec recommends we disable SAGV when we have - * more then one pipe enabled - */ - if (hweight8(state->active_pipes) > 1) - return false; + if (intel_crtc_can_enable_sagv(new_crtc_state)) + new_bw_state->pipe_sagv_reject &= ~BIT(crtc->pipe); + else + new_bw_state->pipe_sagv_reject |= BIT(crtc->pipe); + } + + if (!new_bw_state) + return 0; + + new_bw_state->active_pipes = + intel_calc_active_pipes(state, old_bw_state->active_pipes); + + if (new_bw_state->active_pipes != old_bw_state->active_pipes) { + ret = intel_atomic_lock_global_state(&new_bw_state->base); + if (ret) + return ret; + } + + for_each_new_intel_crtc_in_state(state, crtc, + new_crtc_state, i) { + struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal; - /* Since we're now guaranteed to only have one active CRTC... 
*/ - pipe = ffs(state->active_pipes) - 1; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - crtc_state = to_intel_crtc_state(crtc->base.state); + /* + * We store use_sagv_wm in the crtc state rather than relying on + * that bw state since we have no convenient way to get at the + * latter from the plane commit hooks (especially in the legacy + * cursor case) + */ + pipe_wm->use_sagv_wm = INTEL_GEN(dev_priv) >= 12 && + intel_can_enable_sagv(dev_priv, new_bw_state); + } + + if (intel_can_enable_sagv(dev_priv, new_bw_state) != + intel_can_enable_sagv(dev_priv, old_bw_state)) { + ret = intel_atomic_serialize_global_state(&new_bw_state->base); + if (ret) + return ret; + } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) { + ret = intel_atomic_lock_global_state(&new_bw_state->base); + if (ret) + return ret; + } - return intel_crtc_can_enable_sagv(crtc_state); + return 0; } /* @@ -4574,6 +4728,20 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, return total_data_rate; } +static const struct skl_wm_level * +skl_plane_wm_level(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, + int level) +{ + const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; + const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; + + if (level == 0 && pipe_wm->use_sagv_wm) + return &wm->sagv_wm0; + + return &wm->wm[level]; +} + static int skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state) { @@ -4610,7 +4778,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state) plane_data_rate, uv_plane_data_rate); - skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, total_data_rate, alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); @@ -4810,7 +4977,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate, wm_intermediate_val = latency * pixel_rate * cpp; ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size); - if (INTEL_GEN(dev_priv) >= 10) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) ret = add_fixed16_u32(ret, 1); return ret; @@ -4945,18 +5112,19 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->y_min_scanlines, wp->dbuf_block_size); - if (INTEL_GEN(dev_priv) >= 10) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) interm_pbpl++; wp->plane_blocks_per_line = div_fixed16(interm_pbpl, wp->y_min_scanlines); - } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) { - interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, - wp->dbuf_block_size); - wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, - wp->dbuf_block_size) + 1; + wp->dbuf_block_size); + + if (!wp->x_tiled || + INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + interm_pbpl++; + wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } @@ -5022,7 +5190,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, * WaIncreaseLatencyIPCEnabled: kbl,cfl * Display WA #1141: kbl,cfl */ - if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) || + if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && dev_priv->ipc_enabled) latency += 4; @@ -5145,6 +5313,20 @@ skl_compute_wm_levels(const struct intel_crtc_state *crtc_state, } } +static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state, + const struct skl_wm_params *wm_params, + struct skl_plane_wm *plane_wm) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct skl_wm_level *sagv_wm = &plane_wm->sagv_wm0; + 
struct skl_wm_level *levels = plane_wm->wm; + unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us; + + skl_compute_plane_wm(crtc_state, 0, latency, + wm_params, &levels[0], + sagv_wm); +} + static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state, const struct skl_wm_params *wp, struct skl_plane_wm *wm) @@ -5197,10 +5379,6 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state, trans_offset_b; } else { res_blocks = wm0_sel_res_b + trans_offset_b; - - /* WA BUG:1938466 add one block for non y-tile planes */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0)) - res_blocks += 1; } /* @@ -5216,6 +5394,8 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, enum plane_id plane_id, int color_plane) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; struct skl_wm_params wm_params; int ret; @@ -5226,6 +5406,10 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, return ret; skl_compute_wm_levels(crtc_state, &wm_params, wm->wm); + + if (INTEL_GEN(dev_priv) >= 12) + tgl_compute_sagv_wm(crtc_state, &wm_params, wm); + skl_compute_transition_wm(crtc_state, &wm_params, wm); return 0; @@ -5385,8 +5569,12 @@ void skl_write_plane_wm(struct intel_plane *plane, &crtc_state->wm.skl.plane_ddb_uv[plane_id]; for (level = 0; level <= max_level; level++) { + const struct skl_wm_level *wm_level; + + wm_level = skl_plane_wm_level(crtc_state, plane_id, level); + skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level), - &wm->wm[level]); + wm_level); } skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id), &wm->trans_wm); @@ -5419,8 +5607,12 @@ void skl_write_cursor_wm(struct intel_plane *plane, &crtc_state->wm.skl.plane_ddb_y[plane_id]; for (level = 0; level <= max_level; level++) { + const struct skl_wm_level *wm_level; + + wm_level = skl_plane_wm_level(crtc_state, plane_id, level); + skl_write_wm_level(dev_priv, CUR_WM(pipe, level), - &wm->wm[level]); + wm_level); } skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm); @@ -5584,23 +5776,25 @@ skl_print_wm_changes(struct intel_atomic_state *state) continue; drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm" - " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n", + "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm" + " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm\n", plane->base.base.id, plane->base.name, enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en), enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en), enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en), enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en), enast(old_wm->trans_wm.plane_en), + enast(old_wm->sagv_wm0.plane_en), enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en), enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en), enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en), enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en), - enast(new_wm->trans_wm.plane_en)); + enast(new_wm->trans_wm.plane_en), + enast(new_wm->sagv_wm0.plane_en)); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" - " -> 
%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n", + "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" + " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n", plane->base.base.id, plane->base.name, enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l, enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l, @@ -5611,6 +5805,7 @@ skl_print_wm_changes(struct intel_atomic_state *state) enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l, enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l, enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l, + enast(old_wm->sagv_wm0.ignore_lines), old_wm->sagv_wm0.plane_res_l, enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l, enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l, @@ -5620,37 +5815,42 @@ skl_print_wm_changes(struct intel_atomic_state *state) enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l, enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l, enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l, - enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l); + enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l, + enast(new_wm->sagv_wm0.ignore_lines), new_wm->sagv_wm0.plane_res_l); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", plane->base.base.id, plane->base.name, old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b, old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b, old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b, old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b, old_wm->trans_wm.plane_res_b, + old_wm->sagv_wm0.plane_res_b, new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b, new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b, new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b, new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b, - new_wm->trans_wm.plane_res_b); + new_wm->trans_wm.plane_res_b, + new_wm->sagv_wm0.plane_res_b); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", plane->base.base.id, plane->base.name, old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, old_wm->trans_wm.min_ddb_alloc, + old_wm->sagv_wm0.min_ddb_alloc, new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, - new_wm->trans_wm.min_ddb_alloc); + new_wm->trans_wm.min_ddb_alloc, + new_wm->sagv_wm0.min_ddb_alloc); } } } @@ -5811,6 +6011,10 @@ skl_compute_wm(struct intel_atomic_state *state) if (ret) return ret; + ret = intel_compute_sagv_mask(state); + if (ret) + return ret; + /* * skl_compute_ddb() will have adjusted the final watermarks * based on how much ddb is available. 
Now we can actually @@ -5939,6 +6143,9 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, skl_wm_level_from_reg_val(val, &wm->wm[level]); } + if (INTEL_GEN(dev_priv) >= 12) + wm->sagv_wm0 = wm->wm[0]; + if (plane_id != PLANE_CURSOR) val = I915_READ(PLANE_WM_TRANS(pipe, plane_id)); else @@ -6916,9 +7123,6 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE); /* ReadHitWriteOnlyDisable:cnl */ val |= RCCUNIT_CLKGATE_DIS; - /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) - val |= SARBUNIT_CLKGATE_DIS; I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); /* Wa_2201832410:cnl */ diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 9a6036ab0f90..614ac7f8d4cc 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include "i915_reg.h" +#include "display/intel_bw.h" struct drm_device; struct drm_i915_private; @@ -41,7 +42,8 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, struct skl_pipe_wm *out); void g4x_wm_sanitize(struct drm_i915_private *dev_priv); void vlv_wm_sanitize(struct drm_i915_private *dev_priv); -bool intel_can_enable_sagv(struct intel_atomic_state *state); +bool intel_can_enable_sagv(struct drm_i915_private *dev_priv, + const struct intel_bw_state *bw_state); int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); void intel_sagv_pre_plane_update(struct intel_atomic_state *state); diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 14daf6af6854..916ccd1c0e96 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -371,6 +371,8 @@ static int gen7_check_mailbox_status(u32 mbox) return -ENXIO; case GEN11_PCODE_LOCKED: return -EBUSY; + case GEN11_PCODE_REJECTED: + return -EACCES; case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: return -EOVERFLOW; default: @@ -429,7 +431,7 @@ int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, mutex_lock(&i915->sb_lock); err = __sandybridge_pcode_rw(i915, mbox, val, val1, - 500, 0, + 500, 20, true); mutex_unlock(&i915->sb_lock); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 078f5b2eb8a4..a61cb8ca4d50 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -899,11 +899,6 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset) #define GEN_FW_RANGE(s, e, d) \ { .start = (s), .end = (e), .domains = (d) } -#define HAS_FWTABLE(dev_priv) \ - (INTEL_GEN(dev_priv) >= 9 || \ - IS_CHERRYVIEW(dev_priv) || \ - IS_VALLEYVIEW(dev_priv)) - /* *Must* be sorted by offset ranges! See intel_fw_table_check(). 
*/ static const struct intel_forcewake_range __vlv_fw_ranges[] = { GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER), diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c index 6942487c14a9..ec776591e1cf 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.c +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -149,8 +149,7 @@ static bool check_hw_restrictions(struct drm_i915_private *i915, guc_wopcm_size)) return false; - if ((IS_GEN(i915, 9) || - IS_CNL_REVID(i915, CNL_REVID_A0, CNL_REVID_A0)) && + if (IS_GEN(i915, 9) && !gen9_check_huc_fw_fits(i915, guc_wopcm_size, huc_fw_size)) return false; diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 0a953bfc0585..5dd5d81646c4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -37,6 +37,7 @@ selftest(gem, i915_gem_live_selftests) selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(gem_contexts, i915_gem_context_live_selftests) +selftest(gem_execbuf, i915_gem_execbuffer_live_selftests) selftest(blt, i915_gem_object_blt_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) selftest(reset, intel_reset_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index 5608fab98d5d..8eb3108f1767 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -221,8 +221,7 @@ static int live_noa_delay(void *arg) goto out; } - if (rq->engine->emit_init_breadcrumb && - i915_request_timeline(rq)->has_initial_breadcrumb) { + if (rq->engine->emit_init_breadcrumb) { err = rq->engine->emit_init_breadcrumb(rq); if (err) { i915_request_add(rq); @@ -263,8 +262,7 @@ static int live_noa_delay(void *arg) delay = intel_read_status_page(stream->engine, 0x102); delay -= intel_read_status_page(stream->engine, 0x100); - delay = div_u64(mul_u32_u32(delay, 1000 * 1000), - RUNTIME_INFO(i915)->cs_timestamp_frequency_khz); + delay = i915_cs_timestamp_ticks_to_ns(i915, delay); pr_info("GPU delay: %uns, expected %lluns\n", delay, expected); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 15b1ca9f7a01..6014e8dfcbb1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -816,10 +816,12 @@ static int recursive_batch_resolve(struct i915_vma *batch) return PTR_ERR(cmd); *cmd = MI_BATCH_BUFFER_END; - intel_gt_chipset_flush(batch->vm->gt); + __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd)); i915_gem_object_unpin_map(batch->obj); + intel_gt_chipset_flush(batch->vm->gt); + return 0; } @@ -865,13 +867,6 @@ static int live_all_engines(void *arg) goto out_request; } - err = engine->emit_bb_start(request[idx], - batch->node.start, - batch->node.size, - 0); - GEM_BUG_ON(err); - request[idx]->batch = batch; - i915_vma_lock(batch); err = i915_request_await_object(request[idx], batch->obj, 0); if (err == 0) @@ -879,6 +874,13 @@ static int live_all_engines(void *arg) i915_vma_unlock(batch); GEM_BUG_ON(err); + err = engine->emit_bb_start(request[idx], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[idx]->batch = batch; + i915_request_get(request[idx]); i915_request_add(request[idx]); idx++; @@ -993,13 +995,6 @@ static int live_sequential_engines(void *arg) } } - err = engine->emit_bb_start(request[idx], 
- batch->node.start, - batch->node.size, - 0); - GEM_BUG_ON(err); - request[idx]->batch = batch; - i915_vma_lock(batch); err = i915_request_await_object(request[idx], batch->obj, false); @@ -1008,6 +1003,13 @@ static int live_sequential_engines(void *arg) i915_vma_unlock(batch); GEM_BUG_ON(err); + err = engine->emit_bb_start(request[idx], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[idx]->batch = batch; + i915_request_get(request[idx]); i915_request_add(request[idx]); @@ -1060,9 +1062,12 @@ out_request: I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; - intel_gt_chipset_flush(engine->gt); + __i915_gem_object_flush_map(request[idx]->batch->obj, + 0, sizeof(*cmd)); i915_gem_object_unpin_map(request[idx]->batch->obj); + + intel_gt_chipset_flush(engine->gt); } i915_vma_put(request[idx]->batch); diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index d3bf9eefb682..1bc11c09faef 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -396,6 +396,35 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...) return true; } +void igt_hexdump(const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + pr_info("*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + pr_info("[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); module_param_named(st_filter, i915_selftest.filter, charp, 0400); diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 9ad4ab088466..e35ba5f9e73f 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -169,8 +169,7 @@ igt_spinner_create_request(struct igt_spinner *spin, intel_gt_chipset_flush(engine->gt); - if (engine->emit_init_breadcrumb && - i915_request_timeline(rq)->has_initial_breadcrumb) { + if (engine->emit_init_breadcrumb) { err = engine->emit_init_breadcrumb(rq); if (err) goto cancel_rq; |
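
A minimal sketch (not part of the patch) of the request-construction order that the two selftest fixes above converge on: the request's dependency on the batch object is recorded, under the vma lock, before the batch-buffer start referencing it is emitted. Variable names and error handling are abbreviated, and only calls already visible in this diff are used.

	/* record the dependency on the batch before emitting commands that use it */
	i915_vma_lock(batch);
	err = i915_request_await_object(rq, batch->obj, false);
	i915_vma_unlock(batch);

	if (err == 0)
		err = rq->engine->emit_bb_start(rq,
						batch->node.start,
						batch->node.size,
						0);
	rq->batch = batch;

	i915_request_get(rq);
	i915_request_add(rq);

The same ordering idea shows up when a batch is terminated in these selftests: the CPU-side __i915_gem_object_flush_map() and i915_gem_object_unpin_map() come first, and intel_gt_chipset_flush() last, so the GPU only ever samples the updated MI_BATCH_BUFFER_END.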