From 2af104290da5e4858e8caefa068827d7392c6a09 Mon Sep 17 00:00:00 2001 From: Tomohito Esaki Date: Fri, 28 Jan 2022 15:08:34 +0900 Subject: drm: introduce fb_modifiers_not_supported flag in mode_config If only linear modifier is advertised, since there are many drivers that only linear supported, the DRM core should handle this rather than open-coding in every driver. However, there are legacy drivers such as radeon that do not support modifiers but infer the actual layout of the underlying buffer. Therefore, a new flag fb_modifiers_not_supported is introduced for these legacy drivers, and allow_fb_modifiers is replaced with this new flag. v3: - change the order as follows: 1. add fb_modifiers_not_supported flag 2. add default modifiers 3. remove allow_fb_modifiers flag - add a conditional disable in amdgpu_dm_plane_init() v4: - modify kernel docs v5: - modify kernel docs Signed-off-by: Tomohito Esaki Acked-by: Harry Wentland Reviewed-by: Andy Shevchenko Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220128060836.11216-2-etom@igel.co.jp --- include/drm/drm_mode_config.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 91ca575a78de..4a93dac91cf9 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -933,6 +933,16 @@ struct drm_mode_config { */ bool allow_fb_modifiers; + /** + * @fb_modifiers_not_supported: + * + * When this flag is set, the DRM device will not expose modifier + * support to userspace. This is only used by legacy drivers that infer + * the buffer layout through heuristics without using modifiers. New + * drivers shall not set fhis flag. + */ + bool fb_modifiers_not_supported; + /** * @normalize_zpos: * -- cgit v1.2.3 From 8be576837b6e62b2ad0de2f9ba31cef618fa2891 Mon Sep 17 00:00:00 2001 From: Tomohito Esaki Date: Fri, 28 Jan 2022 15:08:35 +0900 Subject: drm: add support modifiers for drivers whose planes only support linear layout The LINEAR modifier is advertised as default if a driver doesn't specify modifiers. v2: - rebase to the latest master branch (5.16.0+) + "drm/plane: Make format_mod_supported truly optional" patch [1] [1] https://patchwork.freedesktop.org/patch/467940/?series=98255&rev=3 v3: - change the order as follows: 1. add fb_modifiers_not_supported flag 2. add default modifiers 3. remove allow_fb_modifiers flag v5: - change default_modifiers array from non-static to static - remove terminator in default_modifiers array - use ARRAY_SIZE to get the format_modifier_count - update sanity check in plane init func to use the fb_modifiers_not_supported - modify kernel docs Signed-off-by: Tomohito Esaki Reviewed-by: Andy Shevchenko Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220128060836.11216-3-etom@igel.co.jp --- drivers/gpu/drm/drm_plane.c | 23 +++++++++++++---------- include/drm/drm_plane.h | 3 +++ 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index deeec60a3315..bf0daa8d9bbd 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -237,6 +237,9 @@ static int __drm_universal_plane_init(struct drm_device *dev, const char *name, va_list ap) { struct drm_mode_config *config = &dev->mode_config; + static const uint64_t default_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + }; unsigned int format_modifier_count = 0; int ret; @@ -277,16 +280,16 @@ static int __drm_universal_plane_init(struct drm_device *dev, while (*temp_modifiers++ != DRM_FORMAT_MOD_INVALID) format_modifier_count++; + } else { + if (!dev->mode_config.fb_modifiers_not_supported) { + format_modifiers = default_modifiers; + format_modifier_count = ARRAY_SIZE(default_modifiers); + } } /* autoset the cap and check for consistency across all planes */ - if (format_modifier_count) { - drm_WARN_ON(dev, !config->allow_fb_modifiers && - !list_empty(&config->plane_list)); - config->allow_fb_modifiers = true; - } else { - drm_WARN_ON(dev, config->allow_fb_modifiers); - } + drm_WARN_ON(dev, config->fb_modifiers_not_supported && + format_modifier_count); plane->modifier_count = format_modifier_count; plane->modifiers = kmalloc_array(format_modifier_count, @@ -341,7 +344,7 @@ static int __drm_universal_plane_init(struct drm_device *dev, drm_object_attach_property(&plane->base, config->prop_src_h, 0); } - if (config->allow_fb_modifiers) + if (format_modifier_count) create_in_format_blob(dev, plane); return 0; @@ -368,8 +371,8 @@ static int __drm_universal_plane_init(struct drm_device *dev, * drm_universal_plane_init() to let the DRM managed resource infrastructure * take care of cleanup and deallocation. * - * Drivers supporting modifiers must set @format_modifiers on all their planes, - * even those that only support DRM_FORMAT_MOD_LINEAR. + * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set + * @format_modifiers to NULL. The plane will advertise the linear modifier. * * Returns: * Zero on success, error code on failure. diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 06759badf99f..2628c7cde2da 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -803,6 +803,9 @@ void *__drmm_universal_plane_alloc(struct drm_device *dev, * * The @drm_plane_funcs.destroy hook must be NULL. * + * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set + * @format_modifiers to NULL. The plane will advertise the linear modifier. + * * Returns: * Pointer to new plane, or ERR_PTR on failure. */ -- cgit v1.2.3 From 3d082157a24216ca084082ce421a37d14ecfcfad Mon Sep 17 00:00:00 2001 From: Tomohito Esaki Date: Fri, 28 Jan 2022 15:08:36 +0900 Subject: drm: remove allow_fb_modifiers The allow_fb_modifiers flag is unnecessary since it has been replaced with fb_modifiers_not_supported flag. v3: - change the order as follows: 1. add fb_modifiers_not_supported flag 2. add default modifiers 3. remove allow_fb_modifiers flag v5: - keep a sanity check in plane init func Signed-off-by: Tomohito Esaki Reviewed-by: Andy Shevchenko Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220128060836.11216-4-etom@igel.co.jp --- drivers/gpu/drm/selftests/test-drm_framebuffer.c | 1 - include/drm/drm_mode_config.h | 16 ---------------- 2 files changed, 17 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/selftests/test-drm_framebuffer.c b/drivers/gpu/drm/selftests/test-drm_framebuffer.c index 61b44d3a6a61..f6d66285c5fc 100644 --- a/drivers/gpu/drm/selftests/test-drm_framebuffer.c +++ b/drivers/gpu/drm/selftests/test-drm_framebuffer.c @@ -323,7 +323,6 @@ static struct drm_device mock_drm_device = { .max_width = MAX_WIDTH, .min_height = MIN_HEIGHT, .max_height = MAX_HEIGHT, - .allow_fb_modifiers = true, .funcs = &mock_config_funcs, }, }; diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 4a93dac91cf9..6b5e01295348 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -917,22 +917,6 @@ struct drm_mode_config { */ bool async_page_flip; - /** - * @allow_fb_modifiers: - * - * Whether the driver supports fb modifiers in the ADDFB2.1 ioctl call. - * Note that drivers should not set this directly, it is automatically - * set in drm_universal_plane_init(). - * - * IMPORTANT: - * - * If this is set the driver must fill out the full implicit modifier - * information in their &drm_mode_config_funcs.fb_create hook for legacy - * userspace which does not set modifiers. Otherwise the GETFB2 ioctl is - * broken for modifier aware userspace. - */ - bool allow_fb_modifiers; - /** * @fb_modifiers_not_supported: * -- cgit v1.2.3 From d80976d9ffd9d7f89a26134a299b236910477f3b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 30 Nov 2021 16:27:55 +0100 Subject: dma-resv: some doc polish for iterators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hammer it a bit more in that iterators can be restarted and when that matters, plus suggest to prefer the locked version whenver. Also delete the two leftover kerneldoc for static functions plus sprinkle some more links while at it. v2: Keep some comments (Christian) Reviewed-by: Christian König Signed-off-by: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Link: https://patchwork.freedesktop.org/patch/msgid/20211130152756.1388106-1-daniel.vetter@ffwll.ch --- drivers/dma-buf/dma-resv.c | 29 +++++++++++++++-------------- include/linux/dma-resv.h | 13 ++++++++++++- 2 files changed, 27 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 6dd9a40b55d4..ee31f15d633a 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,12 +323,8 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence); -/** - * dma_resv_iter_restart_unlocked - restart the unlocked iterator - * @cursor: The dma_resv_iter object to restart - * - * Restart the unlocked iteration by initializing the cursor object. - */ +/* Restart the iterator by initializing all the necessary fields, but not the + * relation to the dma_resv object. */ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) { cursor->seq = read_seqcount_begin(&cursor->obj->seq); @@ -344,14 +340,7 @@ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) cursor->is_restarted = true; } -/** - * dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj - * @cursor: cursor to record the current position - * - * Return all the fences in the dma_resv object which are not yet signaled. - * The returned fence has an extra local reference so will stay alive. - * If a concurrent modify is detected the whole iteration is started over again. - */ +/* Walk to the next not signaled fence and grab a reference to it */ static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) { struct dma_resv *obj = cursor->obj; @@ -387,6 +376,12 @@ static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) * dma_resv_iter_first_unlocked - first fence in an unlocked dma_resv obj. * @cursor: the cursor with the current position * + * Subsequent fences are iterated with dma_resv_iter_next_unlocked(). + * + * Beware that the iterator can be restarted. Code which accumulates statistics + * or similar needs to check for this with dma_resv_iter_is_restarted(). For + * this reason prefer the locked dma_resv_iter_first() whenver possible. + * * Returns the first fence from an unlocked dma_resv obj. */ struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor) @@ -406,6 +401,10 @@ EXPORT_SYMBOL(dma_resv_iter_first_unlocked); * dma_resv_iter_next_unlocked - next fence in an unlocked dma_resv obj. * @cursor: the cursor with the current position * + * Beware that the iterator can be restarted. Code which accumulates statistics + * or similar needs to check for this with dma_resv_iter_is_restarted(). For + * this reason prefer the locked dma_resv_iter_next() whenver possible. + * * Returns the next fence from an unlocked dma_resv obj. */ struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor) @@ -431,6 +430,8 @@ EXPORT_SYMBOL(dma_resv_iter_next_unlocked); * dma_resv_iter_first - first fence from a locked dma_resv object * @cursor: cursor to record the current position * + * Subsequent fences are iterated with dma_resv_iter_next_unlocked(). + * * Return the first fence in the dma_resv object while holding the * &dma_resv.lock. */ diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index a715df97b31a..afdfdfac729f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -153,6 +153,13 @@ struct dma_resv { * struct dma_resv_iter - current position into the dma_resv fences * * Don't touch this directly in the driver, use the accessor function instead. + * + * IMPORTANT + * + * When using the lockless iterators like dma_resv_iter_next_unlocked() or + * dma_resv_for_each_fence_unlocked() beware that the iterator can be restarted. + * Code which accumulates statistics or similar needs to check for this with + * dma_resv_iter_is_restarted(). */ struct dma_resv_iter { /** @obj: The dma_resv object we iterate over */ @@ -243,7 +250,11 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * &dma_resv.lock and using RCU instead. The cursor needs to be initialized * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside * the iterator a reference to the dma_fence is held and the RCU lock dropped. - * When the dma_resv is modified the iteration starts over again. + * + * Beware that the iterator can be restarted when the struct dma_resv for + * @cursor is modified. Code which accumulates statistics or similar needs to + * check for this with dma_resv_iter_is_restarted(). For this reason prefer the + * lock iterator dma_resv_for_each_fence() whenever possible. */ #define dma_resv_for_each_fence_unlocked(cursor, fence) \ for (fence = dma_resv_iter_first_unlocked(cursor); \ -- cgit v1.2.3 From dca384a3bf5af1c781cfa6aec63904bdb5018c36 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 2 Feb 2022 10:43:40 +0100 Subject: drm/connector: Fix typo in documentation Commit 4adc33f36d80 ("drm/edid: Split deep color modes between RGB and YUV444") introduced two new variables in struct drm_display_info and their documentation, but the documentation part had a typo resulting in a doc build warning. Fixes: 4adc33f36d80 ("drm/edid: Split deep color modes between RGB and YUV444") Reported-by: Stephen Rothwell Signed-off-by: Maxime Ripard Reviewed-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20220202094340.875190-1-maxime@cerno.tech --- include/drm/drm_connector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 64cf5f88c05b..5e36eb3df66f 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -592,13 +592,13 @@ struct drm_display_info { bool rgb_quant_range_selectable; /** - * @edid_hdmi_dc_rgb444_modes: Mask of supported hdmi deep color modes + * @edid_hdmi_rgb444_dc_modes: Mask of supported hdmi deep color modes * in RGB 4:4:4. Even more stuff redundant with @bus_formats. */ u8 edid_hdmi_rgb444_dc_modes; /** - * @edid_hdmi_dc_ycbcr444_modes: Mask of supported hdmi deep color + * @edid_hdmi_ycbcr444_dc_modes: Mask of supported hdmi deep color * modes in YCbCr 4:4:4. Even more stuff redundant with @bus_formats. */ u8 edid_hdmi_ycbcr444_dc_modes; -- cgit v1.2.3 From a3574119826d9a4ef807fb973cf5150c3b90da43 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 3 Feb 2022 13:38:06 +0000 Subject: drm: document struct drm_mode_fb_cmd2 Follow-up for the DRM_IOCTL_MODE_GETFB2 docs. v2: (Daniel Stone) - Replace fourcc.org with drm_fourcc.h because this is the authoritative source and the website may have mismatches. - Drop assumption that offsets will generally be 0. - Mention that unused entries must be zero'ed out. v3: (Pekka) - Mention that a handle can be re-used - Add unit for pitches/offsets Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Acked-by: Pekka Paalanen Cc: Daniel Stone Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20220203133753.261507-1-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 88 ++++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index e1e351682872..0a0d56a6158e 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -663,41 +663,73 @@ struct drm_mode_fb_cmd { #define DRM_MODE_FB_INTERLACED (1<<0) /* for interlaced framebuffers */ #define DRM_MODE_FB_MODIFIERS (1<<1) /* enables ->modifer[] */ +/** + * struct drm_mode_fb_cmd2 - Frame-buffer metadata. + * + * This struct holds frame-buffer metadata. There are two ways to use it: + * + * - User-space can fill this struct and perform a &DRM_IOCTL_MODE_ADDFB2 + * ioctl to register a new frame-buffer. The new frame-buffer object ID will + * be set by the kernel in @fb_id. + * - User-space can set @fb_id and perform a &DRM_IOCTL_MODE_GETFB2 ioctl to + * fetch metadata about an existing frame-buffer. + * + * In case of planar formats, this struct allows up to 4 buffer objects with + * offsets and pitches per plane. The pitch and offset order is dictated by the + * format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as: + * + * YUV 4:2:0 image with a plane of 8 bit Y samples followed by an + * interleaved U/V plane containing 8 bit 2x2 subsampled colour difference + * samples. + * + * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at + * ``offsets[1]``. + * + * To accommodate tiled, compressed, etc formats, a modifier can be specified. + * For more information see the "Format Modifiers" section. Note that even + * though it looks like we have a modifier per-plane, we in fact do not. The + * modifier for each plane must be identical. Thus all combinations of + * different data layouts for multi-plane formats must be enumerated as + * separate modifiers. + * + * All of the entries in @handles, @pitches, @offsets and @modifier must be + * zero when unused. Warning, for @offsets and @modifier zero can't be used to + * figure out whether the entry is used or not since it's a valid value (a zero + * offset is common, and a zero modifier is &DRM_FORMAT_MOD_LINEAR). + */ struct drm_mode_fb_cmd2 { + /** @fb_id: Object ID of the frame-buffer. */ __u32 fb_id; + /** @width: Width of the frame-buffer. */ __u32 width; + /** @height: Height of the frame-buffer. */ __u32 height; - __u32 pixel_format; /* fourcc code from drm_fourcc.h */ - __u32 flags; /* see above flags */ + /** + * @pixel_format: FourCC format code, see ``DRM_FORMAT_*`` constants in + * ``drm_fourcc.h``. + */ + __u32 pixel_format; + /** + * @flags: Frame-buffer flags (see &DRM_MODE_FB_INTERLACED and + * &DRM_MODE_FB_MODIFIERS). + */ + __u32 flags; - /* - * In case of planar formats, this ioctl allows up to 4 - * buffer objects with offsets and pitches per plane. - * The pitch and offset order is dictated by the fourcc, - * e.g. NV12 (https://fourcc.org/yuv.php#NV12) is described as: - * - * YUV 4:2:0 image with a plane of 8 bit Y samples - * followed by an interleaved U/V plane containing - * 8 bit 2x2 subsampled colour difference samples. - * - * So it would consist of Y as offsets[0] and UV as - * offsets[1]. Note that offsets[0] will generally - * be 0 (but this is not required). - * - * To accommodate tiled, compressed, etc formats, a - * modifier can be specified. The default value of zero - * indicates "native" format as specified by the fourcc. - * Vendor specific modifier token. Note that even though - * it looks like we have a modifier per-plane, we in fact - * do not. The modifier for each plane must be identical. - * Thus all combinations of different data layouts for - * multi plane formats must be enumerated as separate - * modifiers. + /** + * @handles: GEM buffer handle, one per plane. Set to 0 if the plane is + * unused. The same handle can be used for multiple planes. */ __u32 handles[4]; - __u32 pitches[4]; /* pitch for each plane */ - __u32 offsets[4]; /* offset of each plane */ - __u64 modifier[4]; /* ie, tiling, compress */ + /** @pitches: Pitch (aka. stride) in bytes, one per plane. */ + __u32 pitches[4]; + /** @offsets: Offset into the buffer in bytes, one per plane. */ + __u32 offsets[4]; + /** + * @modifier: Format modifier, one per plane. See ``DRM_FORMAT_MOD_*`` + * constants in ``drm_fourcc.h``. All planes must use the same + * modifier. Ignored unless &DRM_MODE_FB_MODIFIERS is set in @flags. + */ + __u64 modifier[4]; }; #define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01 -- cgit v1.2.3 From ea4692c75e1c63926e4fb0728f5775ef0d733888 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jan 2022 01:39:41 -0800 Subject: lib/string_helpers: Consolidate string helpers implementation There are a few implementations of string helpers in the tree like yesno() that just returns "yes" or "no" depending on a boolean argument. Those are helpful to output strings to the user or log. In order to consolidate them, prefix all of them str_ prefix to make it clear what they are about and avoid symbol clashes. Taking the commoon `val ? "yes" : "no"` implementation, quite a few users of open coded yesno() could later be converted to the new function: $ git grep '?\s*"yes"\s*' | wc -l 286 $ git grep '?\s*"no"\s*' | wc -l 20 The inlined function should keep the const strings local to each compilation unit, the same way it's now, thus not changing the current behavior. Signed-off-by: Lucas De Marchi Reviewed-by: Andy Shevchenko Acked-by: Jani Nikula Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220126093951.1470898-2-lucas.demarchi@intel.com --- include/linux/string_helpers.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 7a22921c9db7..4d72258d42fd 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -106,4 +106,24 @@ void kfree_strarray(char **array, size_t n); char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n); +static inline const char *str_yes_no(bool v) +{ + return v ? "yes" : "no"; +} + +static inline const char *str_on_off(bool v) +{ + return v ? "on" : "off"; +} + +static inline const char *str_enable_disable(bool v) +{ + return v ? "enable" : "disable"; +} + +static inline const char *str_enabled_disabled(bool v) +{ + return v ? "enabled" : "disabled"; +} + #endif -- cgit v1.2.3 From 976b6d97c62347df3e686f60a5f455bb8ed6ea23 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 19 Jan 2022 11:17:32 +0100 Subject: dma-buf: consolidate dma_fence subclass checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate the wrapper functions to check for dma_fence subclasses in the dma_fence header. This makes it easier to document and also check the different requirements for fence containers in the subclasses. Signed-off-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220204100429.2049-2-christian.koenig@amd.com --- include/linux/dma-fence-array.h | 15 +-------------- include/linux/dma-fence-chain.h | 3 +-- include/linux/dma-fence.h | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 303dd712220f..fec374f69e12 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -45,19 +45,6 @@ struct dma_fence_array { struct irq_work work; }; -extern const struct dma_fence_ops dma_fence_array_ops; - -/** - * dma_fence_is_array - check if a fence is from the array subsclass - * @fence: fence to test - * - * Return true if it is a dma_fence_array and false otherwise. - */ -static inline bool dma_fence_is_array(struct dma_fence *fence) -{ - return fence->ops == &dma_fence_array_ops; -} - /** * to_dma_fence_array - cast a fence to a dma_fence_array * @fence: fence to cast to a dma_fence_array @@ -68,7 +55,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence) static inline struct dma_fence_array * to_dma_fence_array(struct dma_fence *fence) { - if (fence->ops != &dma_fence_array_ops) + if (!fence || !dma_fence_is_array(fence)) return NULL; return container_of(fence, struct dma_fence_array, base); diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 54fe3443fd2c..ee906b659694 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -49,7 +49,6 @@ struct dma_fence_chain { spinlock_t lock; }; -extern const struct dma_fence_ops dma_fence_chain_ops; /** * to_dma_fence_chain - cast a fence to a dma_fence_chain @@ -61,7 +60,7 @@ extern const struct dma_fence_ops dma_fence_chain_ops; static inline struct dma_fence_chain * to_dma_fence_chain(struct dma_fence *fence) { - if (!fence || fence->ops != &dma_fence_chain_ops) + if (!fence || !dma_fence_is_chain(fence)) return NULL; return container_of(fence, struct dma_fence_chain, base); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 1ea691753bd3..775cdc0b4f24 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -587,4 +587,42 @@ struct dma_fence *dma_fence_get_stub(void); struct dma_fence *dma_fence_allocate_private_stub(void); u64 dma_fence_context_alloc(unsigned num); +extern const struct dma_fence_ops dma_fence_array_ops; +extern const struct dma_fence_ops dma_fence_chain_ops; + +/** + * dma_fence_is_array - check if a fence is from the array subclass + * @fence: the fence to test + * + * Return true if it is a dma_fence_array and false otherwise. + */ +static inline bool dma_fence_is_array(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_array_ops; +} + +/** + * dma_fence_is_chain - check if a fence is from the chain subclass + * @fence: the fence to test + * + * Return true if it is a dma_fence_chain and false otherwise. + */ +static inline bool dma_fence_is_chain(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_chain_ops; +} + +/** + * dma_fence_is_container - check if a fence is a container for other fences + * @fence: the fence to test + * + * Return true if this fence is a container for other fences, false otherwise. + * This is important since we can't build up large fence structure or otherwise + * we run into recursion during operation on those fences. + */ +static inline bool dma_fence_is_container(struct dma_fence *fence) +{ + return dma_fence_is_array(fence) || dma_fence_is_chain(fence); +} + #endif /* __LINUX_DMA_FENCE_H */ -- cgit v1.2.3 From 18f5fad275efef015226ee4f90eae34d8f44aa5e Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Jan 2022 11:42:40 +0100 Subject: dma-buf: add dma_fence_chain_contained helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a reoccurring pattern that we need to extract the fence from a dma_fence_chain object. Add a helper for this. Signed-off-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220204100429.2049-6-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-chain.c | 6 ++---- include/linux/dma-fence-chain.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index 084c6927b735..06f8ef97c6e8 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -148,8 +148,7 @@ static bool dma_fence_chain_enable_signaling(struct dma_fence *fence) dma_fence_get(&head->base); dma_fence_chain_for_each(fence, &head->base) { - struct dma_fence_chain *chain = to_dma_fence_chain(fence); - struct dma_fence *f = chain ? chain->fence : fence; + struct dma_fence *f = dma_fence_chain_contained(fence); dma_fence_get(f); if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) { @@ -165,8 +164,7 @@ static bool dma_fence_chain_enable_signaling(struct dma_fence *fence) static bool dma_fence_chain_signaled(struct dma_fence *fence) { dma_fence_chain_for_each(fence, fence) { - struct dma_fence_chain *chain = to_dma_fence_chain(fence); - struct dma_fence *f = chain ? chain->fence : fence; + struct dma_fence *f = dma_fence_chain_contained(fence); if (!dma_fence_is_signaled(f)) { dma_fence_put(fence); diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index ee906b659694..10d51bcdf7b7 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -66,6 +66,21 @@ to_dma_fence_chain(struct dma_fence *fence) return container_of(fence, struct dma_fence_chain, base); } +/** + * dma_fence_chain_contained - return the contained fence + * @fence: the fence to test + * + * If the fence is a dma_fence_chain the function returns the fence contained + * inside the chain object, otherwise it returns the fence itself. + */ +static inline struct dma_fence * +dma_fence_chain_contained(struct dma_fence *fence) +{ + struct dma_fence_chain *chain = to_dma_fence_chain(fence); + + return chain ? chain->fence : fence; +} + /** * dma_fence_chain_alloc * -- cgit v1.2.3 From d315bdbfebd517cf5efabf666c8099e027ef666f Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 9 Feb 2022 16:56:33 +0100 Subject: drm/gem-shmem: Set vm_ops in static initializer Initialize default vm_ops in static initialization of the GEM SHMEM funcs, instead of the mmap code. It's simply better style. GEM helpers will later set a VMA's vm_ops from the default automatically. v2: * also update the drivers that build upon GEM SHMEM Signed-off-by: Thomas Zimmermann Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patchwork.freedesktop.org/patch/msgid/20220209155634.3994-2-tzimmermann@suse.de --- drivers/gpu/drm/drm_gem_shmem_helper.c | 5 +++-- drivers/gpu/drm/lima/lima_gem.c | 1 + drivers/gpu/drm/panfrost/panfrost_gem.c | 1 + drivers/gpu/drm/v3d/v3d_bo.c | 1 + drivers/gpu/drm/virtio/virtgpu_object.c | 1 + include/drm/drm_gem_shmem_helper.h | 2 ++ 6 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 621924116eb4..5991a22a9e22 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -46,6 +46,7 @@ static const struct drm_gem_object_funcs drm_gem_shmem_funcs = { .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = drm_gem_shmem_object_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, }; static struct drm_gem_shmem_object * @@ -585,11 +586,12 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma) drm_gem_vm_close(vma); } -static const struct vm_operations_struct drm_gem_shmem_vm_ops = { +const struct vm_operations_struct drm_gem_shmem_vm_ops = { .fault = drm_gem_shmem_fault, .open = drm_gem_shmem_vm_open, .close = drm_gem_shmem_vm_close, }; +EXPORT_SYMBOL_GPL(drm_gem_shmem_vm_ops); /** * drm_gem_shmem_mmap - Memory-map a shmem GEM object @@ -625,7 +627,6 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); if (shmem->map_wc) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - vma->vm_ops = &drm_gem_shmem_vm_ops; return 0; } diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index f9a9198ef198..6a6f6f2ead75 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -213,6 +213,7 @@ static const struct drm_gem_object_funcs lima_gem_funcs = { .vmap = lima_gem_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = lima_gem_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, }; struct drm_gem_object *lima_gem_create_object(struct drm_device *dev, size_t size) diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index ead65f5fa2bc..293e799e2fe8 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -206,6 +206,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = { .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = drm_gem_shmem_object_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, }; /** diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 6e3113f419f4..8b3229a37c6d 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -59,6 +59,7 @@ static const struct drm_gem_object_funcs v3d_gem_funcs = { .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = drm_gem_shmem_object_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, }; /* gem_create_object function for allocating a BO struct and doing diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index baef2c5f2aaf..f293e6ad52da 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -124,6 +124,7 @@ static const struct drm_gem_object_funcs virtio_gpu_shmem_funcs = { .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = drm_gem_shmem_object_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, }; bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo) diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 311d66c9cf4b..08e7846e8abc 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -135,6 +135,8 @@ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem) void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, struct drm_printer *p, unsigned int indent); +extern const struct vm_operations_struct drm_gem_shmem_vm_ops; + /* * GEM object functions */ -- cgit v1.2.3 From 90d4aa20c8cc76f5baecd423b5dc289b899ebc42 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 14 Feb 2022 07:28:38 +0100 Subject: drm/ttm: fix resource manager size type and description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Leave the man->size units as driver defined. Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220214093439.2989-1-christian.koenig@amd.com Reviewed-by: Matthew Auld --- drivers/gpu/drm/ttm/ttm_resource.c | 6 +++--- include/drm/ttm/ttm_resource.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 68344c90549b..8204b7062517 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -153,19 +153,19 @@ void ttm_resource_set_bo(struct ttm_resource *res, * * @man: memory manager object to init * @bdev: ttm device this manager belongs to - * @p_size: size managed area in pages. + * @size: size of managed resources in arbitrary units * * Initialise core parts of a manager object. */ void ttm_resource_manager_init(struct ttm_resource_manager *man, struct ttm_device *bdev, - unsigned long p_size) + uint64_t size) { unsigned i; spin_lock_init(&man->move_lock); man->bdev = bdev; - man->size = p_size; + man->size = size; for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) INIT_LIST_HEAD(&man->lru[i]); diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 69eea9d6399b..555a11fb8a7f 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -278,7 +278,7 @@ void ttm_resource_set_bo(struct ttm_resource *res, void ttm_resource_manager_init(struct ttm_resource_manager *man, struct ttm_device *bdev, - unsigned long p_size); + uint64_t size); int ttm_resource_manager_evict_all(struct ttm_device *bdev, struct ttm_resource_manager *man); -- cgit v1.2.3 From 0e05fc49c358cb49e59ce8d6ecda652951335e1e Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 12 Jul 2021 14:05:01 +0200 Subject: drm/ttm: add common accounting to the resource mgr v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It makes sense to have this in the common manager for debugging and accounting of how much resources are used. v2: cleanup kerneldoc a bit v3: drop the atomic, update counter under lock instead Signed-off-by: Christian König Reviewed-by: Huang Rui (v1) Reviewed-by: Matthew Auld Tested-by: Bas Nieuwenhuizen Link: https://patchwork.freedesktop.org/patch/msgid/20220214093439.2989-2-christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_resource.c | 30 ++++++++++++++++++++++++++++++ include/drm/ttm/ttm_resource.h | 11 +++++++++-- 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 8204b7062517..cbd47a104962 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -41,6 +41,8 @@ void ttm_resource_init(struct ttm_buffer_object *bo, const struct ttm_place *place, struct ttm_resource *res) { + struct ttm_resource_manager *man; + res->start = 0; res->num_pages = PFN_UP(bo->base.size); res->mem_type = place->mem_type; @@ -50,6 +52,11 @@ void ttm_resource_init(struct ttm_buffer_object *bo, res->bus.is_iomem = false; res->bus.caching = ttm_cached; res->bo = bo; + + man = ttm_manager_type(bo->bdev, place->mem_type); + spin_lock(&bo->bdev->lru_lock); + man->usage += bo->base.size; + spin_unlock(&bo->bdev->lru_lock); } EXPORT_SYMBOL(ttm_resource_init); @@ -65,6 +72,9 @@ EXPORT_SYMBOL(ttm_resource_init); void ttm_resource_fini(struct ttm_resource_manager *man, struct ttm_resource *res) { + spin_lock(&man->bdev->lru_lock); + man->usage -= res->bo->base.size; + spin_unlock(&man->bdev->lru_lock); } EXPORT_SYMBOL(ttm_resource_fini); @@ -166,6 +176,7 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man, spin_lock_init(&man->move_lock); man->bdev = bdev; man->size = size; + man->usage = 0; for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) INIT_LIST_HEAD(&man->lru[i]); @@ -226,6 +237,24 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, } EXPORT_SYMBOL(ttm_resource_manager_evict_all); +/** + * ttm_resource_manager_usage + * + * @man: A memory manager object. + * + * Return how many resources are currently used. + */ +uint64_t ttm_resource_manager_usage(struct ttm_resource_manager *man) +{ + uint64_t usage; + + spin_lock(&man->bdev->lru_lock); + usage = man->usage; + spin_unlock(&man->bdev->lru_lock); + return usage; +} +EXPORT_SYMBOL(ttm_resource_manager_usage); + /** * ttm_resource_manager_debug * @@ -238,6 +267,7 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man, drm_printf(p, " use_type: %d\n", man->use_type); drm_printf(p, " use_tt: %d\n", man->use_tt); drm_printf(p, " size: %llu\n", man->size); + drm_printf(p, " usage: %llu\n", ttm_resource_manager_usage(man)); if (man->func->debug) man->func->debug(man, p); } diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 555a11fb8a7f..323c14a30c6b 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -130,10 +131,15 @@ struct ttm_resource_manager { struct dma_fence *move; /* - * Protected by the global->lru_lock. + * Protected by the bdev->lru_lock. */ - struct list_head lru[TTM_MAX_BO_PRIORITY]; + + /** + * @usage: How much of the resources are used, protected by the + * bdev->lru_lock. + */ + uint64_t usage; }; /** @@ -283,6 +289,7 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man, int ttm_resource_manager_evict_all(struct ttm_device *bdev, struct ttm_resource_manager *man); +uint64_t ttm_resource_manager_usage(struct ttm_resource_manager *man); void ttm_resource_manager_debug(struct ttm_resource_manager *man, struct drm_printer *p); -- cgit v1.2.3 From 2509969a9862b522d2208e8663057fb227556687 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 4 Feb 2022 16:13:41 -0800 Subject: drm: Plumb debugfs_init through to panels We'd like panels to be able to add things to debugfs underneath the connector's directory. Let's plumb it through. A panel will be able to put things in a "panel" directory under the connector's directory. Note that debugfs is not ABI and so it's always possible that the location that the panel gets for its debugfs could change in the future. NOTE: this currently only works if you're using a modern architecture. Specifically the plumbing relies on _both_ drm_bridge_connector and drm_panel_bridge. If you're not using one or both of these things then things won't be plumbed through. As a side effect of this change, drm_bridges can also get callbacks to put stuff underneath the connector's debugfs directory. At the moment all bridges in the chain have their debugfs_init() called with the connector's root directory. Signed-off-by: Douglas Anderson Reviewed-by: Javier Martinez Canillas Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/20220204161245.v2.2.Ib0bd5346135cbb0b63006b69b61d4c8af6484740@changeid --- drivers/gpu/drm/bridge/panel.c | 12 ++++++++++++ drivers/gpu/drm/drm_bridge_connector.c | 15 +++++++++++++++ drivers/gpu/drm/drm_debugfs.c | 3 +++ include/drm/drm_bridge.h | 7 +++++++ include/drm/drm_connector.h | 7 +++++++ include/drm/drm_panel.h | 8 ++++++++ 6 files changed, 52 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index b32295abd9e7..5be057575183 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -138,6 +138,17 @@ static int panel_bridge_get_modes(struct drm_bridge *bridge, return drm_panel_get_modes(panel_bridge->panel, connector); } +static void panel_bridge_debugfs_init(struct drm_bridge *bridge, + struct dentry *root) +{ + struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge); + struct drm_panel *panel = panel_bridge->panel; + + root = debugfs_create_dir("panel", root); + if (panel->funcs->debugfs_init) + panel->funcs->debugfs_init(panel, root); +} + static const struct drm_bridge_funcs panel_bridge_bridge_funcs = { .attach = panel_bridge_attach, .detach = panel_bridge_detach, @@ -150,6 +161,7 @@ static const struct drm_bridge_funcs panel_bridge_bridge_funcs = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .atomic_get_input_bus_fmts = drm_atomic_helper_bridge_propagate_bus_fmt, + .debugfs_init = panel_bridge_debugfs_init, }; /** diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c index 791379816837..60923cdfe8e1 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/drm_bridge_connector.c @@ -216,6 +216,20 @@ static void drm_bridge_connector_destroy(struct drm_connector *connector) kfree(bridge_connector); } +static void drm_bridge_connector_debugfs_init(struct drm_connector *connector, + struct dentry *root) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_encoder *encoder = bridge_connector->encoder; + struct drm_bridge *bridge; + + list_for_each_entry(bridge, &encoder->bridge_chain, chain_node) { + if (bridge->funcs->debugfs_init) + bridge->funcs->debugfs_init(bridge, root); + } +} + static const struct drm_connector_funcs drm_bridge_connector_funcs = { .reset = drm_atomic_helper_connector_reset, .detect = drm_bridge_connector_detect, @@ -223,6 +237,7 @@ static const struct drm_connector_funcs drm_bridge_connector_funcs = { .destroy = drm_bridge_connector_destroy, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .debugfs_init = drm_bridge_connector_debugfs_init, }; /* ----------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index b0a826489488..7f1b82dbaebb 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -436,6 +436,9 @@ void drm_debugfs_connector_add(struct drm_connector *connector) /* vrr range */ debugfs_create_file("vrr_range", S_IRUGO, root, connector, &vrr_range_fops); + + if (connector->funcs->debugfs_init) + connector->funcs->debugfs_init(connector, root); } void drm_debugfs_connector_remove(struct drm_connector *connector) diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 061d87313fac..f27b4060faa2 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -649,6 +649,13 @@ struct drm_bridge_funcs { * the DRM_BRIDGE_OP_HPD flag in their &drm_bridge->ops. */ void (*hpd_disable)(struct drm_bridge *bridge); + + /** + * @debugfs_init: + * + * Allows bridges to create bridge-specific debugfs files. + */ + void (*debugfs_init)(struct drm_bridge *bridge, struct dentry *root); }; /** diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 5e36eb3df66f..5166186146f4 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1142,6 +1142,13 @@ struct drm_connector_funcs { * has been received from a source outside the display driver / device. */ void (*oob_hotplug_event)(struct drm_connector *connector); + + /** + * @debugfs_init: + * + * Allows connectors to create connector-specific debugfs files. + */ + void (*debugfs_init)(struct drm_connector *connector, struct dentry *root); }; /** diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 4602f833eb51..1ba2d424a53f 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -29,6 +29,7 @@ #include struct backlight_device; +struct dentry; struct device_node; struct drm_connector; struct drm_device; @@ -125,6 +126,13 @@ struct drm_panel_funcs { */ int (*get_timings)(struct drm_panel *panel, unsigned int num_timings, struct display_timing *timings); + + /** + * @debugfs_init: + * + * Allows panels to create panels-specific debugfs files. + */ + void (*debugfs_init)(struct drm_panel *panel, struct dentry *root); }; /** -- cgit v1.2.3 From bcf8b616deb8794179e3e9c6233a53f42664afb2 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 14 Feb 2022 14:37:06 +0100 Subject: drm/format-helper: Add drm_fb_xrgb8888_to_mono_reversed() Add support to convert from XR24 to reversed monochrome for drivers that control monochromatic display panels, that only have 1 bit per pixel. The function does a line-by-line conversion doing an intermediate step first from XR24 to 8-bit grayscale and then to reversed monochrome. The drm_fb_gray8_to_mono_reversed_line() helper was based on code from drivers/gpu/drm/tiny/repaper.c driver. Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Reviewed-by: Andy Shevchenko Reviewed-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220214133710.3278506-3-javierm@redhat.com --- drivers/gpu/drm/drm_format_helper.c | 110 ++++++++++++++++++++++++++++++++++++ include/drm/drm_format_helper.h | 4 ++ 2 files changed, 114 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index b981712623d3..bc0f49773868 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -12,9 +12,11 @@ #include #include +#include #include #include #include +#include #include static unsigned int clip_offset(const struct drm_rect *clip, unsigned int pitch, unsigned int cpp) @@ -591,3 +593,111 @@ int drm_fb_blit_toio(void __iomem *dst, unsigned int dst_pitch, uint32_t dst_for return -EINVAL; } EXPORT_SYMBOL(drm_fb_blit_toio); + +static void drm_fb_gray8_to_mono_reversed_line(u8 *dst, const u8 *src, unsigned int pixels, + unsigned int start_offset, unsigned int end_len) +{ + unsigned int xb, i; + + for (xb = 0; xb < pixels; xb++) { + unsigned int start = 0, end = 8; + u8 byte = 0x00; + + if (xb == 0 && start_offset) + start = start_offset; + + if (xb == pixels - 1 && end_len) + end = end_len; + + for (i = start; i < end; i++) { + unsigned int x = xb * 8 + i; + + byte >>= 1; + if (src[x] >> 7) + byte |= BIT(7); + } + *dst++ = byte; + } +} + +/** + * drm_fb_xrgb8888_to_mono_reversed - Convert XRGB8888 to reversed monochrome + * @dst: reversed monochrome destination buffer + * @dst_pitch: Number of bytes between two consecutive scanlines within dst + * @src: XRGB8888 source buffer + * @fb: DRM framebuffer + * @clip: Clip rectangle area to copy + * + * DRM doesn't have native monochrome support. + * Such drivers can announce the commonly supported XR24 format to userspace + * and use this function to convert to the native format. + * + * This function uses drm_fb_xrgb8888_to_gray8() to convert to grayscale and + * then the result is converted from grayscale to reversed monohrome. + */ +void drm_fb_xrgb8888_to_mono_reversed(void *dst, unsigned int dst_pitch, const void *vaddr, + const struct drm_framebuffer *fb, const struct drm_rect *clip) +{ + unsigned int linepixels = drm_rect_width(clip); + unsigned int lines = clip->y2 - clip->y1; + unsigned int cpp = fb->format->cpp[0]; + unsigned int len_src32 = linepixels * cpp; + struct drm_device *dev = fb->dev; + unsigned int start_offset, end_len; + unsigned int y; + u8 *mono = dst, *gray8; + u32 *src32; + + if (drm_WARN_ON(dev, fb->format->format != DRM_FORMAT_XRGB8888)) + return; + + /* + * The reversed mono destination buffer contains 1 bit per pixel + * and destination scanlines have to be in multiple of 8 pixels. + */ + if (!dst_pitch) + dst_pitch = DIV_ROUND_UP(linepixels, 8); + + drm_WARN_ONCE(dev, dst_pitch % 8 != 0, "dst_pitch is not a multiple of 8\n"); + + /* + * The cma memory is write-combined so reads are uncached. + * Speed up by fetching one line at a time. + * + * Also, format conversion from XR24 to reversed monochrome + * are done line-by-line but are converted to 8-bit grayscale + * as an intermediate step. + * + * Allocate a buffer to be used for both copying from the cma + * memory and to store the intermediate grayscale line pixels. + */ + src32 = kmalloc(len_src32 + linepixels, GFP_KERNEL); + if (!src32) + return; + + gray8 = (u8 *)src32 + len_src32; + + /* + * For damage handling, it is possible that only parts of the source + * buffer is copied and this could lead to start and end pixels that + * are not aligned to multiple of 8. + * + * Calculate if the start and end pixels are not aligned and set the + * offsets for the reversed mono line conversion function to adjust. + */ + start_offset = clip->x1 % 8; + end_len = clip->x2 % 8; + + vaddr += clip_offset(clip, fb->pitches[0], cpp); + for (y = 0; y < lines; y++) { + src32 = memcpy(src32, vaddr, len_src32); + drm_fb_xrgb8888_to_gray8_line(gray8, src32, linepixels); + drm_fb_gray8_to_mono_reversed_line(mono, gray8, dst_pitch, + start_offset, end_len); + vaddr += fb->pitches[0]; + mono += dst_pitch; + } + + kfree(src32); +} +EXPORT_SYMBOL(drm_fb_xrgb8888_to_mono_reversed); diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index b30ed5de0a33..0b0937c0b2f6 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -43,4 +43,8 @@ int drm_fb_blit_toio(void __iomem *dst, unsigned int dst_pitch, uint32_t dst_for const void *vmap, const struct drm_framebuffer *fb, const struct drm_rect *rect); +void drm_fb_xrgb8888_to_mono_reversed(void *dst, unsigned int dst_pitch, const void *src, + const struct drm_framebuffer *fb, + const struct drm_rect *clip); + #endif /* __LINUX_DRM_FORMAT_HELPER_H */ -- cgit v1.2.3 From 8c30e2d81bfddc5ab9f6b04db1c0f7d6ca7bdf46 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 11 Feb 2022 10:46:40 +0100 Subject: fbdev: Don't sort deferred-I/O pages by default Fbdev's deferred I/O sorts all dirty pages by default, which incurs a significant overhead. Make the sorting step optional and update the few drivers that require it. Use a FIFO list by default. Most fbdev drivers with deferred I/O build a bounding rectangle around the dirty pages or simply flush the whole screen. The only two affected DRM drivers, generic fbdev and vmwgfx, both use a bounding rectangle. In those cases, the exact order of the pages doesn't matter. The other drivers look at the page index or handle pages one-by-one. The patch sets the sort_pagelist flag for those, even though some of them would probably work correctly without sorting. Driver maintainers should update their driver accordingly. Sorting pages by memory offset for deferred I/O performs an implicit bubble-sort step on the list of dirty pages. The algorithm goes through the list of dirty pages and inserts each new page according to its index field. Even worse, list traversal always starts at the first entry. As video memory is most likely updated scanline by scanline, the algorithm traverses through the complete list for each updated page. For example, with 1024x768x32bpp each page covers exactly one scanline. Writing a single screen update from top to bottom requires updating 768 pages. With an average list length of 384 entries, a screen update creates (768 * 384 =) 294912 compare operation. Fix this by making the sorting step opt-in and update the few drivers that require it. All other drivers work with unsorted page lists. Pages are appended to the list. Therefore, in the common case of writing the framebuffer top to bottom, pages are still sorted by offset, which may have a positive effect on performance. Playing a video [1] in mplayer's benchmark mode shows the difference (i7-4790, FullHD, simpledrm, kernel with debugging). mplayer -benchmark -nosound -vo fbdev ./big_buck_bunny_720p_stereo.ogg With sorted page lists: BENCHMARKs: VC: 32.960s VO: 73.068s A: 0.000s Sys: 2.413s = 108.441s BENCHMARK%: VC: 30.3947% VO: 67.3802% A: 0.0000% Sys: 2.2251% = 100.0000% With unsorted page lists: BENCHMARKs: VC: 31.005s VO: 42.889s A: 0.000s Sys: 2.256s = 76.150s BENCHMARK%: VC: 40.7156% VO: 56.3219% A: 0.0000% Sys: 2.9625% = 100.0000% VC shows the overhead of video decoding, VO shows the overhead of the video output. Using unsorted page lists reduces the benchmark's run time by ~32s/~25%. v2: * Make sorted pagelists the special case (Sam) * Comment on drivers' use of pagelist (Sam) * Warn about the overhead in comment Signed-off-by: Thomas Zimmermann Acked-by: Sam Ravnborg Acked-by: Andy Shevchenko Link: https://download.blender.org/peach/bigbuckbunny_movies/big_buck_bunny_720p_stereo.ogg # [1] Link: https://patchwork.freedesktop.org/patch/msgid/20220211094640.21632-3-tzimmermann@suse.de --- drivers/staging/fbtft/fbtft-core.c | 1 + drivers/video/fbdev/broadsheetfb.c | 1 + drivers/video/fbdev/core/fb_defio.c | 24 +++++++++++++++++------- drivers/video/fbdev/metronomefb.c | 1 + drivers/video/fbdev/udlfb.c | 1 + include/linux/fb.h | 1 + 6 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index f2684d2d6851..4a35347b3020 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -654,6 +654,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, fbops->fb_blank = fbtft_fb_blank; fbdefio->delay = HZ / fps; + fbdefio->sort_pagelist = true; fbdefio->deferred_io = fbtft_deferred_io; fb_deferred_io_init(info); diff --git a/drivers/video/fbdev/broadsheetfb.c b/drivers/video/fbdev/broadsheetfb.c index fd66f4d4a621..b9054f658838 100644 --- a/drivers/video/fbdev/broadsheetfb.c +++ b/drivers/video/fbdev/broadsheetfb.c @@ -1059,6 +1059,7 @@ static const struct fb_ops broadsheetfb_ops = { static struct fb_deferred_io broadsheetfb_defio = { .delay = HZ/4, + .sort_pagelist = true, .deferred_io = broadsheetfb_dpy_deferred_io, }; diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 169a81c8172b..98b0f23bf5e2 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -96,7 +96,7 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct fb_info *info = vmf->vma->vm_private_data; struct fb_deferred_io *fbdefio = info->fbdefio; - struct page *cur; + struct list_head *pos = &fbdefio->pagelist; /* this is a callback we get when userspace first tries to write to the page. we schedule a workqueue. that workqueue @@ -137,14 +137,24 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf) if (!list_empty(&page->lru)) goto page_already_added; - /* we loop through the pagelist before adding in order - to keep the pagelist sorted */ - list_for_each_entry(cur, &fbdefio->pagelist, lru) { - if (cur->index > page->index) - break; + if (unlikely(fbdefio->sort_pagelist)) { + /* + * We loop through the pagelist before adding in order to + * keep the pagelist sorted. This has significant overhead + * of O(n^2) with n being the number of written pages. If + * possible, drivers should try to work with unsorted page + * lists instead. + */ + struct page *cur; + + list_for_each_entry(cur, &fbdefio->pagelist, lru) { + if (cur->index > page->index) + break; + } + pos = &cur->lru; } - list_add_tail(&page->lru, &cur->lru); + list_add_tail(&page->lru, pos); page_already_added: mutex_unlock(&fbdefio->lock); diff --git a/drivers/video/fbdev/metronomefb.c b/drivers/video/fbdev/metronomefb.c index 952826557a0c..af858dd23ea6 100644 --- a/drivers/video/fbdev/metronomefb.c +++ b/drivers/video/fbdev/metronomefb.c @@ -568,6 +568,7 @@ static const struct fb_ops metronomefb_ops = { static struct fb_deferred_io metronomefb_defio = { .delay = HZ, + .sort_pagelist = true, .deferred_io = metronomefb_dpy_deferred_io, }; diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index b9cdd02c1000..184bb8433b78 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -980,6 +980,7 @@ static int dlfb_ops_open(struct fb_info *info, int user) if (fbdefio) { fbdefio->delay = DL_DEFIO_WRITE_DELAY; + fbdefio->sort_pagelist = true; fbdefio->deferred_io = dlfb_dpy_deferred_io; } diff --git a/include/linux/fb.h b/include/linux/fb.h index 9a14f3f8a329..39baa9a70779 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -204,6 +204,7 @@ struct fb_pixmap { struct fb_deferred_io { /* delay between mkwrite and deferred handler */ unsigned long delay; + bool sort_pagelist; /* sort pagelist by offset */ struct mutex lock; /* mutex that protects the page list */ struct list_head pagelist; /* list of touched pages */ /* callback */ -- cgit v1.2.3 From afea229fe10282da14595870b44f82792451dfb2 Mon Sep 17 00:00:00 2001 From: Arunpravin Date: Mon, 21 Feb 2022 22:15:48 +0530 Subject: drm: improve drm_buddy_alloc function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Make drm_buddy_alloc a single function to handle range allocation and non-range allocation demands - Implemented a new function alloc_range() which allocates the requested power-of-two block comply with range limitations - Moved order computation and memory alignment logic from i915 driver to drm buddy v2: merged below changes to keep the build unbroken - drm_buddy_alloc_range() becomes obsolete and may be removed - enable ttm range allocation (fpfn / lpfn) support in i915 driver - apply enhanced drm_buddy_alloc() function to i915 driver v3(Matthew Auld): - Fix alignment issues and remove unnecessary list_empty check - add more validation checks for input arguments - make alloc_range() block allocations as bottom-up - optimize order computation logic - replace uint64_t with u64, which is preferred in the kernel v4(Matthew Auld): - keep drm_buddy_alloc_range() function implementation for generic actual range allocations - keep alloc_range() implementation for end bias allocations v5(Matthew Auld): - modify drm_buddy_alloc() passing argument place->lpfn to lpfn as place->lpfn will currently always be zero for i915 v6(Matthew Auld): - fixup potential uaf - If we are unlucky and can't allocate enough memory when splitting blocks, where we temporarily end up with the given block and its buddy on the respective free list, then we need to ensure we delete both blocks, and no just the buddy, before potentially freeing them - fix warnings reported by kernel test robot v7(Matthew Auld): - revert fixup potential uaf - keep __alloc_range() add node to the list logic same as drm_buddy_alloc_blocks() by having a temporary list variable - at drm_buddy_alloc_blocks() keep i915 range_overflows macro and add a new check for end variable v8: - fix warnings reported by kernel test robot v9(Matthew Auld): - remove DRM_BUDDY_RANGE_ALLOCATION flag - remove unnecessary function description v10: - keep DRM_BUDDY_RANGE_ALLOCATION flag as removing the flag and replacing with (end < size) logic fails amdgpu driver load Signed-off-by: Arunpravin Reviewed-by: Matthew Auld Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220221164552.2434-1-Arunpravin.PaneerSelvam@amd.com --- drivers/gpu/drm/drm_buddy.c | 292 +++++++++++++++++++------- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 67 +++--- drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 2 + include/drm/drm_buddy.h | 13 +- 4 files changed, 257 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index d60878bc9c20..1d801c88b286 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -282,23 +282,97 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects) } EXPORT_SYMBOL(drm_buddy_free_list); -/** - * drm_buddy_alloc_blocks - allocate power-of-two blocks - * - * @mm: DRM buddy manager to allocate from - * @order: size of the allocation - * - * The order value here translates to: - * - * 0 = 2^0 * mm->chunk_size - * 1 = 2^1 * mm->chunk_size - * 2 = 2^2 * mm->chunk_size - * - * Returns: - * allocated ptr to the &drm_buddy_block on success - */ -struct drm_buddy_block * -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order) +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= e2 && e1 >= s2; +} + +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= s2 && e1 >= e2; +} + +static struct drm_buddy_block * +alloc_range_bias(struct drm_buddy *mm, + u64 start, u64 end, + unsigned int order) +{ + struct drm_buddy_block *block; + struct drm_buddy_block *buddy; + LIST_HEAD(dfs); + int err; + int i; + + end = end - 1; + + for (i = 0; i < mm->n_roots; ++i) + list_add_tail(&mm->roots[i]->tmp_link, &dfs); + + do { + u64 block_start; + u64 block_end; + + block = list_first_entry_or_null(&dfs, + struct drm_buddy_block, + tmp_link); + if (!block) + break; + + list_del(&block->tmp_link); + + if (drm_buddy_block_order(block) < order) + continue; + + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block) - 1; + + if (!overlaps(start, end, block_start, block_end)) + continue; + + if (drm_buddy_block_is_allocated(block)) + continue; + + if (contains(start, end, block_start, block_end) && + order == drm_buddy_block_order(block)) { + /* + * Find the free block within the range. + */ + if (drm_buddy_block_is_free(block)) + return block; + + continue; + } + + if (!drm_buddy_block_is_split(block)) { + err = split_block(mm, block); + if (unlikely(err)) + goto err_undo; + } + + list_add(&block->right->tmp_link, &dfs); + list_add(&block->left->tmp_link, &dfs); + } while (1); + + return ERR_PTR(-ENOSPC); + +err_undo: + /* + * We really don't want to leave around a bunch of split blocks, since + * bigger is better, so make sure we merge everything back before we + * free the allocated blocks. + */ + buddy = get_buddy(block); + if (buddy && + (drm_buddy_block_is_free(block) && + drm_buddy_block_is_free(buddy))) + __drm_buddy_free(mm, block); + return ERR_PTR(err); +} + +static struct drm_buddy_block * +alloc_from_freelist(struct drm_buddy *mm, + unsigned int order, + unsigned long flags) { struct drm_buddy_block *block = NULL; unsigned int i; @@ -320,78 +394,29 @@ drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order) while (i != order) { err = split_block(mm, block); if (unlikely(err)) - goto out_free; + goto err_undo; - /* Go low */ - block = block->left; + block = block->right; i--; } - - mark_allocated(block); - mm->avail -= drm_buddy_block_size(mm, block); - kmemleak_update_trace(block); return block; -out_free: +err_undo: if (i != order) __drm_buddy_free(mm, block); return ERR_PTR(err); } -EXPORT_SYMBOL(drm_buddy_alloc_blocks); - -static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) -{ - return s1 <= e2 && e1 >= s2; -} - -static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) -{ - return s1 <= s2 && e1 >= e2; -} -/** - * drm_buddy_alloc_range - allocate range - * - * @mm: DRM buddy manager to allocate from - * @blocks: output list head to add allocated blocks - * @start: start of the allowed range for this block - * @size: size of the allocation - * - * Intended for pre-allocating portions of the address space, for example to - * reserve a block for the initial framebuffer or similar, hence the expectation - * here is that drm_buddy_alloc_blocks() is still the main vehicle for - * allocations, so if that's not the case then the drm_mm range allocator is - * probably a much better fit, and so you should probably go use that instead. - * - * Note that it's safe to chain together multiple alloc_ranges - * with the same blocks list - * - * Returns: - * 0 on success, error code on failure. - */ -int drm_buddy_alloc_range(struct drm_buddy *mm, - struct list_head *blocks, - u64 start, u64 size) +static int __alloc_range(struct drm_buddy *mm, + struct list_head *dfs, + u64 start, u64 size, + struct list_head *blocks) { struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(allocated); - LIST_HEAD(dfs); u64 end; int err; - int i; - - if (size < mm->chunk_size) - return -EINVAL; - - if (!IS_ALIGNED(size | start, mm->chunk_size)) - return -EINVAL; - - if (range_overflows(start, size, mm->size)) - return -EINVAL; - - for (i = 0; i < mm->n_roots; ++i) - list_add_tail(&mm->roots[i]->tmp_link, &dfs); end = start + size - 1; @@ -399,7 +424,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm, u64 block_start; u64 block_end; - block = list_first_entry_or_null(&dfs, + block = list_first_entry_or_null(dfs, struct drm_buddy_block, tmp_link); if (!block) @@ -436,8 +461,8 @@ int drm_buddy_alloc_range(struct drm_buddy *mm, goto err_undo; } - list_add(&block->right->tmp_link, &dfs); - list_add(&block->left->tmp_link, &dfs); + list_add(&block->right->tmp_link, dfs); + list_add(&block->left->tmp_link, dfs); } while (1); list_splice_tail(&allocated, blocks); @@ -459,7 +484,120 @@ err_free: drm_buddy_free_list(mm, &allocated); return err; } -EXPORT_SYMBOL(drm_buddy_alloc_range); + +static int __drm_buddy_alloc_range(struct drm_buddy *mm, + u64 start, + u64 size, + struct list_head *blocks) +{ + LIST_HEAD(dfs); + int i; + + for (i = 0; i < mm->n_roots; ++i) + list_add_tail(&mm->roots[i]->tmp_link, &dfs); + + return __alloc_range(mm, &dfs, start, size, blocks); +} + +/** + * drm_buddy_alloc_blocks - allocate power-of-two blocks + * + * @mm: DRM buddy manager to allocate from + * @start: start of the allowed range for this block + * @end: end of the allowed range for this block + * @size: size of the allocation + * @min_page_size: alignment of the allocation + * @blocks: output list head to add allocated blocks + * @flags: DRM_BUDDY_*_ALLOCATION flags + * + * alloc_range_bias() called on range limitations, which traverses + * the tree and returns the desired block. + * + * alloc_from_freelist() called when *no* range restrictions + * are enforced, which picks the block from the freelist. + * + * Returns: + * 0 on success, error code on failure. + */ +int drm_buddy_alloc_blocks(struct drm_buddy *mm, + u64 start, u64 end, u64 size, + u64 min_page_size, + struct list_head *blocks, + unsigned long flags) +{ + struct drm_buddy_block *block = NULL; + unsigned int min_order, order; + unsigned long pages; + LIST_HEAD(allocated); + int err; + + if (size < mm->chunk_size) + return -EINVAL; + + if (min_page_size < mm->chunk_size) + return -EINVAL; + + if (!is_power_of_2(min_page_size)) + return -EINVAL; + + if (!IS_ALIGNED(start | end | size, mm->chunk_size)) + return -EINVAL; + + if (end > mm->size) + return -EINVAL; + + if (range_overflows(start, size, mm->size)) + return -EINVAL; + + /* Actual range allocation */ + if (start + size == end) + return __drm_buddy_alloc_range(mm, start, size, blocks); + + pages = size >> ilog2(mm->chunk_size); + order = fls(pages) - 1; + min_order = ilog2(min_page_size) - ilog2(mm->chunk_size); + + do { + order = min(order, (unsigned int)fls(pages) - 1); + BUG_ON(order > mm->max_order); + BUG_ON(order < min_order); + + do { + if (flags & DRM_BUDDY_RANGE_ALLOCATION) + /* Allocate traversing within the range */ + block = alloc_range_bias(mm, start, end, order); + else + /* Allocate from freelist */ + block = alloc_from_freelist(mm, order, flags); + + if (!IS_ERR(block)) + break; + + if (order-- == min_order) { + err = -ENOSPC; + goto err_free; + } + } while (1); + + mark_allocated(block); + mm->avail -= drm_buddy_block_size(mm, block); + kmemleak_update_trace(block); + list_add_tail(&block->link, &allocated); + + pages -= BIT(order); + + if (!pages) + break; + } while (1); + + list_splice_tail(&allocated, blocks); + return 0; + +err_free: + drm_buddy_free_list(mm, &allocated); + return err; +} +EXPORT_SYMBOL(drm_buddy_alloc_blocks); /** * drm_buddy_block_print - print block information diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 247714bab044..a328a38fab07 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -36,13 +36,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); struct i915_ttm_buddy_resource *bman_res; struct drm_buddy *mm = &bman->mm; - unsigned long n_pages; - unsigned int min_order; + unsigned long n_pages, lpfn; u64 min_page_size; u64 size; int err; - GEM_BUG_ON(place->fpfn || place->lpfn); + lpfn = place->lpfn; + if (!lpfn) + lpfn = man->size; bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL); if (!bman_res) @@ -52,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, INIT_LIST_HEAD(&bman_res->blocks); bman_res->mm = mm; + if (place->fpfn || lpfn != man->size) + bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION; + GEM_BUG_ON(!bman_res->base.num_pages); size = bman_res->base.num_pages << PAGE_SHIFT; @@ -60,10 +64,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, min_page_size = bo->page_alignment << PAGE_SHIFT; GEM_BUG_ON(min_page_size < mm->chunk_size); - min_order = ilog2(min_page_size) - ilog2(mm->chunk_size); + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + unsigned long pages; + size = roundup_pow_of_two(size); - min_order = ilog2(size) - ilog2(mm->chunk_size); + min_page_size = size; + + pages = size >> ilog2(mm->chunk_size); + if (pages > lpfn) + lpfn = pages; } if (size > mm->size) { @@ -73,34 +83,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, n_pages = size >> ilog2(mm->chunk_size); - do { - struct drm_buddy_block *block; - unsigned int order; - - order = fls(n_pages) - 1; - GEM_BUG_ON(order > mm->max_order); - GEM_BUG_ON(order < min_order); - - do { - mutex_lock(&bman->lock); - block = drm_buddy_alloc_blocks(mm, order); - mutex_unlock(&bman->lock); - if (!IS_ERR(block)) - break; - - if (order-- == min_order) { - err = -ENOSPC; - goto err_free_blocks; - } - } while (1); - - n_pages -= BIT(order); - - list_add_tail(&block->link, &bman_res->blocks); - - if (!n_pages) - break; - } while (1); + mutex_lock(&bman->lock); + err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, + (u64)lpfn << PAGE_SHIFT, + (u64)n_pages << PAGE_SHIFT, + min_page_size, + &bman_res->blocks, + bman_res->flags); + mutex_unlock(&bman->lock); + if (unlikely(err)) + goto err_free_blocks; *res = &bman_res->base; return 0; @@ -268,10 +260,17 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, { struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); struct drm_buddy *mm = &bman->mm; + unsigned long flags = 0; int ret; + flags |= DRM_BUDDY_RANGE_ALLOCATION; + mutex_lock(&bman->lock); - ret = drm_buddy_alloc_range(mm, &bman->reserved, start, size); + ret = drm_buddy_alloc_blocks(mm, start, + start + size, + size, mm->chunk_size, + &bman->reserved, + flags); mutex_unlock(&bman->lock); return ret; diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h index 312077941411..72c90b432e87 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h @@ -20,6 +20,7 @@ struct drm_buddy; * * @base: struct ttm_resource base class we extend * @blocks: the list of struct i915_buddy_block for this resource/allocation + * @flags: DRM_BUDDY_*_ALLOCATION flags * @mm: the struct i915_buddy_mm for this resource * * Extends the struct ttm_resource to manage an address space allocation with @@ -28,6 +29,7 @@ struct drm_buddy; struct i915_ttm_buddy_resource { struct ttm_resource base; struct list_head blocks; + unsigned long flags; struct drm_buddy *mm; }; diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index f524db152413..54f25a372f27 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -22,6 +22,8 @@ start__ >= max__ || size__ > max__ - start__; \ }) +#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0) + struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) #define DRM_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) @@ -131,12 +133,11 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size); void drm_buddy_fini(struct drm_buddy *mm); -struct drm_buddy_block * -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order); - -int drm_buddy_alloc_range(struct drm_buddy *mm, - struct list_head *blocks, - u64 start, u64 size); +int drm_buddy_alloc_blocks(struct drm_buddy *mm, + u64 start, u64 end, u64 size, + u64 min_page_size, + struct list_head *blocks, + unsigned long flags); void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); -- cgit v1.2.3 From 476e4063022787b5720758239ee4c22fa2495e82 Mon Sep 17 00:00:00 2001 From: Arunpravin Date: Mon, 21 Feb 2022 22:15:49 +0530 Subject: drm: implement top-down allocation method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implemented a function which walk through the order list, compares the offset and returns the maximum offset block, this method is unpredictable in obtaining the high range address blocks which depends on allocation and deallocation. for instance, if driver requests address at a low specific range, allocator traverses from the root block and splits the larger blocks until it reaches the specific block and in the process of splitting, lower orders in the freelist are occupied with low range address blocks and for the subsequent TOPDOWN memory request we may return the low range blocks.To overcome this issue, we may go with the below approach. The other approach, sorting each order list entries in ascending order and compares the last entry of each order list in the freelist and return the max block. This creates sorting overhead on every drm_buddy_free() request and split up of larger blocks for a single page request. v2: - Fix alignment issues(Matthew Auld) - Remove unnecessary list_empty check(Matthew Auld) - merged the below patch to see the feature in action - add top-down alloc support to i915 driver Signed-off-by: Arunpravin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220221164552.2434-2-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 36 +++++++++++++++++++++++---- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 3 +++ include/drm/drm_buddy.h | 1 + 3 files changed, 35 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 1d801c88b286..72ee3cd96a24 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -369,6 +369,26 @@ err_undo: return ERR_PTR(err); } +static struct drm_buddy_block * +get_maxblock(struct list_head *head) +{ + struct drm_buddy_block *max_block = NULL, *node; + + max_block = list_first_entry_or_null(head, + struct drm_buddy_block, + link); + if (!max_block) + return NULL; + + list_for_each_entry(node, head, link) { + if (drm_buddy_block_offset(node) > + drm_buddy_block_offset(max_block)) + max_block = node; + } + + return max_block; +} + static struct drm_buddy_block * alloc_from_freelist(struct drm_buddy *mm, unsigned int order, @@ -379,11 +399,17 @@ alloc_from_freelist(struct drm_buddy *mm, int err; for (i = order; i <= mm->max_order; ++i) { - block = list_first_entry_or_null(&mm->free_list[i], - struct drm_buddy_block, - link); - if (block) - break; + if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { + block = get_maxblock(&mm->free_list[i]); + if (block) + break; + } else { + block = list_first_entry_or_null(&mm->free_list[i], + struct drm_buddy_block, + link); + if (block) + break; + } } if (!block) diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index a328a38fab07..7f553cceb6c0 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -53,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, INIT_LIST_HEAD(&bman_res->blocks); bman_res->mm = mm; + if (place->flags & TTM_PL_FLAG_TOPDOWN) + bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + if (place->fpfn || lpfn != man->size) bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION; diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 54f25a372f27..f0378fb48d06 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -23,6 +23,7 @@ }) #define DRM_BUDDY_RANGE_ALLOCATION (1 << 0) +#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) -- cgit v1.2.3 From 95ee2a8b4b3cd1fb25f7e14e2202da4045030173 Mon Sep 17 00:00:00 2001 From: Arunpravin Date: Mon, 21 Feb 2022 22:15:50 +0530 Subject: drm: implement a method to free unused pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On contiguous allocation, we round up the size to the *next* power of 2, implement a function to free the unused pages after the newly allocate block. v2(Matthew Auld): - replace function name 'drm_buddy_free_unused_pages' with drm_buddy_block_trim - replace input argument name 'actual_size' with 'new_size' - add more validation checks for input arguments - add overlaps check to avoid needless searching and splitting - merged the below patch to see the feature in action - add free unused pages support to i915 driver - lock drm_buddy_block_trim() function as it calls mark_free/mark_split are all globally visible v3(Matthew Auld): - remove trim method error handling as we address the failure case at drm_buddy_block_trim() function v4: - in case of trim, at __alloc_range() split_block failure path marks the block as free and removes it from the original list, potentially also freeing it, to overcome this problem, we turn the drm_buddy_block_trim() input node into a temporary node to prevent recursively freeing itself, but still retain the un-splitting/freeing of the other nodes(Matthew Auld) - modify the drm_buddy_block_trim() function return type v5(Matthew Auld): - revert drm_buddy_block_trim() function return type changes in v4 - modify drm_buddy_block_trim() passing argument n_pages to original_size as n_pages has already been rounded up to the next power-of-two and passing n_pages results noop v6: - fix warnings reported by kernel test robot v7: - modify drm_buddy_block_trim() function doc description - at drm_buddy_block_trim() handle non-allocated block as a serious programmer error - fix a typo Signed-off-by: Arunpravin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220221164552.2434-3-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 69 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 10 ++++ include/drm/drm_buddy.h | 4 ++ 3 files changed, 83 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 72ee3cd96a24..edef30be8304 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -525,6 +525,75 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm, return __alloc_range(mm, &dfs, start, size, blocks); } +/** + * drm_buddy_block_trim - free unused pages + * + * @mm: DRM buddy manager + * @new_size: original size requested + * @blocks: Input and output list of allocated blocks. + * MUST contain single block as input to be trimmed. + * On success will contain the newly allocated blocks + * making up the @new_size. Blocks always appear in + * ascending order + * + * For contiguous allocation, we round up the size to the nearest + * power of two value, drivers consume *actual* size, so remaining + * portions are unused and can be optionally freed with this function + * + * Returns: + * 0 on success, error code on failure. + */ +int drm_buddy_block_trim(struct drm_buddy *mm, + u64 new_size, + struct list_head *blocks) +{ + struct drm_buddy_block *parent; + struct drm_buddy_block *block; + LIST_HEAD(dfs); + u64 new_start; + int err; + + if (!list_is_singular(blocks)) + return -EINVAL; + + block = list_first_entry(blocks, + struct drm_buddy_block, + link); + + if (WARN_ON(!drm_buddy_block_is_allocated(block))) + return -EINVAL; + + if (new_size > drm_buddy_block_size(mm, block)) + return -EINVAL; + + if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size)) + return -EINVAL; + + if (new_size == drm_buddy_block_size(mm, block)) + return 0; + + list_del(&block->link); + mark_free(mm, block); + mm->avail += drm_buddy_block_size(mm, block); + + /* Prevent recursively freeing this node */ + parent = block->parent; + block->parent = NULL; + + new_start = drm_buddy_block_offset(block); + list_add(&block->tmp_link, &dfs); + err = __alloc_range(mm, &dfs, new_start, new_size, blocks); + if (err) { + mark_allocated(block); + mm->avail -= drm_buddy_block_size(mm, block); + list_add(&block->link, blocks); + } + + block->parent = parent; + return err; +} +EXPORT_SYMBOL(drm_buddy_block_trim); + /** * drm_buddy_alloc_blocks - allocate power-of-two blocks * diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 7f553cceb6c0..76d5211c25eb 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -97,6 +97,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, if (unlikely(err)) goto err_free_blocks; + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + u64 original_size = (u64)bman_res->base.num_pages << PAGE_SHIFT; + + mutex_lock(&bman->lock); + drm_buddy_block_trim(mm, + original_size, + &bman_res->blocks); + mutex_unlock(&bman->lock); + } + *res = &bman_res->base; return 0; diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index f0378fb48d06..bd21f9dfd15e 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -140,6 +140,10 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, struct list_head *blocks, unsigned long flags); +int drm_buddy_block_trim(struct drm_buddy *mm, + u64 new_size, + struct list_head *blocks); + void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects); -- cgit v1.2.3 From 8ab62eda177bc350f34fea4fcea23603b8184bfd Mon Sep 17 00:00:00 2001 From: Jiawei Gu Date: Tue, 22 Feb 2022 17:42:52 +0800 Subject: drm/sched: Add device pointer to drm_gpu_scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add device pointer so scheduler's printing can use DRM_DEV_ERROR() instead, which makes life easier under multiple GPU scenario. v2: amend all calls of drm_sched_init() v3: fill dev pointer for all drm_sched_init() calls Signed-off-by: Jiawei Gu Reviewed-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220221095705.5290-1-Jiawei.Gu@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +- drivers/gpu/drm/lima/lima_sched.c | 2 +- drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +- drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 9 +++++---- drivers/gpu/drm/v3d/v3d_sched.c | 10 +++++----- include/drm/gpu_scheduler.h | 3 ++- 8 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d78141e2c509..cd1f9140a878 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2316,7 +2316,9 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, ring->num_hw_submission, amdgpu_job_hang_limit, - timeout, adev->reset_domain->wq, ring->sched_score, ring->name); + timeout, adev->reset_domain->wq, + ring->sched_score, ring->name, + adev->dev); if (r) { DRM_ERROR("Failed to create scheduler on ring %s.\n", ring->name); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 58f593b278c1..35e5ef7dbdcc 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -195,7 +195,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu) ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, msecs_to_jiffies(500), NULL, NULL, - dev_name(gpu->dev)); + dev_name(gpu->dev), gpu->dev); if (ret) return ret; diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 12437e42cc76..9cf4c60f4fca 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -491,7 +491,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name) return drm_sched_init(&pipe->base, &lima_sched_ops, 1, lima_job_hang_limit, msecs_to_jiffies(timeout), NULL, - NULL, name); + NULL, name, pipe->ldev->dev); } void lima_sched_pipe_fini(struct lima_sched_pipe *pipe) diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 3bbf574c3bdc..367a6aaa3a20 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -89,7 +89,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, ret = drm_sched_init(&ring->sched, &msm_sched_ops, num_hw_submissions, 0, sched_timeout, - NULL, NULL, to_msm_bo(ring->bo)->name); + NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); if (ret) { goto fail; } diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 908d79520853..a6925dbb6224 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -812,7 +812,7 @@ int panfrost_job_init(struct panfrost_device *pfdev) nentries, 0, msecs_to_jiffies(JOB_TIMEOUT_MS), pfdev->reset.wq, - NULL, "pan_js"); + NULL, "pan_js", pfdev->dev); if (ret) { dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); goto err_sched; diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index f91fb31ab7a7..b81fceb0b8a2 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -491,7 +491,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) if (r == -ENOENT) drm_sched_job_done(s_job); else if (r) - DRM_ERROR("fence add callback failed (%d)\n", + DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); } else drm_sched_job_done(s_job); @@ -957,7 +957,7 @@ static int drm_sched_main(void *param) if (r == -ENOENT) drm_sched_job_done(sched_job); else if (r) - DRM_ERROR("fence add callback failed (%d)\n", + DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); dma_fence_put(fence); } else { @@ -991,7 +991,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, unsigned hw_submission, unsigned hang_limit, long timeout, struct workqueue_struct *timeout_wq, - atomic_t *score, const char *name) + atomic_t *score, const char *name, struct device *dev) { int i, ret; sched->ops = ops; @@ -1001,6 +1001,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, sched->timeout_wq = timeout_wq ? : system_wq; sched->hang_limit = hang_limit; sched->score = score ? score : &sched->_score; + sched->dev = dev; for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++) drm_sched_rq_init(sched, &sched->sched_rq[i]); @@ -1018,7 +1019,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, if (IS_ERR(sched->thread)) { ret = PTR_ERR(sched->thread); sched->thread = NULL; - DRM_ERROR("Failed to create scheduler for %s.\n", name); + DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name); return ret; } diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index e0cb7d0697a7..39459ae96f30 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -391,7 +391,7 @@ v3d_sched_init(struct v3d_dev *v3d) &v3d_bin_sched_ops, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, - NULL, "v3d_bin"); + NULL, "v3d_bin", v3d->drm.dev); if (ret) { dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret); return ret; @@ -401,7 +401,7 @@ v3d_sched_init(struct v3d_dev *v3d) &v3d_render_sched_ops, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, - NULL, "v3d_render"); + NULL, "v3d_render", v3d->drm.dev); if (ret) { dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.", ret); @@ -413,7 +413,7 @@ v3d_sched_init(struct v3d_dev *v3d) &v3d_tfu_sched_ops, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, - NULL, "v3d_tfu"); + NULL, "v3d_tfu", v3d->drm.dev); if (ret) { dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.", ret); @@ -426,7 +426,7 @@ v3d_sched_init(struct v3d_dev *v3d) &v3d_csd_sched_ops, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, - NULL, "v3d_csd"); + NULL, "v3d_csd", v3d->drm.dev); if (ret) { dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.", ret); @@ -438,7 +438,7 @@ v3d_sched_init(struct v3d_dev *v3d) &v3d_cache_clean_sched_ops, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, - NULL, "v3d_cache_clean"); + NULL, "v3d_cache_clean", v3d->drm.dev); if (ret) { dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.", ret); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index bbc22fad8d80..944f83ef9f2e 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -457,13 +457,14 @@ struct drm_gpu_scheduler { atomic_t _score; bool ready; bool free_guilty; + struct device *dev; }; int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, uint32_t hw_submission, unsigned hang_limit, long timeout, struct workqueue_struct *timeout_wq, - atomic_t *score, const char *name); + atomic_t *score, const char *name, struct device *dev); void drm_sched_fini(struct drm_gpu_scheduler *sched); int drm_sched_job_init(struct drm_sched_job *job, -- cgit v1.2.3 From 92937f170d3f49f41d7acb86243ee691a98eb2be Mon Sep 17 00:00:00 2001 From: Arunpravin Date: Tue, 22 Feb 2022 23:18:41 +0530 Subject: drm/selftests: add drm buddy alloc range testcase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add a test to check the range allocation - export get_buddy() function in drm_buddy.c - export drm_prandom_u32_max_state() in lib/drm_random.c - include helper functions - include prime number header file v2: - add drm_get_buddy() function description (Matthew Auld) - removed unnecessary test succeeded print Signed-off-by: Arunpravin Reviewed-by: Matthew Auld Acked-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220222174845.2175-3-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 25 +- drivers/gpu/drm/lib/drm_random.c | 3 +- drivers/gpu/drm/lib/drm_random.h | 2 + drivers/gpu/drm/selftests/drm_buddy_selftests.h | 1 + drivers/gpu/drm/selftests/test-drm_buddy.c | 388 ++++++++++++++++++++++++ include/drm/drm_buddy.h | 3 + 6 files changed, 417 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index edef30be8304..72f52f293249 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -211,7 +211,7 @@ static int split_block(struct drm_buddy *mm, } static struct drm_buddy_block * -get_buddy(struct drm_buddy_block *block) +__get_buddy(struct drm_buddy_block *block) { struct drm_buddy_block *parent; @@ -225,6 +225,23 @@ get_buddy(struct drm_buddy_block *block) return parent->left; } +/** + * drm_get_buddy - get buddy address + * + * @block: DRM buddy block + * + * Returns the corresponding buddy block for @block, or NULL + * if this is a root block and can't be merged further. + * Requires some kind of locking to protect against + * any concurrent allocate and free operations. + */ +struct drm_buddy_block * +drm_get_buddy(struct drm_buddy_block *block) +{ + return __get_buddy(block); +} +EXPORT_SYMBOL(drm_get_buddy); + static void __drm_buddy_free(struct drm_buddy *mm, struct drm_buddy_block *block) { @@ -233,7 +250,7 @@ static void __drm_buddy_free(struct drm_buddy *mm, while ((parent = block->parent)) { struct drm_buddy_block *buddy; - buddy = get_buddy(block); + buddy = __get_buddy(block); if (!drm_buddy_block_is_free(buddy)) break; @@ -361,7 +378,7 @@ err_undo: * bigger is better, so make sure we merge everything back before we * free the allocated blocks. */ - buddy = get_buddy(block); + buddy = __get_buddy(block); if (buddy && (drm_buddy_block_is_free(block) && drm_buddy_block_is_free(buddy))) @@ -500,7 +517,7 @@ err_undo: * bigger is better, so make sure we merge everything back before we * free the allocated blocks. */ - buddy = get_buddy(block); + buddy = __get_buddy(block); if (buddy && (drm_buddy_block_is_free(block) && drm_buddy_block_is_free(buddy))) diff --git a/drivers/gpu/drm/lib/drm_random.c b/drivers/gpu/drm/lib/drm_random.c index eeb155826d27..31b5a3e21911 100644 --- a/drivers/gpu/drm/lib/drm_random.c +++ b/drivers/gpu/drm/lib/drm_random.c @@ -7,10 +7,11 @@ #include "drm_random.h" -static inline u32 drm_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) +u32 drm_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) { return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); } +EXPORT_SYMBOL(drm_prandom_u32_max_state); void drm_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state) diff --git a/drivers/gpu/drm/lib/drm_random.h b/drivers/gpu/drm/lib/drm_random.h index 4a3e94dfa0c0..5543bf0474bc 100644 --- a/drivers/gpu/drm/lib/drm_random.h +++ b/drivers/gpu/drm/lib/drm_random.h @@ -22,5 +22,7 @@ unsigned int *drm_random_order(unsigned int count, void drm_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state); +u32 drm_prandom_u32_max_state(u32 ep_ro, + struct rnd_state *state); #endif /* !__DRM_RANDOM_H__ */ diff --git a/drivers/gpu/drm/selftests/drm_buddy_selftests.h b/drivers/gpu/drm/selftests/drm_buddy_selftests.h index ebe16162762f..3230bfd2770b 100644 --- a/drivers/gpu/drm/selftests/drm_buddy_selftests.h +++ b/drivers/gpu/drm/selftests/drm_buddy_selftests.h @@ -8,3 +8,4 @@ */ selftest(sanitycheck, igt_sanitycheck) /* keep first (selfcheck for igt) */ selftest(buddy_alloc_limit, igt_buddy_alloc_limit) +selftest(buddy_alloc_range, igt_buddy_alloc_range) diff --git a/drivers/gpu/drm/selftests/test-drm_buddy.c b/drivers/gpu/drm/selftests/test-drm_buddy.c index 0df41e1cb8a6..586e0673a67c 100644 --- a/drivers/gpu/drm/selftests/test-drm_buddy.c +++ b/drivers/gpu/drm/selftests/test-drm_buddy.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) "drm_buddy: " fmt #include +#include #include @@ -16,6 +17,393 @@ static unsigned int random_seed; +static inline const char *yesno(bool v) +{ + return v ? "yes" : "no"; +} + +static void __igt_dump_block(struct drm_buddy *mm, + struct drm_buddy_block *block, + bool buddy) +{ + pr_err("block info: header=%llx, state=%u, order=%d, offset=%llx size=%llx root=%s buddy=%s\n", + block->header, + drm_buddy_block_state(block), + drm_buddy_block_order(block), + drm_buddy_block_offset(block), + drm_buddy_block_size(mm, block), + yesno(!block->parent), + yesno(buddy)); +} + +static void igt_dump_block(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + struct drm_buddy_block *buddy; + + __igt_dump_block(mm, block, false); + + buddy = drm_get_buddy(block); + if (buddy) + __igt_dump_block(mm, buddy, true); +} + +static int igt_check_block(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + struct drm_buddy_block *buddy; + unsigned int block_state; + u64 block_size; + u64 offset; + int err = 0; + + block_state = drm_buddy_block_state(block); + + if (block_state != DRM_BUDDY_ALLOCATED && + block_state != DRM_BUDDY_FREE && + block_state != DRM_BUDDY_SPLIT) { + pr_err("block state mismatch\n"); + err = -EINVAL; + } + + block_size = drm_buddy_block_size(mm, block); + offset = drm_buddy_block_offset(block); + + if (block_size < mm->chunk_size) { + pr_err("block size smaller than min size\n"); + err = -EINVAL; + } + + if (!is_power_of_2(block_size)) { + pr_err("block size not power of two\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(block_size, mm->chunk_size)) { + pr_err("block size not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, mm->chunk_size)) { + pr_err("block offset not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, block_size)) { + pr_err("block offset not aligned to block size\n"); + err = -EINVAL; + } + + buddy = drm_get_buddy(block); + + if (!buddy && block->parent) { + pr_err("buddy has gone fishing\n"); + err = -EINVAL; + } + + if (buddy) { + if (drm_buddy_block_offset(buddy) != (offset ^ block_size)) { + pr_err("buddy has wrong offset\n"); + err = -EINVAL; + } + + if (drm_buddy_block_size(mm, buddy) != block_size) { + pr_err("buddy size mismatch\n"); + err = -EINVAL; + } + + if (drm_buddy_block_state(buddy) == block_state && + block_state == DRM_BUDDY_FREE) { + pr_err("block and its buddy are free\n"); + err = -EINVAL; + } + } + + return err; +} + +static int igt_check_blocks(struct drm_buddy *mm, + struct list_head *blocks, + u64 expected_size, + bool is_contiguous) +{ + struct drm_buddy_block *block; + struct drm_buddy_block *prev; + u64 total; + int err = 0; + + block = NULL; + prev = NULL; + total = 0; + + list_for_each_entry(block, blocks, link) { + err = igt_check_block(mm, block); + + if (!drm_buddy_block_is_allocated(block)) { + pr_err("block not allocated\n"), + err = -EINVAL; + } + + if (is_contiguous && prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = drm_buddy_block_offset(prev); + prev_block_size = drm_buddy_block_size(mm, prev); + offset = drm_buddy_block_offset(block); + + if (offset != (prev_offset + prev_block_size)) { + pr_err("block offset mismatch\n"); + err = -EINVAL; + } + } + + if (err) + break; + + total += drm_buddy_block_size(mm, block); + prev = block; + } + + if (!err) { + if (total != expected_size) { + pr_err("size mismatch, expected=%llx, found=%llx\n", + expected_size, total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev block, dump:\n"); + igt_dump_block(mm, prev); + } + + pr_err("bad block, dump:\n"); + igt_dump_block(mm, block); + + return err; +} + +static int igt_check_mm(struct drm_buddy *mm) +{ + struct drm_buddy_block *root; + struct drm_buddy_block *prev; + unsigned int i; + u64 total; + int err = 0; + + if (!mm->n_roots) { + pr_err("n_roots is zero\n"); + return -EINVAL; + } + + if (mm->n_roots != hweight64(mm->size)) { + pr_err("n_roots mismatch, n_roots=%u, expected=%lu\n", + mm->n_roots, hweight64(mm->size)); + return -EINVAL; + } + + root = NULL; + prev = NULL; + total = 0; + + for (i = 0; i < mm->n_roots; ++i) { + struct drm_buddy_block *block; + unsigned int order; + + root = mm->roots[i]; + if (!root) { + pr_err("root(%u) is NULL\n", i); + err = -EINVAL; + break; + } + + err = igt_check_block(mm, root); + + if (!drm_buddy_block_is_free(root)) { + pr_err("root not free\n"); + err = -EINVAL; + } + + order = drm_buddy_block_order(root); + + if (!i) { + if (order != mm->max_order) { + pr_err("max order root missing\n"); + err = -EINVAL; + } + } + + if (prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = drm_buddy_block_offset(prev); + prev_block_size = drm_buddy_block_size(mm, prev); + offset = drm_buddy_block_offset(root); + + if (offset != (prev_offset + prev_block_size)) { + pr_err("root offset mismatch\n"); + err = -EINVAL; + } + } + + block = list_first_entry_or_null(&mm->free_list[order], + struct drm_buddy_block, + link); + if (block != root) { + pr_err("root mismatch at order=%u\n", order); + err = -EINVAL; + } + + if (err) + break; + + prev = root; + total += drm_buddy_block_size(mm, root); + } + + if (!err) { + if (total != mm->size) { + pr_err("expected mm size=%llx, found=%llx\n", mm->size, + total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev root(%u), dump:\n", i - 1); + igt_dump_block(mm, prev); + } + + if (root) { + pr_err("bad root(%u), dump:\n", i); + igt_dump_block(mm, root); + } + + return err; +} + +static void igt_mm_config(u64 *size, u64 *chunk_size) +{ + DRM_RND_STATE(prng, random_seed); + u32 s, ms; + + /* Nothing fancy, just try to get an interesting bit pattern */ + + prandom_seed_state(&prng, random_seed); + + /* Let size be a random number of pages up to 8 GB (2M pages) */ + s = 1 + drm_prandom_u32_max_state((BIT(33 - 12)) - 1, &prng); + /* Let the chunk size be a random power of 2 less than size */ + ms = BIT(drm_prandom_u32_max_state(ilog2(s), &prng)); + /* Round size down to the chunk size */ + s &= -ms; + + /* Convert from pages to bytes */ + *chunk_size = (u64)ms << 12; + *size = (u64)s << 12; +} + +static int igt_buddy_alloc_range(void *arg) +{ + unsigned long flags = DRM_BUDDY_RANGE_ALLOCATION; + u64 offset, size, rem, chunk_size, end; + unsigned long page_num; + struct drm_buddy mm; + LIST_HEAD(blocks); + int err; + + igt_mm_config(&size, &chunk_size); + + err = drm_buddy_init(&mm, size, chunk_size); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + + err = igt_check_mm(&mm); + if (err) { + pr_err("pre-mm check failed, abort, abort, abort!\n"); + goto err_fini; + } + + rem = mm.size; + offset = 0; + + for_each_prime_number_from(page_num, 1, ULONG_MAX - 1) { + struct drm_buddy_block *block; + LIST_HEAD(tmp); + + size = min(page_num * mm.chunk_size, rem); + end = offset + size; + + err = drm_buddy_alloc_blocks(&mm, offset, end, size, mm.chunk_size, &tmp, flags); + if (err) { + if (err == -ENOMEM) { + pr_info("alloc_range hit -ENOMEM with size=%llx\n", + size); + } else { + pr_err("alloc_range with offset=%llx, size=%llx failed(%d)\n", + offset, size, err); + } + + break; + } + + block = list_first_entry_or_null(&tmp, + struct drm_buddy_block, + link); + if (!block) { + pr_err("alloc_range has no blocks\n"); + err = -EINVAL; + break; + } + + if (drm_buddy_block_offset(block) != offset) { + pr_err("alloc_range start offset mismatch, found=%llx, expected=%llx\n", + drm_buddy_block_offset(block), offset); + err = -EINVAL; + } + + if (!err) + err = igt_check_blocks(&mm, &tmp, size, true); + + list_splice_tail(&tmp, &blocks); + + if (err) + break; + + offset += size; + + rem -= size; + if (!rem) + break; + + cond_resched(); + } + + if (err == -ENOMEM) + err = 0; + + drm_buddy_free_list(&mm, &blocks); + + if (!err) { + err = igt_check_mm(&mm); + if (err) + pr_err("post-mm check failed\n"); + } + +err_fini: + drm_buddy_fini(&mm); + + return err; +} + static int igt_buddy_alloc_limit(void *arg) { u64 end, size = U64_MAX, start = 0; diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index bd21f9dfd15e..572077ff8ae7 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -134,6 +134,9 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size); void drm_buddy_fini(struct drm_buddy *mm); +struct drm_buddy_block * +drm_get_buddy(struct drm_buddy_block *block); + int drm_buddy_alloc_blocks(struct drm_buddy *mm, u64 start, u64 end, u64 size, u64 min_page_size, -- cgit v1.2.3