Diffstat (limited to 'drivers/gpu/drm/i915/gt')
86 files changed, 17334 insertions, 2430 deletions
diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile index 1c75b5c9790c..7e73aa587967 100644 --- a/drivers/gpu/drm/i915/gt/Makefile +++ b/drivers/gpu/drm/i915/gt/Makefile @@ -1,2 +1,5 @@ +# For building individual subdir files on the command line +subdir-ccflags-y += -I$(srctree)/$(src)/.. + # Extra header tests -include $(src)/Makefile.header-test +header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test deleted file mode 100644 index 61e06cbb4b32..000000000000 --- a/drivers/gpu/drm/i915/gt/Makefile.header-test +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2019 Intel Corporation - -# Test the headers are compilable as standalone units -header_test := $(notdir $(wildcard $(src)/*.h)) - -quiet_cmd_header_test = HDRTEST $@ - cmd_header_test = echo "\#include \"$(<F)\"" > $@ - -header_test_%.c: %.h - $(call cmd,header_test) - -extra-$(CONFIG_DRM_I915_WERROR) += \ - $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h))) - -clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h))) diff --git a/drivers/gpu/drm/i915/gt/gen6_renderstate.c b/drivers/gpu/drm/i915/gt/gen6_renderstate.c new file mode 100644 index 000000000000..11c8e7b3dd7c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_renderstate.c @@ -0,0 +1,315 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Generated by: intel-gpu-tools-1.8-220-g01153e7 + */ + +#include "intel_renderstate.h" + +static const u32 gen6_null_state_relocs[] = { + 0x00000020, + 0x00000024, + 0x0000002c, + 0x000001e0, + 0x000001e4, + -1, +}; + +static const u32 gen6_null_state_batch[] = { + 0x69040000, + 0x790d0001, + 0x00000000, + 0x00000000, + 0x78180000, + 0x00000001, + 0x61010008, + 0x00000000, + 0x00000001, /* reloc */ + 0x00000001, /* reloc */ + 0x00000000, + 0x00000001, /* reloc */ + 0x00000000, + 0x00000001, + 0x00000000, + 0x00000001, + 0x61020000, + 0x00000000, + 0x78050001, + 0x00000018, + 0x00000000, + 0x780d1002, + 0x00000000, + 0x00000000, + 0x00000420, + 0x78150003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78100004, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78160003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78110005, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78120002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78170003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79050005, + 0xe0040000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79100000, + 0x00000000, + 0x79000002, + 0xffffffff, + 0x00000000, + 0x00000000, + 0x780e0002, + 0x00000441, + 0x00000401, + 0x00000401, + 0x78021002, + 0x00000000, + 0x00000000, + 0x00000400, + 0x78130012, + 0x00400810, + 0x00000000, + 0x20000000, + 0x04000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78140007, + 0x00000280, + 0x08080000, + 0x00000000, + 0x00060000, + 0x4e080002, + 0x00100400, + 0x00000000, + 0x00000000, + 0x78090005, + 0x02000000, + 0x22220000, + 0x02f60000, + 0x11330000, + 0x02850004, + 0x11220000, + 0x78011002, + 0x00000000, + 0x00000000, + 0x00000200, + 0x78080003, + 0x00002000, + 0x00000448, /* reloc */ + 0x00000448, /* reloc */ + 0x00000000, + 0x05000000, /* cmds end */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000220, /* state start */ + 0x00000240, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0060005a, + 0x204077be, + 0x000000c0, + 0x008d0040, + 0x0060005a, + 0x206077be, + 0x000000c0, + 0x008d0080, + 0x0060005a, + 0x208077be, + 0x000000d0, + 0x008d0040, + 0x0060005a, + 0x20a077be, + 0x000000d0, + 0x008d0080, + 0x00000201, + 0x20080061, + 0x00000000, + 0x00000000, + 0x00600001, + 0x20200022, + 0x008d0000, + 0x00000000, + 0x02800031, + 0x21c01cc9, + 0x00000020, + 0x0a8a0001, + 0x00600001, + 0x204003be, + 0x008d01c0, + 0x00000000, + 0x00600001, + 0x206003be, + 0x008d01e0, + 0x00000000, + 0x00600001, + 0x208003be, + 0x008d0200, + 0x00000000, + 0x00600001, + 0x20a003be, + 0x008d0220, + 0x00000000, + 0x00600001, + 0x20c003be, + 0x008d0240, + 0x00000000, + 0x00600001, + 0x20e003be, + 0x008d0260, + 0x00000000, + 0x00600001, + 0x210003be, + 0x008d0280, + 0x00000000, + 0x00600001, + 0x212003be, + 0x008d02a0, + 0x00000000, + 0x05800031, + 0x24001cc8, + 0x00000040, + 0x90019000, + 0x0000007e, + 0x00000000, + 0x00000000, + 
0x00000000, + 0x0000007e, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000007e, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000007e, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000007e, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000007e, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000007e, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000007e, + 0x00000000, + 0x00000000, + 0x00000000, + 0x30000000, + 0x00000124, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xf99a130c, + 0x799a130c, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80000031, + 0x00000003, + 0x00000000, /* state end */ +}; + +RO_RENDERSTATE(6); diff --git a/drivers/gpu/drm/i915/gt/gen7_renderstate.c b/drivers/gpu/drm/i915/gt/gen7_renderstate.c new file mode 100644 index 000000000000..655180646152 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen7_renderstate.c @@ -0,0 +1,279 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Generated by: intel-gpu-tools-1.8-220-g01153e7 + */ + +#include "intel_renderstate.h" + +static const u32 gen7_null_state_relocs[] = { + 0x0000000c, + 0x00000010, + 0x00000018, + 0x000001ec, + -1, +}; + +static const u32 gen7_null_state_batch[] = { + 0x69040000, + 0x61010008, + 0x00000000, + 0x00000001, /* reloc */ + 0x00000001, /* reloc */ + 0x00000000, + 0x00000001, /* reloc */ + 0x00000000, + 0x00000001, + 0x00000000, + 0x00000001, + 0x790d0002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78180000, + 0x00000001, + 0x79160000, + 0x00000008, + 0x78300000, + 0x02010040, + 0x78310000, + 0x04000000, + 0x78320000, + 0x04000000, + 0x78330000, + 0x02000000, + 0x78100004, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781b0005, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781c0002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781d0004, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78110005, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78120002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78210000, + 0x00000000, + 0x78130005, + 0x00000000, + 0x20000000, + 0x04000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78140001, + 0x20000800, + 0x00000000, + 0x781e0001, + 0x00000000, + 0x00000000, + 0x78050005, + 0xe0040000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78040001, + 0x00000000, + 0x00000000, + 0x78240000, + 0x00000240, + 0x78230000, + 0x00000260, + 0x782f0000, + 0x00000280, + 0x781f000c, + 0x00400810, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78200006, + 0x000002c0, + 0x08080000, + 0x00000000, + 0x28000402, + 0x00060000, + 0x00000000, + 0x00000000, + 0x78090005, + 0x02000000, + 0x22220000, + 0x02f60000, + 0x11230000, + 0x02f60004, + 0x11230000, + 0x78080003, + 0x00006008, + 0x00000340, /* reloc */ + 0xffffffff, + 0x00000000, + 0x782a0000, + 0x00000360, + 0x79000002, + 0xffffffff, + 0x00000000, + 0x00000000, + 0x7b000005, + 0x0000000f, + 0x00000003, + 0x00000000, + 0x00000001, + 0x00000000, + 0x00000000, + 0x05000000, /* cmds end */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000031, /* state start */ + 0x00000003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xf99a130c, + 0x799a130c, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000492, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0080005a, + 0x2e2077bd, + 0x000000c0, + 0x008d0040, + 0x0080005a, + 0x2e6077bd, + 0x000000d0, + 0x008d0040, + 0x02800031, + 0x21801fa9, + 0x008d0e20, + 0x08840001, + 0x00800001, + 0x2e2003bd, + 0x008d0180, + 0x00000000, + 0x00800001, + 0x2e6003bd, + 0x008d01c0, + 0x00000000, + 0x00800001, + 0x2ea003bd, + 0x008d0200, + 0x00000000, + 0x00800001, + 0x2ee003bd, + 0x008d0240, + 0x00000000, + 0x05800031, + 0x20001fa8, + 0x008d0e20, + 0x90031000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000380, + 0x000003a0, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 
0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* state end */ +}; + +RO_RENDERSTATE(7); diff --git a/drivers/gpu/drm/i915/gt/gen8_renderstate.c b/drivers/gpu/drm/i915/gt/gen8_renderstate.c new file mode 100644 index 000000000000..95288a34c15d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen8_renderstate.c @@ -0,0 +1,983 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Generated by: intel-gpu-tools-1.8-220-g01153e7 + */ + +#include "intel_renderstate.h" + +static const u32 gen8_null_state_relocs[] = { + 0x00000798, + 0x000007a4, + 0x000007ac, + 0x000007bc, + -1, +}; + +static const u32 gen8_null_state_batch[] = { + 0x7a000004, + 0x01000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x69040000, + 0x78140000, + 0x04000000, + 0x7820000a, + 0x00000000, + 0x00000000, + 0x80000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78130002, + 0x00000000, + 0x00000000, + 0x02001808, + 0x781f0002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78510009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78100007, + 0x00000000, + 0x00000000, + 0x00010000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781b0007, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000800, + 0x00000000, + 0x78110008, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781e0003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781d0007, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78120002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78500003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781c0002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x780c0000, + 0x00000000, + 0x78520003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78300000, + 0x08010040, + 0x78310000, + 0x1e000000, + 0x78320000, + 0x1e000000, + 0x78330000, + 0x1e000000, + 0x79190002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x791a0002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x791b0002, + 0x00000000, + 0x00000000, + 
0x00000000, + 0x79120000, + 0x00000000, + 0x79130000, + 0x00000000, + 0x79140000, + 0x00000000, + 0x79150000, + 0x00000000, + 0x79160000, + 0x00000000, + 0x78150009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78190009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781a0009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78160009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78170009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78490001, + 0x00000000, + 0x00000000, + 0x784a0000, + 0x00000000, + 0x784b0000, + 0x00000004, + 0x79170101, + 0x00000000, + 0x00000080, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, 
+ 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79180006, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79180006, + 0x20000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79180006, + 0x40000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79180006, + 0x60000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x6101000e, + 0x00000001, /* reloc */ + 0x00000000, + 0x00000000, + 0x00000001, /* reloc */ + 0x00000000, + 0x00000001, /* reloc */ + 0x00000000, + 0x00000001, + 0x00000000, + 0x00000001, /* reloc */ + 0x00000000, + 0x00001001, + 0x00001001, + 0x00000001, + 0x00001001, + 0x61020001, + 0x00000000, + 0x00000000, + 0x79000002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78050006, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79040002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79040002, + 0x40000000, + 0x00000000, + 0x00000000, + 0x79040002, + 0x80000000, + 0x00000000, + 0x00000000, + 0x79040002, + 0xc0000000, + 0x00000000, + 0x00000000, + 0x79080001, + 0x00000000, + 0x00000000, + 0x790a0001, + 0x00000000, + 0x00000000, + 0x78060003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78070003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78040001, + 0x00000000, + 0x00000000, + 0x79110000, + 0x00000000, + 0x780d0000, + 0x00000000, + 0x79060000, + 0x00000000, + 0x7907001f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x7902000f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x790c000f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x780a0003, + 
0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78080083, + 0x00004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x04004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x08004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x10004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x14004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x18004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x1c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x20004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x24004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x28004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x2c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x30004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x34004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x38004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x3c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x40004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x44004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x48004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x4c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x50004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x54004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x58004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x5c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x60004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x64004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x68004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x6c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x70004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x74004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x7c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78090043, + 0x02000000, + 0x22220000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x680b0001, + 0x78260000, + 0x00000000, + 0x78270000, + 0x00000000, + 0x78280000, + 0x00000000, + 0x78290000, + 0x00000000, + 0x782a0000, + 0x00000000, + 0x780e0000, + 0x00000dc1, + 0x78240000, + 0x00000e01, + 0x784f0000, + 0x80000100, + 0x784d0000, + 0x40000000, + 0x782b0000, + 0x00000000, + 0x782c0000, + 0x00000000, + 0x782d0000, + 0x00000000, + 0x782e0000, + 0x00000000, + 0x782f0000, + 0x00000000, + 0x780f0000, + 0x00000000, + 0x78230000, + 0x00000e60, + 0x78210000, + 0x00000e80, + 0x7b000005, + 0x00000004, + 0x00000001, + 0x00000000, + 0x00000001, + 0x00000000, + 0x00000000, + 0x05000000, /* cmds end */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, 
+ 0x00000000, + 0x00000000, + 0x00000000, /* state start */ + 0x00000000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* state end */ +}; + +RO_RENDERSTATE(8); diff --git a/drivers/gpu/drm/i915/gt/gen9_renderstate.c b/drivers/gpu/drm/i915/gt/gen9_renderstate.c new file mode 100644 index 000000000000..7d3ac02f0177 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen9_renderstate.c @@ -0,0 +1,999 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Generated by: intel-gpu-tools-1.19-177-g68e2eab2 + */ + +#include "intel_renderstate.h" + +static const u32 gen9_null_state_relocs[] = { + 0x000007a8, + 0x000007b4, + 0x000007bc, + 0x000007cc, + -1, +}; + +static const u32 gen9_null_state_batch[] = { + 0x7a000004, + 0x01000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x69040300, + 0x78140000, + 0x04000000, + 0x7820000a, + 0x00000000, + 0x00000000, + 0x80000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78130002, + 0x00000000, + 0x00000000, + 0x02001808, + 0x781f0004, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78510009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78100007, + 0x00000000, + 0x00000000, + 0x00010000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781b0007, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000800, + 0x00000000, + 0x78110008, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781e0003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781d0009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78120002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78500003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781c0002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x780c0000, + 0x00000000, + 0x78520003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78300000, + 0x08010040, + 0x78310000, + 0x1e000000, + 0x78320000, + 0x1e000000, + 0x78330000, + 0x1e000000, + 0x79190002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x791a0002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x791b0002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79120000, + 0x00000000, + 0x79130000, + 0x00000000, + 0x79140000, + 0x00000000, + 0x79150000, + 0x00000000, + 0x79160000, + 0x00000000, + 0x78150009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78190009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x781a0009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78160009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78170009, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78490001, + 0x00000000, + 0x00000000, + 0x784a0000, + 0x00000000, + 0x784b0000, + 0x00000004, + 0x79170101, + 0x00000000, + 0x00000080, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 
0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79180006, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79180006, + 0x20000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79180006, + 0x40000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79180006, + 0x60000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, 
+ 0x61010011, + 0x00000001, /* reloc */ + 0x00000000, + 0x00000000, + 0x00000001, /* reloc */ + 0x00000000, + 0x00000001, /* reloc */ + 0x00000000, + 0x00000001, + 0x00000000, + 0x00000001, /* reloc */ + 0x00000000, + 0x00001001, + 0x00001001, + 0x00000001, + 0x00001001, + 0x00000000, + 0x00000000, + 0x00000000, + 0x61020001, + 0x00000000, + 0x00000000, + 0x79000002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78050006, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79040002, + 0x00000000, + 0x00000000, + 0x00000000, + 0x79040002, + 0x40000000, + 0x00000000, + 0x00000000, + 0x79040002, + 0x80000000, + 0x00000000, + 0x00000000, + 0x79040002, + 0xc0000000, + 0x00000000, + 0x00000000, + 0x79080001, + 0x00000000, + 0x00000000, + 0x790a0001, + 0x00000000, + 0x00000000, + 0x78060003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78070003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78040001, + 0x00000000, + 0x00000000, + 0x79110000, + 0x00000000, + 0x780d0000, + 0x00000000, + 0x79060000, + 0x00000000, + 0x7907001f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x7902000f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x790c000f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x780a0003, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78080083, + 0x00004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x04004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x08004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x10004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x14004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x18004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x1c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x20004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x24004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x28004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x2c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x30004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x34004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x38004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x3c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x40004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x44004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x48004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x4c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x50004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x54004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x58004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x5c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x60004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x64004000, + 0x00000000, + 
0x00000000, + 0x00000000, + 0x68004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x6c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x70004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x74004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x7c004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80004000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78090043, + 0x02000000, + 0x22220000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x78550003, + 0x0000000f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x680b0001, + 0x780e0000, + 0x00000e01, + 0x78240000, + 0x00000e41, + 0x784f0000, + 0x80000100, + 0x784d0000, + 0x40000000, + 0x782b0000, + 0x00000000, + 0x782c0000, + 0x00000000, + 0x782d0000, + 0x00000000, + 0x782e0000, + 0x00000000, + 0x782f0000, + 0x00000000, + 0x780f0000, + 0x00000000, + 0x78230000, + 0x00000ea0, + 0x78210000, + 0x00000ec0, + 0x78260000, + 0x00000000, + 0x78270000, + 0x00000000, + 0x78280000, + 0x00000000, + 0x78290000, + 0x00000000, + 0x782a0000, + 0x00000000, + 0x7b000005, + 0x00000004, + 0x00000001, + 0x00000000, + 0x00000001, + 0x00000000, + 0x00000000, + 0x05000000, /* cmds end */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* state start */ + 0x00000000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* state end */ +}; + +RO_RENDERSTATE(9); diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index ea56b2cc6095..09c68dda2098 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -27,6 +27,7 @@ #include <uapi/linux/sched/types.h> #include "i915_drv.h" +#include "i915_trace.h" static void irq_enable(struct 
intel_engine_cs *engine) { @@ -34,9 +35,9 @@ static void irq_enable(struct intel_engine_cs *engine) return; /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); + spin_lock(&engine->gt->irq_lock); engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); + spin_unlock(&engine->gt->irq_lock); } static void irq_disable(struct intel_engine_cs *engine) @@ -45,9 +46,9 @@ static void irq_disable(struct intel_engine_cs *engine) return; /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); + spin_lock(&engine->gt->irq_lock); engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); + spin_unlock(&engine->gt->irq_lock); } static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) @@ -66,14 +67,15 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; if (!b->irq_armed) return; - spin_lock_irq(&b->irq_lock); + spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_armed) __intel_breadcrumbs_disarm_irq(b); - spin_unlock_irq(&b->irq_lock); + spin_unlock_irqrestore(&b->irq_lock, flags); } static inline bool __request_completed(const struct i915_request *rq) @@ -212,28 +214,6 @@ static void signal_irq_work(struct irq_work *work) intel_engine_breadcrumbs_irq(engine); } -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->irq_lock); - if (!b->irq_enabled++) - irq_enable(engine); - GEM_BUG_ON(!b->irq_enabled); /* no overflow! */ - spin_unlock_irq(&b->irq_lock); -} - -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->irq_lock); - GEM_BUG_ON(!b->irq_enabled); /* no underflow! 
*/ - if (!--b->irq_enabled) - irq_disable(engine); - spin_unlock_irq(&b->irq_lock); -} - static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 2c454f227c2e..f55691d151ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -53,12 +53,24 @@ int __intel_context_do_pin(struct intel_context *ce) if (likely(!atomic_read(&ce->pin_count))) { intel_wakeref_t wakeref; + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { + err = ce->ops->alloc(ce); + if (unlikely(err)) + goto err; + + __set_bit(CONTEXT_ALLOC_BIT, &ce->flags); + } + err = 0; with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref) err = ce->ops->pin(ce); if (err) goto err; + GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n", + ce->engine->name, ce->timeline->fence_context, + ce->ring->head, ce->ring->tail); + i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ smp_mb__before_atomic(); /* flush pin before it is visible */ @@ -85,6 +97,9 @@ void intel_context_unpin(struct intel_context *ce) mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); if (likely(atomic_dec_and_test(&ce->pin_count))) { + GEM_TRACE("%s context:%llx retire\n", + ce->engine->name, ce->timeline->fence_context); + ce->ops->unpin(ce); i915_gem_context_put(ce->gem_context); @@ -95,11 +110,15 @@ void intel_context_unpin(struct intel_context *ce) intel_context_put(ce); } -static int __context_pin_state(struct i915_vma *vma, unsigned long flags) +static int __context_pin_state(struct i915_vma *vma) { + u64 flags; int err; - err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL); + flags = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; + flags |= PIN_HIGH | PIN_GLOBAL; + + err = i915_vma_pin(vma, 0, 0, flags); if (err) return err; @@ -107,7 +126,7 @@ static int __context_pin_state(struct i915_vma *vma, unsigned long flags) * And mark it as a globally pinned object to let the shrinker know * it cannot reclaim the object until we release it. 
*/ - vma->obj->pin_global++; + i915_vma_make_unshrinkable(vma); vma->obj->mm.dirty = true; return 0; @@ -115,60 +134,65 @@ static int __context_pin_state(struct i915_vma *vma, unsigned long flags) static void __context_unpin_state(struct i915_vma *vma) { - vma->obj->pin_global--; __i915_vma_unpin(vma); + i915_vma_make_shrinkable(vma); } -static void intel_context_retire(struct i915_active *active) +static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); + GEM_TRACE("%s context:%llx retire\n", + ce->engine->name, ce->timeline->fence_context); + if (ce->state) __context_unpin_state(ce->state); + intel_timeline_unpin(ce->timeline); + intel_ring_unpin(ce->ring); intel_context_put(ce); } -void -intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int __intel_context_active(struct i915_active *active) { - GEM_BUG_ON(!engine->cops); + struct intel_context *ce = container_of(active, typeof(*ce), active); + int err; - kref_init(&ce->ref); + intel_context_get(ce); - ce->gem_context = ctx; - ce->engine = engine; - ce->ops = engine->cops; - ce->sseu = engine->sseu; + err = intel_ring_pin(ce->ring); + if (err) + goto err_put; - INIT_LIST_HEAD(&ce->signal_link); - INIT_LIST_HEAD(&ce->signals); + err = intel_timeline_pin(ce->timeline); + if (err) + goto err_ring; - mutex_init(&ce->pin_mutex); + if (!ce->state) + return 0; - i915_active_init(ctx->i915, &ce->active, intel_context_retire); + err = __context_pin_state(ce->state); + if (err) + goto err_timeline; + + return 0; + +err_timeline: + intel_timeline_unpin(ce->timeline); +err_ring: + intel_ring_unpin(ce->ring); +err_put: + intel_context_put(ce); + return err; } -int intel_context_active_acquire(struct intel_context *ce, unsigned long flags) +int intel_context_active_acquire(struct intel_context *ce) { int err; - if (!i915_active_acquire(&ce->active)) - return 0; - - intel_context_get(ce); - - if (!ce->state) - return 0; - - err = __context_pin_state(ce->state, flags); - if (err) { - i915_active_cancel(&ce->active); - intel_context_put(ce); + err = i915_active_acquire(&ce->active); + if (err) return err; - } /* Preallocate tracking nodes */ if (!i915_gem_context_is_kernel(ce->gem_context)) { @@ -190,6 +214,44 @@ void intel_context_active_release(struct intel_context *ce) i915_active_release(&ce->active); } +void +intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!engine->cops); + + kref_init(&ce->ref); + + ce->gem_context = ctx; + ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm); + if (ctx->timeline) + ce->timeline = intel_timeline_get(ctx->timeline); + + ce->engine = engine; + ce->ops = engine->cops; + ce->sseu = engine->sseu; + ce->ring = __intel_context_ring_size(SZ_16K); + + INIT_LIST_HEAD(&ce->signal_link); + INIT_LIST_HEAD(&ce->signals); + + mutex_init(&ce->pin_mutex); + + i915_active_init(ctx->i915, &ce->active, + __intel_context_active, __intel_context_retire); +} + +void intel_context_fini(struct intel_context *ce) +{ + if (ce->timeline) + intel_timeline_put(ce->timeline); + i915_vm_put(ce->vm); + + mutex_destroy(&ce->pin_mutex); + i915_active_fini(&ce->active); +} + static void i915_global_context_shrink(void) { kmem_cache_shrink(global.slab_ce); @@ -218,13 +280,48 @@ int __init i915_global_context_init(void) void intel_context_enter_engine(struct intel_context *ce) { intel_engine_pm_get(ce->engine); + 
intel_timeline_enter(ce->timeline); } void intel_context_exit_engine(struct intel_context *ce) { + intel_timeline_exit(ce->timeline); intel_engine_pm_put(ce->engine); } +int intel_context_prepare_remote_request(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_timeline *tl = ce->timeline; + int err; + + /* Only suitable for use in remotely modifying this context */ + GEM_BUG_ON(rq->hw_context == ce); + + if (rq->timeline != tl) { /* beware timeline sharing */ + err = mutex_lock_interruptible_nested(&tl->mutex, + SINGLE_DEPTH_NESTING); + if (err) + return err; + + /* Queue this switch after current activity by this context. */ + err = i915_active_request_set(&tl->last_request, rq); + mutex_unlock(&tl->mutex); + if (err) + return err; + } + + /* + * Guarantee context image and the timeline remains pinned until the + * modifying request is retired by setting the ce activity tracker. + * + * But we only need to take one pin on the account of it. Or in other + * words transfer the pinned ce object to tracked active request. + */ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + return i915_active_ref(&ce->active, rq->timeline, rq); +} + struct i915_request *intel_context_create_request(struct intel_context *ce) { struct i915_request *rq; @@ -239,3 +336,7 @@ struct i915_request *intel_context_create_request(struct intel_context *ce) return rq; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_context.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index a47275bc4f01..dd742ac2fbdb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -9,12 +9,15 @@ #include <linux/lockdep.h> +#include "i915_active.h" #include "intel_context_types.h" #include "intel_engine_types.h" +#include "intel_timeline_types.h" void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine); +void intel_context_fini(struct intel_context *ce); struct intel_context * intel_context_create(struct i915_gem_context *ctx, @@ -86,23 +89,26 @@ void intel_context_exit_engine(struct intel_context *ce); static inline void intel_context_enter(struct intel_context *ce) { + lockdep_assert_held(&ce->timeline->mutex); if (!ce->active_count++) ce->ops->enter(ce); } static inline void intel_context_mark_active(struct intel_context *ce) { + lockdep_assert_held(&ce->timeline->mutex); ++ce->active_count; } static inline void intel_context_exit(struct intel_context *ce) { + lockdep_assert_held(&ce->timeline->mutex); GEM_BUG_ON(!ce->active_count); if (!--ce->active_count) ce->ops->exit(ce); } -int intel_context_active_acquire(struct intel_context *ce, unsigned long flags); +int intel_context_active_acquire(struct intel_context *ce); void intel_context_active_release(struct intel_context *ce); static inline struct intel_context *intel_context_get(struct intel_context *ce) @@ -116,19 +122,34 @@ static inline void intel_context_put(struct intel_context *ce) kref_put(&ce->ref, ce->ops->destroy); } -static inline int __must_check +static inline struct intel_timeline *__must_check intel_context_timeline_lock(struct intel_context *ce) - __acquires(&ce->ring->timeline->mutex) + __acquires(&ce->timeline->mutex) { - return mutex_lock_interruptible(&ce->ring->timeline->mutex); + struct intel_timeline *tl = ce->timeline; + int err; + + err = mutex_lock_interruptible(&tl->mutex); + if (err) + return ERR_PTR(err); + + return tl; } -static inline void 
intel_context_timeline_unlock(struct intel_context *ce) - __releases(&ce->ring->timeline->mutex) +static inline void intel_context_timeline_unlock(struct intel_timeline *tl) + __releases(&tl->mutex) { - mutex_unlock(&ce->ring->timeline->mutex); + mutex_unlock(&tl->mutex); } +int intel_context_prepare_remote_request(struct intel_context *ce, + struct i915_request *rq); + struct i915_request *intel_context_create_request(struct intel_context *ce); +static inline struct intel_ring *__intel_context_ring_size(u64 sz) +{ + return u64_to_ptr(struct intel_ring, sz); +} + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 08049ee91cee..bf9cedfccbf0 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include "i915_active_types.h" +#include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" @@ -22,6 +23,8 @@ struct intel_context; struct intel_ring; struct intel_context_ops { + int (*alloc)(struct intel_context *ce); + int (*pin)(struct intel_context *ce); void (*unpin)(struct intel_context *ce); @@ -35,20 +38,28 @@ struct intel_context_ops { struct intel_context { struct kref ref; - struct i915_gem_context *gem_context; struct intel_engine_cs *engine; struct intel_engine_cs *inflight; +#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2) +#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2) + + struct i915_address_space *vm; + struct i915_gem_context *gem_context; struct list_head signal_link; struct list_head signals; struct i915_vma *state; struct intel_ring *ring; + struct intel_timeline *timeline; + + unsigned long flags; +#define CONTEXT_ALLOC_BIT 0 u32 *lrc_reg_state; u64 lrc_desc; - unsigned int active_count; /* notionally protected by timeline->mutex */ + unsigned int active_count; /* protected by timeline->mutex */ atomic_t pin_count; struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 2f1c6871ee95..d3c6993f4f46 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -9,12 +9,11 @@ #include <linux/random.h> #include <linux/seqlock.h> -#include "i915_gem_batch_pool.h" #include "i915_pmu.h" #include "i915_reg.h" #include "i915_request.h" #include "i915_selftest.h" -#include "i915_timeline.h" +#include "gt/intel_timeline.h" #include "intel_engine_types.h" #include "intel_gpu_commands.h" #include "intel_workarounds.h" @@ -51,7 +50,7 @@ struct drm_printer; #define ENGINE_READ16(...) __ENGINE_READ_OP(read16, __VA_ARGS__) #define ENGINE_READ(...) __ENGINE_READ_OP(read, __VA_ARGS__) #define ENGINE_READ_FW(...) __ENGINE_READ_OP(read_fw, __VA_ARGS__) -#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__) +#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read_fw, __VA_ARGS__) #define ENGINE_POSTING_READ16(...) 
__ENGINE_READ_OP(posting_read16, __VA_ARGS__) #define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \ @@ -123,73 +122,23 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) return "unknown"; } -void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); - -static inline void -execlists_set_active(struct intel_engine_execlists *execlists, - unsigned int bit) -{ - __set_bit(bit, (unsigned long *)&execlists->active); -} - -static inline bool -execlists_set_active_once(struct intel_engine_execlists *execlists, - unsigned int bit) -{ - return !__test_and_set_bit(bit, (unsigned long *)&execlists->active); -} - -static inline void -execlists_clear_active(struct intel_engine_execlists *execlists, - unsigned int bit) -{ - __clear_bit(bit, (unsigned long *)&execlists->active); -} - -static inline void -execlists_clear_all_active(struct intel_engine_execlists *execlists) -{ - execlists->active = 0; -} - -static inline bool -execlists_is_active(const struct intel_engine_execlists *execlists, - unsigned int bit) -{ - return test_bit(bit, (unsigned long *)&execlists->active); -} - -void execlists_user_begin(struct intel_engine_execlists *execlists, - const struct execlist_port *port); -void execlists_user_end(struct intel_engine_execlists *execlists); - -void -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); - -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); - static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) { return execlists->port_mask + 1; } -static inline struct execlist_port * -execlists_port_complete(struct intel_engine_execlists * const execlists, - struct execlist_port * const port) +static inline struct i915_request * +execlists_active(const struct intel_engine_execlists *execlists) { - const unsigned int m = execlists->port_mask; - - GEM_BUG_ON(port_index(port, execlists) != 0); - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - - memmove(port, port + 1, m * sizeof(struct execlist_port)); - memset(port + m, 0, sizeof(struct execlist_port)); - - return port; + GEM_BUG_ON(execlists->active - execlists->inflight > + execlists_num_ports(execlists)); + return READ_ONCE(*execlists->active); } +struct i915_request * +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); + static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { @@ -244,9 +193,7 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define CNL_HWS_CSB_WRITE_INDEX 0x2f struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, - int size); +intel_engine_create_ring(struct intel_engine_cs *engine, int size); int intel_ring_pin(struct intel_ring *ring); void intel_ring_reset(struct intel_ring *ring, u32 tail); unsigned int intel_ring_update_space(struct intel_ring *ring); @@ -388,9 +335,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine); void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); - void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); @@ -456,8 +400,8 @@ 
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) return cs; } -static inline void intel_engine_reset(struct intel_engine_cs *engine, - bool stalled) +static inline void __intel_engine_reset(struct intel_engine_cs *engine, + bool stalled) { if (engine->reset.reset) engine->reset.reset(engine, stalled); @@ -465,10 +409,9 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine, } bool intel_engine_is_idle(struct intel_engine_cs *engine); -bool intel_engines_are_idle(struct drm_i915_private *dev_priv); +bool intel_engines_are_idle(struct intel_gt *gt); -void intel_engines_reset_default_submission(struct drm_i915_private *i915); -unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); +void intel_engines_reset_default_submission(struct intel_gt *gt); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); @@ -477,9 +420,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); -struct intel_engine_cs * -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); - static inline void intel_engine_context_in(struct intel_engine_cs *engine) { unsigned long flags; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7fd33e81c2d9..82630db0394b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,8 +28,12 @@ #include "i915_drv.h" +#include "gt/intel_gt.h" + #include "intel_engine.h" #include "intel_engine_pm.h" +#include "intel_engine_pool.h" +#include "intel_engine_user.h" #include "intel_context.h" #include "intel_lrc.h" #include "intel_reset.h" @@ -51,30 +55,6 @@ #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) -struct engine_class_info { - const char *name; - u8 uabi_class; -}; - -static const struct engine_class_info intel_engine_classes[] = { - [RENDER_CLASS] = { - .name = "rcs", - .uabi_class = I915_ENGINE_CLASS_RENDER, - }, - [COPY_ENGINE_CLASS] = { - .name = "bcs", - .uabi_class = I915_ENGINE_CLASS_COPY, - }, - [VIDEO_DECODE_CLASS] = { - .name = "vcs", - .uabi_class = I915_ENGINE_CLASS_VIDEO, - }, - [VIDEO_ENHANCEMENT_CLASS] = { - .name = "vecs", - .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE, - }, -}; - #define MAX_MMIO_BASES 3 struct engine_info { unsigned int hw_id; @@ -184,6 +164,7 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) default: MISSING_CASE(INTEL_GEN(dev_priv)); return DEFAULT_LR_CONTEXT_RENDER_SIZE; + case 12: case 11: return GEN11_LR_CONTEXT_RENDER_SIZE; case 10: @@ -255,11 +236,16 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915, return bases[i].base; } -static void __sprint_engine_name(char *name, const struct engine_info *info) +static void __sprint_engine_name(struct intel_engine_cs *engine) { - WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u", - intel_engine_classes[info->class].name, - info->instance) >= INTEL_ENGINE_CS_MAX_NAME); + /* + * Before we know what the uABI name for this engine will be, + * we still would like to keep track of this engine in the debug logs. + * We throw in a ' here as a reminder that this isn't its final name. 
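The comment above explains the provisional engine naming: until uABI instance numbers are assigned at driver registration, the engine keeps a debug-only name with a stray apostrophe (e.g. "rcs'0"), and the GEM_WARN_ON(snprintf(...)) just below produces it. A minimal userspace sketch of that two-phase naming, using a hypothetical class_repr() table that mirrors intel_engine_class_repr() later in this patch:

#include <stdio.h>

/* Hypothetical mirror of intel_engine_class_repr(): class index -> prefix. */
static const char *class_repr(unsigned int class)
{
	static const char * const names[] = { "rcs", "vcs", "vecs", "bcs" };

	return class < 4 ? names[class] : "xxx";
}

int main(void)
{
	char name[16];

	/* Provisional name used in debug logs before registration: rcs'0 */
	snprintf(name, sizeof(name), "%s'%u", class_repr(0), 0u);
	printf("provisional: %s\n", name);

	/* Final user-facing name once per-class uABI instances are known: rcs0 */
	snprintf(name, sizeof(name), "%s%u", class_repr(0), 0u);
	printf("final:       %s\n", name);

	return 0;
}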
+ */ + GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u", + intel_engine_class_repr(engine->class), + engine->instance) >= sizeof(engine->name)); } void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask) @@ -283,15 +269,11 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine) intel_engine_set_hwsp_writemask(engine, ~0u); } -static int -intel_engine_setup(struct drm_i915_private *dev_priv, - enum intel_engine_id id) +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; struct intel_engine_cs *engine; - GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); - BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); @@ -301,10 +283,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv, if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE)) return -EINVAL; - if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance])) + if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance])) return -EINVAL; - GEM_BUG_ON(dev_priv->engine[id]); engine = kzalloc(sizeof(*engine), GFP_KERNEL); if (!engine) return -ENOMEM; @@ -313,13 +294,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->id = id; engine->mask = BIT(id); - engine->i915 = dev_priv; - engine->uncore = &dev_priv->uncore; - __sprint_engine_name(engine->name, info); + engine->i915 = gt->i915; + engine->gt = gt; + engine->uncore = gt->uncore; engine->hw_id = engine->guc_id = info->hw_id; - engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases); + engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases); + engine->class = info->class; engine->instance = info->instance; + __sprint_engine_name(engine); /* * To be overridden by the backend on setup. 
However to facilitate @@ -327,14 +310,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv, */ engine->destroy = (typeof(engine->destroy))kfree; - engine->uabi_class = intel_engine_classes[info->class].uabi_class; - - engine->context_size = intel_engine_context_size(dev_priv, + engine->context_size = intel_engine_context_size(gt->i915, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; if (engine->context_size) - DRIVER_CAPS(dev_priv)->has_logical_contexts = true; + DRIVER_CAPS(gt->i915)->has_logical_contexts = true; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; @@ -346,8 +327,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Scrub mmio state on takeover */ intel_engine_sanitize_mmio(engine); - dev_priv->engine_class[info->class][info->instance] = engine; - dev_priv->engine[id] = engine; + gt->engine_class[info->class][info->instance] = engine; + + intel_engine_add_user(engine); + gt->i915->engine[id] = engine; + return 0; } @@ -423,14 +407,14 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) WARN_ON(engine_mask & GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); - if (i915_inject_load_failure()) + if (i915_inject_probe_failure(i915)) return -ENODEV; for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { if (!HAS_ENGINE(i915, i)) continue; - err = intel_engine_setup(i915, i); + err = intel_engine_setup(&i915->gt, i); if (err) goto cleanup; @@ -445,15 +429,9 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) if (WARN_ON(mask != engine_mask)) device_info->engine_mask = mask; - /* We always presume we have at least RCS available for later probing */ - if (WARN_ON(!HAS_ENGINE(i915, RCS0))) { - err = -ENODEV; - goto cleanup; - } - RUNTIME_INFO(i915)->num_engines = hweight32(mask); - i915_check_and_clear_faults(i915); + intel_gt_check_and_clear_faults(&i915->gt); intel_setup_engine_capabilities(i915); @@ -495,11 +473,6 @@ cleanup: return err; } -static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) -{ - i915_gem_batch_pool_init(&engine->batch_pool, engine); -} - void intel_engine_init_execlists(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -508,6 +481,10 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); + memset(execlists->pending, 0, sizeof(execlists->pending)); + execlists->active = + memset(execlists->inflight, 0, sizeof(execlists->inflight)); + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; } @@ -577,7 +554,7 @@ static int init_status_page(struct intel_engine_cs *engine) i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -621,14 +598,19 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init_hangcheck(engine); - intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); + intel_engine_pool_init(&engine->pool); + /* Use the whole device by default */ engine->sseu = intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); + intel_engine_init_workarounds(engine); + 
intel_engine_init_whitelist(engine); + intel_engine_init_ctx_wa(engine); + return 0; } @@ -675,49 +657,9 @@ cleanup: return err; } -void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) -{ - static const struct { - u8 engine; - u8 sched; - } map[] = { -#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) } - MAP(PREEMPTION, PREEMPTION), - MAP(SEMAPHORES, SEMAPHORES), -#undef MAP - }; - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 enabled, disabled; - - enabled = 0; - disabled = 0; - for_each_engine(engine, i915, id) { /* all engines must agree! */ - int i; - - if (engine->schedule) - enabled |= (I915_SCHEDULER_CAP_ENABLED | - I915_SCHEDULER_CAP_PRIORITY); - else - disabled |= (I915_SCHEDULER_CAP_ENABLED | - I915_SCHEDULER_CAP_PRIORITY); - - for (i = 0; i < ARRAY_SIZE(map); i++) { - if (engine->flags & BIT(map[i].engine)) - enabled |= BIT(map[i].sched); - else - disabled |= BIT(map[i].sched); - } - } - - i915->caps.scheduler = enabled & ~disabled; - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED)) - i915->caps.scheduler = 0; -} - struct measure_breadcrumb { struct i915_request rq; - struct i915_timeline timeline; + struct intel_timeline timeline; struct intel_ring ring; u32 cs[1024]; }; @@ -727,19 +669,17 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) struct measure_breadcrumb *frame; int dw = -ENOMEM; - GEM_BUG_ON(!engine->i915->gt.scratch); + GEM_BUG_ON(!engine->gt->scratch); frame = kzalloc(sizeof(*frame), GFP_KERNEL); if (!frame) return -ENOMEM; - if (i915_timeline_init(engine->i915, - &frame->timeline, - engine->status_page.vma)) + if (intel_timeline_init(&frame->timeline, + engine->gt, + engine->status_page.vma)) goto out_frame; - INIT_LIST_HEAD(&frame->ring.request_list); - frame->ring.timeline = &frame->timeline; frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); frame->ring.effective_size = frame->ring.size; @@ -750,42 +690,22 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.ring = &frame->ring; frame->rq.timeline = &frame->timeline; - dw = i915_timeline_pin(&frame->timeline); + dw = intel_timeline_pin(&frame->timeline); if (dw < 0) goto out_timeline; dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ - i915_timeline_unpin(&frame->timeline); + intel_timeline_unpin(&frame->timeline); out_timeline: - i915_timeline_fini(&frame->timeline); + intel_timeline_fini(&frame->timeline); out_frame: kfree(frame); return dw; } -static int pin_context(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context **out) -{ - struct intel_context *ce; - int err; - - ce = i915_gem_context_get_engine(ctx, engine->id); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - err = intel_context_pin(ce); - intel_context_put(ce); - if (err) - return err; - - *out = ce; - return 0; -} - void intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) { @@ -807,6 +727,27 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) #endif } +static struct intel_context * +create_kernel_context(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + int err; + + ce = intel_context_create(engine->i915->kernel_context, engine); + if (IS_ERR(ce)) + return ce; + + ce->ring = __intel_context_ring_size(SZ_4K); + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + return ERR_PTR(err); + } + + return ce; +} + /** * 
intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -820,29 +761,24 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) */ int intel_engine_init_common(struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = engine->i915; + struct intel_context *ce; int ret; - /* We may need to do things with the shrinker which + engine->set_default_submission(engine); + + /* + * We may need to do things with the shrinker which * require us to immediately switch back to the default * context. This can cause a problem as pinning the * default context also requires GTT space which may not * be available. To avoid this we always pin the default * context. */ - ret = pin_context(i915->kernel_context, engine, - &engine->kernel_context); - if (ret) - return ret; + ce = create_kernel_context(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); - /* - * Similarly the preempt context must always be available so that - * we can interrupt the engine at any time. However, as preemption - * is optional, we allow it to fail. - */ - if (i915->preempt_context) - pin_context(i915->preempt_context, engine, - &engine->preempt_context); + engine->kernel_context = ce; ret = measure_breadcrumb_dw(engine); if (ret < 0) @@ -850,14 +786,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine) engine->emit_fini_breadcrumb_dw = ret; - engine->set_default_submission(engine); - return 0; err_unpin: - if (engine->preempt_context) - intel_context_unpin(engine->preempt_context); - intel_context_unpin(engine->kernel_context); + intel_context_unpin(ce); + intel_context_put(ce); return ret; } @@ -874,16 +807,15 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) cleanup_status_page(engine); + intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); if (engine->default_state) i915_gem_object_put(engine->default_state); - if (engine->preempt_context) - intel_context_unpin(engine->preempt_context); intel_context_unpin(engine->kernel_context); + intel_context_put(engine->kernel_context); GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); intel_wa_list_free(&engine->ctx_wa_list); @@ -966,52 +898,23 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) } } -u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; - u32 mcr_s_ss_select; - u32 slice = fls(sseu->slice_mask); - u32 subslice = fls(sseu->subslice_mask[slice]); - - if (IS_GEN(dev_priv, 10)) - mcr_s_ss_select = GEN8_MCR_SLICE(slice) | - GEN8_MCR_SUBSLICE(subslice); - else if (INTEL_GEN(dev_priv) >= 11) - mcr_s_ss_select = GEN11_MCR_SLICE(slice) | - GEN11_MCR_SUBSLICE(subslice); - else - mcr_s_ss_select = 0; - - return mcr_s_ss_select; -} - static u32 read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice, i915_reg_t reg) { struct drm_i915_private *i915 = engine->i915; struct intel_uncore *uncore = engine->uncore; - u32 mcr_slice_subslice_mask; - u32 mcr_slice_subslice_select; - u32 default_mcr_s_ss_select; - u32 mcr; - u32 ret; + u32 mcr_mask, mcr_ss, mcr, old_mcr, val; enum forcewake_domains fw_domains; if (INTEL_GEN(i915) >= 11) { - mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | - GEN11_MCR_SUBSLICE_MASK; - mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) | - GEN11_MCR_SUBSLICE(subslice); + mcr_mask = 
GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); } else { - mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | - GEN8_MCR_SUBSLICE_MASK; - mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) | - GEN8_MCR_SUBSLICE(subslice); + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); } - default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(i915); - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); fw_domains |= intel_uncore_forcewake_for_reg(uncore, @@ -1021,26 +924,23 @@ read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice, spin_lock_irq(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw_domains); - mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); - WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) != - default_mcr_s_ss_select); - - mcr &= ~mcr_slice_subslice_mask; - mcr |= mcr_slice_subslice_select; + mcr &= ~mcr_mask; + mcr |= mcr_ss; intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - ret = intel_uncore_read_fw(uncore, reg); + val = intel_uncore_read_fw(uncore, reg); - mcr &= ~mcr_slice_subslice_mask; - mcr |= default_mcr_s_ss_select; + mcr &= ~mcr_mask; + mcr |= old_mcr & mcr_mask; intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); intel_uncore_forcewake_put__locked(uncore, fw_domains); spin_unlock_irq(&uncore->lock); - return ret; + return val; } /* NB: please notice the memset */ @@ -1108,16 +1008,12 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, static bool ring_is_idle(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - intel_wakeref_t wakeref; bool idle = true; if (I915_SELFTEST_ONLY(!engine->mmio_base)) return true; - /* If the whole device is asleep, the engine must be idle */ - wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm); - if (!wakeref) + if (!intel_engine_pm_get_if_awake(engine)) return true; /* First check that no commands are left in the ring */ @@ -1126,11 +1022,11 @@ static bool ring_is_idle(struct intel_engine_cs *engine) idle = false; /* No bit for gen2, so assume the CS parser is idle */ - if (INTEL_GEN(dev_priv) > 2 && + if (INTEL_GEN(engine->i915) > 2 && !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE)) idle = false; - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + intel_engine_pm_put(engine); return idle; } @@ -1145,17 +1041,17 @@ static bool ring_is_idle(struct intel_engine_cs *engine) bool intel_engine_is_idle(struct intel_engine_cs *engine) { /* More white lies, if wedged, hw state is inconsistent */ - if (i915_reset_failed(engine->i915)) + if (intel_gt_is_wedged(engine->gt)) return true; - if (!intel_wakeref_active(&engine->wakeref)) + if (!intel_engine_pm_is_awake(engine)) return true; /* Waiting to drain ELSP? 
*/ - if (READ_ONCE(engine->execlists.active)) { + if (execlists_active(&engine->execlists)) { struct tasklet_struct *t = &engine->execlists.tasklet; - synchronize_hardirq(engine->i915->drm.irq); + synchronize_hardirq(engine->i915->drm.pdev->irq); local_bh_disable(); if (tasklet_trylock(t)) { @@ -1169,7 +1065,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) /* Otherwise flush the tasklet if it was on another cpu */ tasklet_unlock_wait(t); - if (READ_ONCE(engine->execlists.active)) + if (execlists_active(&engine->execlists)) return false; } @@ -1181,7 +1077,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return ring_is_idle(engine); } -bool intel_engines_are_idle(struct drm_i915_private *i915) +bool intel_engines_are_idle(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1190,14 +1086,14 @@ bool intel_engines_are_idle(struct drm_i915_private *i915) * If the driver is wedged, HW state may be very inconsistent and * report that it is still busy, even though we have stopped using it. */ - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return true; /* Already parked (and passed an idleness test); must still be idle */ - if (!READ_ONCE(i915->gt.awake)) + if (!READ_ONCE(gt->awake)) return true; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { if (!intel_engine_is_idle(engine)) return false; } @@ -1205,12 +1101,12 @@ bool intel_engines_are_idle(struct drm_i915_private *i915) return true; } -void intel_engines_reset_default_submission(struct drm_i915_private *i915) +void intel_engines_reset_default_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt->i915, id) engine->set_default_submission(engine); } @@ -1229,20 +1125,6 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } -unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int which; - - which = 0; - for_each_engine(engine, i915, id) - if (engine->default_state) - which |= BIT(engine->uabi_class); - - return which; -} - static int print_sched_attr(struct drm_i915_private *i915, const struct i915_sched_attr *attr, char *buf, int x, int len) @@ -1320,7 +1202,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, unsigned long flags; u64 addr; - if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7)) + if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); drm_printf(m, "\tRING_START: 0x%08x\n", ENGINE_READ(engine, RING_START)); @@ -1367,6 +1249,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } if (HAS_EXECLISTS(dev_priv)) { + struct i915_request * const *port, *rq; const u32 *hws = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; const u8 num_entries = execlists->csb_size; @@ -1399,27 +1282,33 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } spin_lock_irqsave(&engine->active.lock, flags); - for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - struct i915_request *rq; - unsigned int count; + for (port = execlists->active; (rq = *port); port++) { + char hdr[80]; + int len; + + len = snprintf(hdr, sizeof(hdr), + "\t\tActive[%d: ", + (int)(port - execlists->active)); + if (!i915_request_signaled(rq)) + len += snprintf(hdr + len, sizeof(hdr) - len, + 
"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ", + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset, + hwsp_seqno(rq)); + snprintf(hdr + len, sizeof(hdr) - len, "rq: "); + print_request(m, rq, hdr); + } + for (port = execlists->pending; (rq = *port); port++) { char hdr[80]; - rq = port_unpack(&execlists->port[idx], &count); - if (!rq) { - drm_printf(m, "\t\tELSP[%d] idle\n", idx); - } else if (!i915_request_signaled(rq)) { - snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", - idx, count, - i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, - hwsp_seqno(rq)); - print_request(m, rq, hdr); - } else { - print_request(m, rq, "\t\tELSP[%d] rq: "); - } + snprintf(hdr, sizeof(hdr), + "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", + (int)(port - execlists->pending), + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset, + hwsp_seqno(rq)); + print_request(m, rq, hdr); } - drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); spin_unlock_irqrestore(&engine->active.lock, flags); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", @@ -1471,6 +1360,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq; intel_wakeref_t wakeref; + unsigned long flags; if (header) { va_list ap; @@ -1480,7 +1370,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, va_end(ap); } - if (i915_reset_failed(engine->i915)) + if (intel_gt_is_wedged(engine->gt)) drm_printf(m, "*** WEDGED ***\n"); drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count)); @@ -1490,10 +1380,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, i915_reset_engine_count(error, engine), i915_reset_count(error)); - rcu_read_lock(); - drm_printf(m, "\tRequests:\n"); + spin_lock_irqsave(&engine->active.lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); @@ -1513,9 +1402,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request_ring(m, rq); } + spin_unlock_irqrestore(&engine->active.lock, flags); - rcu_read_unlock(); - + drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); if (wakeref) { intel_engine_print_registers(engine, m); @@ -1534,29 +1423,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_engine_print_breadcrumbs(engine, m); } -static u8 user_class_map[] = { - [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS, - [I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS, - [I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS, - [I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS, -}; - -struct intel_engine_cs * -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) -{ - if (class >= ARRAY_SIZE(user_class_map)) - return NULL; - - class = user_class_map[class]; - - GEM_BUG_ON(class > MAX_ENGINE_CLASS); - - if (instance > MAX_ENGINE_INSTANCE) - return NULL; - - return i915->engine_class[class][instance]; -} - /** * intel_enable_engine_stats() - Enable engine busy tracking on engine * @engine: engine to enable stats collection @@ -1583,15 +1449,19 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } if (engine->stats.enabled++ == 0) { - const struct execlist_port *port = execlists->port; - unsigned int num_ports = execlists_num_ports(execlists); + struct i915_request * const *port; + struct i915_request *rq; engine->stats.enabled_at = 
ktime_get(); /* XXX submission method oblivious? */ - while (num_ports-- && port_isset(port)) { + for (port = execlists->active; (rq = *port); port++) engine->stats.active++; - port++; + + for (port = execlists->pending; (rq = *port); port++) { + /* Exclude any contexts already counted in active */ + if (!intel_context_inflight_count(rq->hw_context)) + engine->stats.active++; } if (engine->stats.active) @@ -1672,7 +1542,6 @@ struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine) { struct i915_request *request, *active = NULL; - unsigned long flags; /* * We are called by the error capture, reset and to dump engine @@ -1685,7 +1554,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - spin_lock_irqsave(&engine->active.lock, flags); + lockdep_assert_held(&engine->active.lock); list_for_each_entry(request, &engine->active.requests, sched.link) { if (i915_request_completed(request)) continue; @@ -1700,11 +1569,12 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) active = request; break; } - spin_unlock_irqrestore(&engine->active.lock, flags); return active; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "mock_engine.c" +#include "selftest_engine.c" #include "selftest_engine_cs.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 2ce00d3dc42a..65b5ca74b394 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -8,6 +8,8 @@ #include "intel_engine.h" #include "intel_engine_pm.h" +#include "intel_engine_pool.h" +#include "intel_gt.h" #include "intel_gt_pm.h" static int __engine_unpark(struct intel_wakeref *wf) @@ -18,7 +20,7 @@ static int __engine_unpark(struct intel_wakeref *wf) GEM_TRACE("%s\n", engine->name); - intel_gt_pm_get(engine->i915); + intel_gt_pm_get(engine->gt); /* Pin the default state for fast resets from atomic context. */ map = NULL; @@ -35,38 +37,51 @@ static int __engine_unpark(struct intel_wakeref *wf) return 0; } -void intel_engine_pm_get(struct intel_engine_cs *engine) +#if IS_ENABLED(CONFIG_LOCKDEP) + +static inline unsigned long __timeline_mark_lock(struct intel_context *ce) { - intel_wakeref_get(&engine->i915->runtime_pm, &engine->wakeref, __engine_unpark); + unsigned long flags; + + local_irq_save(flags); + mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); + + return flags; } -void intel_engine_park(struct intel_engine_cs *engine) +static inline void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { - /* - * We are committed now to parking this engine, make sure there - * will be no more interrupts arriving later and the engine - * is truly idle. 
- */ - if (wait_for(intel_engine_is_idle(engine), 10)) { - struct drm_printer p = drm_debug_printer(__func__); + mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_); + local_irq_restore(flags); +} - dev_err(engine->i915->drm.dev, - "%s is not idle before parking\n", - engine->name); - intel_engine_dump(engine, &p, NULL); - } +#else + +static inline unsigned long __timeline_mark_lock(struct intel_context *ce) +{ + return 0; +} + +static inline void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) +{ } +#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ + static bool switch_to_kernel_context(struct intel_engine_cs *engine) { struct i915_request *rq; + unsigned long flags; + bool result = true; /* Already inside the kernel context, safe to power down. */ if (engine->wakeref_serial == engine->serial) return true; /* GPU is pointing to the void, as good as in the kernel context. */ - if (i915_reset_failed(engine->i915)) + if (intel_gt_is_wedged(engine->gt)) return true; /* @@ -81,18 +96,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * retiring the last request, thus all rings should be empty and * all timelines idle. */ + flags = __timeline_mark_lock(engine->kernel_context); + rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ - return true; + goto out_unlock; + + intel_timeline_enter(rq->timeline); /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; + i915_request_add_active_barriers(rq); - i915_request_add_barriers(rq); + /* Install ourselves as a preemption barrier */ + rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE; __i915_request_commit(rq); - return false; + /* Release our exclusive hold on the engine */ + __intel_wakeref_defer_park(&engine->wakeref); + __i915_request_queue(rq, NULL); + + result = false; +out_unlock: + __timeline_mark_unlock(engine->kernel_context, flags); + return result; } static int __engine_park(struct intel_wakeref *wf) @@ -115,6 +143,7 @@ static int __engine_park(struct intel_wakeref *wf) GEM_TRACE("%s\n", engine->name); intel_engine_disarm_breadcrumbs(engine); + intel_engine_pool_park(&engine->pool); /* Must be reset upon idling, or we may miss the busy wakeup. 
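The assertion that follows checks that execlists.queue_priority_hint has dropped back to INT_MIN before the engine parks. One reading, and it is only an assumption here, is that the hint acts as the threshold for kicking the submission tasklet, so while idle it must sit at the lowest possible value or a later low-priority request could fail to wake the engine. A tiny sketch of that threshold idea, with a hypothetical need_kick() helper:

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical: submission kicks the tasklet only when the incoming
 * priority beats the current queue priority hint. */
static bool need_kick(int queue_priority_hint, int new_prio)
{
	return new_prio > queue_priority_hint;
}

int main(void)
{
	/* Parked/idle: hint at INT_MIN, so any request triggers a kick. */
	printf("idle, lowest prio: kick=%d\n", need_kick(INT_MIN, INT_MIN + 1));

	/* Something already queued at 0: a lower-priority request waits. */
	printf("busy, lower prio:  kick=%d\n", need_kick(0, -10));

	return 0;
}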
*/ GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); @@ -129,40 +158,22 @@ static int __engine_park(struct intel_wakeref *wf) engine->execlists.no_priolist = false; - intel_gt_pm_put(engine->i915); + intel_gt_pm_put(engine->gt); return 0; } -void intel_engine_pm_put(struct intel_engine_cs *engine) -{ - intel_wakeref_put(&engine->i915->runtime_pm, &engine->wakeref, __engine_park); -} +static const struct intel_wakeref_ops wf_ops = { + .get = __engine_unpark, + .put = __engine_park, +}; void intel_engine_init__pm(struct intel_engine_cs *engine) { - intel_wakeref_init(&engine->wakeref); -} - -int intel_engines_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) { - intel_engine_pm_get(engine); - engine->serial++; /* kernel context lost */ - err = engine->resume(engine); - intel_engine_pm_put(engine); - if (err) { - dev_err(i915->drm.dev, - "Failed to restart %s (%d)\n", - engine->name, err); - break; - } - } - intel_gt_pm_put(i915); + struct intel_runtime_pm *rpm = &engine->i915->runtime_pm; - return err; + intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_engine_pm.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index b326cd993d60..739c50fefcef 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -7,16 +7,30 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H -struct drm_i915_private; -struct intel_engine_cs; +#include "intel_engine_types.h" +#include "intel_wakeref.h" -void intel_engine_pm_get(struct intel_engine_cs *engine); -void intel_engine_pm_put(struct intel_engine_cs *engine); +static inline bool +intel_engine_pm_is_awake(const struct intel_engine_cs *engine) +{ + return intel_wakeref_is_active(&engine->wakeref); +} -void intel_engine_park(struct intel_engine_cs *engine); +static inline void intel_engine_pm_get(struct intel_engine_cs *engine) +{ + intel_wakeref_get(&engine->wakeref); +} -void intel_engine_init__pm(struct intel_engine_cs *engine); +static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) +{ + return intel_wakeref_get_if_active(&engine->wakeref); +} + +static inline void intel_engine_pm_put(struct intel_engine_cs *engine) +{ + intel_wakeref_put(&engine->wakeref); +} -int intel_engines_resume(struct drm_i915_private *i915); +void intel_engine_init__pm(struct intel_engine_cs *engine); #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c new file mode 100644 index 000000000000..4cd54c569911 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -0,0 +1,177 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#include "gem/i915_gem_object.h" + +#include "i915_drv.h" +#include "intel_engine_pm.h" +#include "intel_engine_pool.h" + +static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool) +{ + return container_of(pool, struct intel_engine_cs, pool); +} + +static struct list_head * +bucket_for_size(struct intel_engine_pool *pool, size_t sz) +{ + int n; + + /* + * Compute a power-of-two bucket, but throw everything greater than + * 16KiB into the same bucket: i.e. the buckets hold objects of + * (1 page, 2 pages, 4 pages, 8+ pages). 
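The bucketing described above, and computed just below with fls(), maps an object size to one of four free lists: 1 page, 2 pages, 4 pages, then everything of 8 pages and up shares the last bucket. A standalone sketch, assuming 4 KiB pages and a portable stand-in for the kernel's fls():

#include <stddef.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages */
#define NUM_BUCKETS 4

/* Portable stand-in for the kernel's fls(): 1-based index of the highest
 * set bit, 0 when no bit is set. */
static int fls_compat(unsigned long v)
{
	int n = 0;

	while (v) {
		n++;
		v >>= 1;
	}
	return n;
}

static int bucket_for_size(size_t sz)
{
	int n = fls_compat(sz >> PAGE_SHIFT) - 1;

	if (n >= NUM_BUCKETS)
		n = NUM_BUCKETS - 1;
	if (n < 0)	/* defensive; pool requests are at least one page */
		n = 0;
	return n;
}

int main(void)
{
	static const size_t sizes[] = { 4096, 8192, 16384, 32768, 1 << 20 };
	size_t i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%zu bytes -> bucket %d\n", sizes[i], bucket_for_size(sizes[i]));

	return 0;
}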
+ */ + n = fls(sz >> PAGE_SHIFT) - 1; + if (n >= ARRAY_SIZE(pool->cache_list)) + n = ARRAY_SIZE(pool->cache_list) - 1; + + return &pool->cache_list[n]; +} + +static void node_free(struct intel_engine_pool_node *node) +{ + i915_gem_object_put(node->obj); + i915_active_fini(&node->active); + kfree(node); +} + +static int pool_active(struct i915_active *ref) +{ + struct intel_engine_pool_node *node = + container_of(ref, typeof(*node), active); + struct dma_resv *resv = node->obj->base.resv; + int err; + + if (dma_resv_trylock(resv)) { + dma_resv_add_excl_fence(resv, NULL); + dma_resv_unlock(resv); + } + + err = i915_gem_object_pin_pages(node->obj); + if (err) + return err; + + /* Hide this pinned object from the shrinker until retired */ + i915_gem_object_make_unshrinkable(node->obj); + + return 0; +} + +static void pool_retire(struct i915_active *ref) +{ + struct intel_engine_pool_node *node = + container_of(ref, typeof(*node), active); + struct intel_engine_pool *pool = node->pool; + struct list_head *list = bucket_for_size(pool, node->obj->base.size); + unsigned long flags; + + GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); + + i915_gem_object_unpin_pages(node->obj); + + /* Return this object to the shrinker pool */ + i915_gem_object_make_purgeable(node->obj); + + spin_lock_irqsave(&pool->lock, flags); + list_add(&node->link, list); + spin_unlock_irqrestore(&pool->lock, flags); +} + +static struct intel_engine_pool_node * +node_create(struct intel_engine_pool *pool, size_t sz) +{ + struct intel_engine_cs *engine = to_engine(pool); + struct intel_engine_pool_node *node; + struct drm_i915_gem_object *obj; + + node = kmalloc(sizeof(*node), + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (!node) + return ERR_PTR(-ENOMEM); + + node->pool = pool; + i915_active_init(engine->i915, &node->active, pool_active, pool_retire); + + obj = i915_gem_object_create_internal(engine->i915, sz); + if (IS_ERR(obj)) { + i915_active_fini(&node->active); + kfree(node); + return ERR_CAST(obj); + } + + node->obj = obj; + return node; +} + +struct intel_engine_pool_node * +intel_engine_pool_get(struct intel_engine_pool *pool, size_t size) +{ + struct intel_engine_pool_node *node; + struct list_head *list; + unsigned long flags; + int ret; + + GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); + + size = PAGE_ALIGN(size); + list = bucket_for_size(pool, size); + + spin_lock_irqsave(&pool->lock, flags); + list_for_each_entry(node, list, link) { + if (node->obj->base.size < size) + continue; + list_del(&node->link); + break; + } + spin_unlock_irqrestore(&pool->lock, flags); + + if (&node->link == list) { + node = node_create(pool, size); + if (IS_ERR(node)) + return node; + } + + ret = i915_active_acquire(&node->active); + if (ret) { + node_free(node); + return ERR_PTR(ret); + } + + return node; +} + +void intel_engine_pool_init(struct intel_engine_pool *pool) +{ + int n; + + spin_lock_init(&pool->lock); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) + INIT_LIST_HEAD(&pool->cache_list[n]); +} + +void intel_engine_pool_park(struct intel_engine_pool *pool) +{ + int n; + + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + struct list_head *list = &pool->cache_list[n]; + struct intel_engine_pool_node *node, *nn; + + list_for_each_entry_safe(node, nn, list, link) + node_free(node); + + INIT_LIST_HEAD(list); + } +} + +void intel_engine_pool_fini(struct intel_engine_pool *pool) +{ + int n; + + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) + GEM_BUG_ON(!list_empty(&pool->cache_list[n])); +} 
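That completes intel_engine_pool.c. Read together with the helpers in intel_engine_pool.h just below, the intended flow appears to be: a submission path grabs a node of at least the size it needs (reusing a cached buffer when one is big enough), marks it active against the request that uses it, and retirement drops it back onto its size bucket for the next user. A much-simplified userspace model of that reuse pattern, hypothetical names, single list, no locking and no shrinker interaction:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical model: one cache list, first fit by size, nodes return to
 * the list when the work using them "retires". */
struct node {
	struct node *next;
	size_t size;
	void *mem;
};

static struct node *cache;

static struct node *pool_get(size_t size)
{
	struct node **p, *n;

	/* Reuse the first cached node that is big enough. */
	for (p = &cache; (n = *p); p = &n->next) {
		if (n->size >= size) {
			*p = n->next;
			return n;
		}
	}

	/* Nothing suitable cached; allocate a fresh node. */
	n = malloc(sizeof(*n));
	if (!n)
		return NULL;
	n->size = size;
	n->mem = malloc(size);
	if (!n->mem) {
		free(n);
		return NULL;
	}
	return n;
}

static void pool_retire(struct node *n)
{
	/* Back onto the free list, keeping the backing storage around. */
	n->next = cache;
	cache = n;
}

int main(void)
{
	struct node *a, *b;

	a = pool_get(8192);
	if (!a)
		return 1;
	pool_retire(a);

	/* A smaller follow-up request reuses the retired 8 KiB node. */
	b = pool_get(4096);
	if (!b)
		return 1;
	printf("reused: %s\n", a == b ? "yes" : "no");

	pool_retire(b);
	while (cache) {
		struct node *n = cache;

		cache = n->next;
		free(n->mem);
		free(n);
	}
	return 0;
}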
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h new file mode 100644 index 000000000000..8d069efd9457 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef INTEL_ENGINE_POOL_H +#define INTEL_ENGINE_POOL_H + +#include "intel_engine_pool_types.h" +#include "i915_active.h" +#include "i915_request.h" + +struct intel_engine_pool_node * +intel_engine_pool_get(struct intel_engine_pool *pool, size_t size); + +static inline int +intel_engine_pool_mark_active(struct intel_engine_pool_node *node, + struct i915_request *rq) +{ + return i915_active_ref(&node->active, rq->timeline, rq); +} + +static inline void +intel_engine_pool_put(struct intel_engine_pool_node *node) +{ + i915_active_release(&node->active); +} + +void intel_engine_pool_init(struct intel_engine_pool *pool); +void intel_engine_pool_park(struct intel_engine_pool *pool); +void intel_engine_pool_fini(struct intel_engine_pool *pool); + +#endif /* INTEL_ENGINE_POOL_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h new file mode 100644 index 000000000000..e31ee361b76f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef INTEL_ENGINE_POOL_TYPES_H +#define INTEL_ENGINE_POOL_TYPES_H + +#include <linux/list.h> +#include <linux/spinlock.h> + +#include "i915_active_types.h" + +struct drm_i915_gem_object; + +struct intel_engine_pool { + spinlock_t lock; + struct list_head cache_list[4]; +}; + +struct intel_engine_pool_node { + struct i915_active active; + struct drm_i915_gem_object *obj; + struct list_head link; + struct intel_engine_pool *pool; +}; + +#endif /* INTEL_ENGINE_POOL_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 868b220214f8..a82cea95c2f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -12,18 +12,40 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/llist.h> +#include <linux/rbtree.h> +#include <linux/timer.h> #include <linux/types.h> #include "i915_gem.h" -#include "i915_gem_batch_pool.h" #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "i915_timeline_types.h" +#include "intel_engine_pool_types.h" #include "intel_sseu.h" +#include "intel_timeline_types.h" #include "intel_wakeref.h" #include "intel_workarounds_types.h" +/* Legacy HW Engine ID */ + +#define RCS0_HW 0 +#define VCS0_HW 1 +#define BCS0_HW 2 +#define VECS0_HW 3 +#define VCS1_HW 4 +#define VCS2_HW 6 +#define VCS3_HW 7 +#define VECS1_HW 12 + +/* Gen11+ HW Engine class + instance */ +#define RENDER_CLASS 0 +#define VIDEO_DECODE_CLASS 1 +#define VIDEO_ENHANCEMENT_CLASS 2 +#define COPY_ENGINE_CLASS 3 +#define OTHER_CLASS 4 +#define MAX_ENGINE_CLASS 4 +#define MAX_ENGINE_INSTANCE 3 + #define I915_MAX_SLICES 3 #define I915_MAX_SUBSLICES 8 @@ -35,6 +57,7 @@ struct drm_i915_reg_table; struct i915_gem_context; struct i915_request; struct i915_sched_attr; +struct intel_gt; struct intel_uncore; typedef u8 intel_engine_mask_t; @@ -66,9 +89,17 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; - struct i915_timeline *timeline; - struct list_head request_list; - struct list_head active_link; + /* + * As we 
have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. + */ + atomic_t pin_count; u32 head; u32 tail; @@ -138,6 +169,11 @@ struct intel_engine_execlists { struct tasklet_struct tasklet; /** + * @timer: kick the current context if its timeslice expires + */ + struct timer_list timer; + + /** * @default_priolist: priority list for I915_PRIORITY_NORMAL */ struct i915_priolist default_priolist; @@ -160,51 +196,28 @@ struct intel_engine_execlists { */ u32 __iomem *ctrl_reg; +#define EXECLIST_MAX_PORTS 2 + /** + * @active: the currently known context executing on HW + */ + struct i915_request * const *active; /** - * @port: execlist port states + * @inflight: the set of contexts submitted and acknowleged by HW * - * For each hardware ELSP (ExecList Submission Port) we keep - * track of the last request and the number of times we submitted - * that port to hw. We then count the number of times the hw reports - * a context completion or preemption. As only one context can - * be active on hw, we limit resubmission of context to port[0]. This - * is called Lite Restore, of the context. + * The set of inflight contexts is managed by reading CS events + * from the HW. On a context-switch event (not preemption), we + * know the HW has transitioned from port0 to port1, and we + * advance our inflight/active tracking accordingly. */ - struct execlist_port { - /** - * @request_count: combined request and submission count - */ - struct i915_request *request_count; -#define EXECLIST_COUNT_BITS 2 -#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) -#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) -#define port_set(p, packed) ((p)->request_count = (packed)) -#define port_isset(p) ((p)->request_count) -#define port_index(p, execlists) ((p) - (execlists)->port) - - /** - * @context_id: context ID for port - */ - GEM_DEBUG_DECL(u32 context_id); - -#define EXECLIST_MAX_PORTS 2 - } port[EXECLIST_MAX_PORTS]; - + struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */]; /** - * @active: is the HW active? We consider the HW as active after - * submitting any context for execution and until we have seen the - * last context completion event. After that, we do not expect any - * more events until we submit, and so can park the HW. + * @pending: the next set of contexts submitted to ELSP * - * As we have a small number of different sources from which we feed - * the HW, we track the state of each inside a single bitfield. + * We store the array of contexts that we submit to HW (via ELSP) and + * promote them to the inflight array once HW has signaled the + * preemption or idle-to-active event. 
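The comments above describe the new bookkeeping: pending[] is the NULL-terminated set of contexts last written to the ELSP, inflight[] is the set the hardware has acknowledged, and execlists->active walks inflight[] as context-switch events complete each port. A small userspace model of that promotion, with hypothetical submit/ack/context_switch events standing in for the CSB processing the driver actually does:

#include <stdio.h>
#include <string.h>

#define MAX_PORTS 2

struct request {
	const char *name;
};

static struct request *pending[MAX_PORTS + 1];	/* NULL-terminated */
static struct request *inflight[MAX_PORTS + 1];	/* NULL-terminated */
static struct request * const *active = inflight;

/* Submit: record what was last written to the ELSP ports. */
static void submit(struct request **rq, int count)
{
	memset(pending, 0, sizeof(pending));
	memcpy(pending, rq, count * sizeof(*rq));
}

/* Idle-to-active (or preemption) ack: promote pending[] into inflight[]. */
static void ack(void)
{
	memcpy(inflight, pending, sizeof(inflight));
	memset(pending, 0, sizeof(pending));
	active = inflight;
}

/* Context-switch event: the first remaining inflight port completed. */
static void context_switch(void)
{
	active++;
}

static const char *active_name(void)
{
	return *active ? (*active)->name : "(idle)";
}

int main(void)
{
	struct request a = { "rq-a" }, b = { "rq-b" };
	struct request *batch[] = { &a, &b };

	submit(batch, 2);
	ack();
	printf("active: %s\n", active_name());	/* rq-a */

	context_switch();
	printf("active: %s\n", active_name());	/* rq-b */

	context_switch();
	printf("active: %s\n", active_name());	/* (idle) */

	return 0;
}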
*/ - unsigned int active; -#define EXECLISTS_ACTIVE_USER 0 -#define EXECLISTS_ACTIVE_PREEMPT 1 -#define EXECLISTS_ACTIVE_HWACK 2 + struct i915_request *pending[EXECLIST_MAX_PORTS + 1]; /** * @port_mask: number of execlist ports - 1 @@ -212,6 +225,16 @@ struct intel_engine_execlists { unsigned int port_mask; /** + * @switch_priority_hint: Second context priority. + * + * We submit multiple contexts to the HW simultaneously and would + * like to occasionally switch between them to emulate timeslicing. + * To know when timeslicing is suitable, we track the priority of + * the context submitted second. + */ + int switch_priority_hint; + + /** * @queue_priority_hint: Highest pending priority. * * When we add requests into the queue, or adjust the priority of @@ -246,11 +269,6 @@ struct intel_engine_execlists { u32 *csb_status; /** - * @preempt_complete_status: expected CSB upon completing preemption - */ - u32 preempt_complete_status; - - /** * @csb_size: context status buffer FIFO size */ u8 csb_size; @@ -267,26 +285,32 @@ struct intel_engine_execlists { struct intel_engine_cs { struct drm_i915_private *i915; + struct intel_gt *gt; struct intel_uncore *uncore; char name[INTEL_ENGINE_CS_MAX_NAME]; enum intel_engine_id id; + enum intel_engine_id legacy_idx; + unsigned int hw_id; unsigned int guc_id; - intel_engine_mask_t mask; - u8 uabi_class; + intel_engine_mask_t mask; u8 class; u8 instance; + + u8 uabi_class; + u8 uabi_instance; + u32 context_size; u32 mmio_base; u32 uabi_capabilities; - struct intel_sseu sseu; + struct rb_node uabi_node; - struct intel_ring *buffer; + struct intel_sseu sseu; struct { spinlock_t lock; @@ -296,7 +320,6 @@ struct intel_engine_cs { struct llist_head barrier_tasks; struct intel_context *kernel_context; /* pinned */ - struct intel_context *preempt_context; /* pinned; optional */ intel_engine_mask_t saturated; /* submitting semaphores too late? */ @@ -307,6 +330,11 @@ struct intel_engine_cs { struct drm_i915_gem_object *default_state; void *pinned_default_state; + struct { + struct intel_ring *ring; + struct intel_timeline *timeline; + } legacy; + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -363,7 +391,7 @@ struct intel_engine_cs { * when the command parser is enabled. Prevents the client from * modifying the batch contents after software parsing. 
*/ - struct i915_gem_batch_pool batch_pool; + struct intel_engine_pool pool; struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; @@ -392,7 +420,6 @@ struct intel_engine_cs { const struct intel_context_ops *cops; int (*request_alloc)(struct i915_request *rq); - int (*init_context)(struct i915_request *rq); int (*emit_flush)(struct i915_request *request, u32 mode); #define EMIT_INVALIDATE BIT(0) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c new file mode 100644 index 000000000000..77cd5de83930 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -0,0 +1,303 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/list.h> +#include <linux/list_sort.h> +#include <linux/llist.h> + +#include "i915_drv.h" +#include "intel_engine.h" +#include "intel_engine_user.h" + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) +{ + struct rb_node *p = i915->uabi_engines.rb_node; + + while (p) { + struct intel_engine_cs *it = + rb_entry(p, typeof(*it), uabi_node); + + if (class < it->uabi_class) + p = p->rb_left; + else if (class > it->uabi_class || + instance > it->uabi_instance) + p = p->rb_right; + else if (instance < it->uabi_instance) + p = p->rb_left; + else + return it; + } + + return NULL; +} + +void intel_engine_add_user(struct intel_engine_cs *engine) +{ + llist_add((struct llist_node *)&engine->uabi_node, + (struct llist_head *)&engine->i915->uabi_engines); +} + +static const u8 uabi_classes[] = { + [RENDER_CLASS] = I915_ENGINE_CLASS_RENDER, + [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, + [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, + [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, +}; + +static int engine_cmp(void *priv, struct list_head *A, struct list_head *B) +{ + const struct intel_engine_cs *a = + container_of((struct rb_node *)A, typeof(*a), uabi_node); + const struct intel_engine_cs *b = + container_of((struct rb_node *)B, typeof(*b), uabi_node); + + if (uabi_classes[a->class] < uabi_classes[b->class]) + return -1; + if (uabi_classes[a->class] > uabi_classes[b->class]) + return 1; + + if (a->instance < b->instance) + return -1; + if (a->instance > b->instance) + return 1; + + return 0; +} + +static struct llist_node *get_engines(struct drm_i915_private *i915) +{ + return llist_del_all((struct llist_head *)&i915->uabi_engines); +} + +static void sort_engines(struct drm_i915_private *i915, + struct list_head *engines) +{ + struct llist_node *pos, *next; + + llist_for_each_safe(pos, next, get_engines(i915)) { + struct intel_engine_cs *engine = + container_of((struct rb_node *)pos, typeof(*engine), + uabi_node); + list_add((struct list_head *)&engine->uabi_node, engines); + } + list_sort(NULL, engines, engine_cmp); +} + +static void set_scheduler_caps(struct drm_i915_private *i915) +{ + static const struct { + u8 engine; + u8 sched; + } map[] = { +#define MAP(x, y) { ilog2(I915_ENGINE_##x), ilog2(I915_SCHEDULER_CAP_##y) } + MAP(HAS_PREEMPTION, PREEMPTION), + MAP(HAS_SEMAPHORES, SEMAPHORES), + MAP(SUPPORTS_STATS, ENGINE_BUSY_STATS), +#undef MAP + }; + struct intel_engine_cs *engine; + u32 enabled, disabled; + + enabled = 0; + disabled = 0; + for_each_uabi_engine(engine, i915) { /* all engines must agree! 
*/ + int i; + + if (engine->schedule) + enabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + else + disabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + + for (i = 0; i < ARRAY_SIZE(map); i++) { + if (engine->flags & BIT(map[i].engine)) + enabled |= BIT(map[i].sched); + else + disabled |= BIT(map[i].sched); + } + } + + i915->caps.scheduler = enabled & ~disabled; + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED)) + i915->caps.scheduler = 0; +} + +const char *intel_engine_class_repr(u8 class) +{ + static const char * const uabi_names[] = { + [RENDER_CLASS] = "rcs", + [COPY_ENGINE_CLASS] = "bcs", + [VIDEO_DECODE_CLASS] = "vcs", + [VIDEO_ENHANCEMENT_CLASS] = "vecs", + }; + + if (class >= ARRAY_SIZE(uabi_names) || !uabi_names[class]) + return "xxx"; + + return uabi_names[class]; +} + +struct legacy_ring { + struct intel_gt *gt; + u8 class; + u8 instance; +}; + +static int legacy_ring_idx(const struct legacy_ring *ring) +{ + static const struct { + u8 base, max; + } map[] = { + [RENDER_CLASS] = { RCS0, 1 }, + [COPY_ENGINE_CLASS] = { BCS0, 1 }, + [VIDEO_DECODE_CLASS] = { VCS0, I915_MAX_VCS }, + [VIDEO_ENHANCEMENT_CLASS] = { VECS0, I915_MAX_VECS }, + }; + + if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map))) + return -1; + + if (GEM_DEBUG_WARN_ON(ring->instance >= map[ring->class].max)) + return -1; + + return map[ring->class].base + ring->instance; +} + +static void add_legacy_ring(struct legacy_ring *ring, + struct intel_engine_cs *engine) +{ + int idx; + + if (engine->gt != ring->gt || engine->class != ring->class) { + ring->gt = engine->gt; + ring->class = engine->class; + ring->instance = 0; + } + + idx = legacy_ring_idx(ring); + if (unlikely(idx == -1)) + return; + + GEM_BUG_ON(idx >= ARRAY_SIZE(ring->gt->engine)); + ring->gt->engine[idx] = engine; + ring->instance++; + + engine->legacy_idx = idx; +} + +void intel_engines_driver_register(struct drm_i915_private *i915) +{ + struct legacy_ring ring = {}; + u8 uabi_instances[4] = {}; + struct list_head *it, *next; + struct rb_node **p, *prev; + LIST_HEAD(engines); + + sort_engines(i915, &engines); + + prev = NULL; + p = &i915->uabi_engines.rb_node; + list_for_each_safe(it, next, &engines) { + struct intel_engine_cs *engine = + container_of((struct rb_node *)it, typeof(*engine), + uabi_node); + char old[sizeof(engine->name)]; + + GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); + engine->uabi_class = uabi_classes[engine->class]; + + GEM_BUG_ON(engine->uabi_class >= ARRAY_SIZE(uabi_instances)); + engine->uabi_instance = uabi_instances[engine->uabi_class]++; + + /* Replace the internal name with the final user facing name */ + memcpy(old, engine->name, sizeof(engine->name)); + scnprintf(engine->name, sizeof(engine->name), "%s%u", + intel_engine_class_repr(engine->class), + engine->uabi_instance); + DRM_DEBUG_DRIVER("renamed %s to %s\n", old, engine->name); + + rb_link_node(&engine->uabi_node, prev, p); + rb_insert_color(&engine->uabi_node, &i915->uabi_engines); + + GEM_BUG_ON(intel_engine_lookup_user(i915, + engine->uabi_class, + engine->uabi_instance) != engine); + + /* Fix up the mapping to match default execbuf::user_map[] */ + add_legacy_ring(&ring, engine); + + prev = &engine->uabi_node; + p = &prev->rb_right; + } + + if (IS_ENABLED(CONFIG_DRM_I915_SELFTESTS) && + IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + struct intel_engine_cs *engine; + unsigned int isolation; + int class, inst; + int errors = 0; + + for (class = 0; class < ARRAY_SIZE(uabi_instances); class++) { + for (inst = 
0; inst < uabi_instances[class]; inst++) { + engine = intel_engine_lookup_user(i915, + class, inst); + if (!engine) { + pr_err("UABI engine not found for { class:%d, instance:%d }\n", + class, inst); + errors++; + continue; + } + + if (engine->uabi_class != class || + engine->uabi_instance != inst) { + pr_err("Wrong UABI engine:%s { class:%d, instance:%d } found for { class:%d, instance:%d }\n", + engine->name, + engine->uabi_class, + engine->uabi_instance, + class, inst); + errors++; + continue; + } + } + } + + /* + * Make sure that classes with multiple engine instances all + * share the same basic configuration. + */ + isolation = intel_engines_has_context_isolation(i915); + for_each_uabi_engine(engine, i915) { + unsigned int bit = BIT(engine->uabi_class); + unsigned int expected = engine->default_state ? bit : 0; + + if ((isolation & bit) != expected) { + pr_err("mismatching default context state for class %d on engine %s\n", + engine->uabi_class, engine->name); + errors++; + } + } + + if (WARN(errors, "Invalid UABI engine mapping found")) + i915->uabi_engines = RB_ROOT; + } + + set_scheduler_caps(i915); +} + +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + unsigned int which; + + which = 0; + for_each_uabi_engine(engine, i915) + if (engine->default_state) + which |= BIT(engine->uabi_class); + + return which; +} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h new file mode 100644 index 000000000000..f845ea1cbfaa --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_USER_H +#define INTEL_ENGINE_USER_H + +#include <linux/types.h> + +struct drm_i915_private; +struct intel_engine_cs; + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); + +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); + +void intel_engine_add_user(struct intel_engine_cs *engine); +void intel_engines_driver_register(struct drm_i915_private *i915); + +const char *intel_engine_class_repr(u8 class); + +#endif /* INTEL_ENGINE_USER_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index eec31e36aca7..86e00a2db8a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -8,6 +8,13 @@ #define _INTEL_GPU_COMMANDS_H_ /* + * Target address alignments required for GPU access e.g. + * MI_STORE_DWORD_IMM. 
+ */ +#define alignof_dword 4 +#define alignof_qword 8 + +/* * Instruction field definitions used by the command parser */ #define INSTR_CLIENT_SHIFT 29 @@ -179,11 +186,12 @@ #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) -#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) +#define COLOR_BLT_CMD (2 << 29 | 0x40 << 22 | (5 - 2)) #define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22) -#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) +#define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22) +#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) +#define XY_SRC_COPY_BLT_CMD (2 << 29 | 0x53 << 22) +#define XY_MONO_SRC_COPY_IMM_BLT (2 << 29 | 0x71 << 22 | 5) #define BLT_WRITE_A (2<<20) #define BLT_WRITE_RGB (1<<20) #define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A) @@ -200,6 +208,8 @@ #define DISPLAY_PLANE_A (0<<20) #define DISPLAY_PLANE_B (1<<20) #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) /* gen11+ */ +#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) /* gen11+ */ #define PIPE_CONTROL_FLUSH_L3 (1<<27) #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE (1<<23) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c new file mode 100644 index 000000000000..d48ec9a76ed1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_uncore.h" + +void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) +{ + gt->i915 = i915; + gt->uncore = &i915->uncore; + + spin_lock_init(>->irq_lock); + + INIT_LIST_HEAD(>->closed_vma); + spin_lock_init(>->closed_lock); + + intel_gt_init_hangcheck(gt); + intel_gt_init_reset(gt); + intel_gt_pm_init_early(gt); + intel_uc_init_early(>->uc); +} + +void intel_gt_init_hw(struct drm_i915_private *i915) +{ + i915->gt.ggtt = &i915->ggtt; +} + +static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) +{ + intel_uncore_rmw(uncore, reg, 0, set); +} + +static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) +{ + intel_uncore_rmw(uncore, reg, clr, 0); +} + +static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) +{ + intel_uncore_rmw(uncore, reg, 0, 0); +} + +static void gen8_clear_engine_error_register(struct intel_engine_cs *engine) +{ + GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); + GEN6_RING_FAULT_REG_POSTING_READ(engine); +} + +void +intel_gt_clear_error_registers(struct intel_gt *gt, + intel_engine_mask_t engine_mask) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 eir; + + if (!IS_GEN(i915, 2)) + clear_register(uncore, PGTBL_ER); + + if (INTEL_GEN(i915) < 4) + clear_register(uncore, IPEIR(RENDER_RING_BASE)); + else + clear_register(uncore, IPEIR_I965); + + clear_register(uncore, EIR); + eir = intel_uncore_read(uncore, EIR); + if (eir) { + /* + * some errors might have become stuck, + * mask them. 
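The rmw_set(), rmw_clear() and clear_register() helpers above, and the rmw_set(uncore, EMR, eir) call used here to mask stuck error bits, all funnel into intel_uncore_rmw(uncore, reg, clear, set). Below is a minimal standalone model of that read-modify-write rule only; the mock_* names are illustrative stand-ins for the MMIO accessors, not driver API.

/*
 * Sketch of the (val & ~clear) | set update that rmw_set()/rmw_clear()
 * rely on by passing (0, bits) and (bits, 0) respectively. Not driver code.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t fake_reg;                      /* pretend MMIO register */

static void mock_rmw(uint32_t clear, uint32_t set)
{
        uint32_t val = fake_reg;               /* "read" */

        val &= ~clear;
        val |= set;
        fake_reg = val;                        /* "write" */
}

int main(void)
{
        fake_reg = 0x00f0;
        mock_rmw(0, 0x0003);                   /* rmw_set(): OR bits in  */
        assert(fake_reg == 0x00f3);
        mock_rmw(0x00f0, 0);                   /* rmw_clear(): drop bits */
        assert(fake_reg == 0x0003);
        return 0;
}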
+ */ + DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); + rmw_set(uncore, EMR, eir); + intel_uncore_write(uncore, GEN2_IIR, + I915_MASTER_ERROR_INTERRUPT); + } + + if (INTEL_GEN(i915) >= 12) { + rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID); + intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); + } else if (INTEL_GEN(i915) >= 8) { + rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); + intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); + } else if (INTEL_GEN(i915) >= 6) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine_masked(engine, i915, engine_mask, id) + gen8_clear_engine_error_register(engine); + } +} + +static void gen6_check_faults(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 fault; + + for_each_engine(engine, gt->i915, id) { + fault = GEN6_RING_FAULT_REG_READ(engine); + if (fault & RING_FAULT_VALID) { + DRM_DEBUG_DRIVER("Unexpected fault\n" + "\tAddr: 0x%08lx\n" + "\tAddress space: %s\n" + "\tSource ID: %d\n" + "\tType: %d\n", + fault & PAGE_MASK, + fault & RING_FAULT_GTTSEL_MASK ? + "GGTT" : "PPGTT", + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); + } + } +} + +static void gen8_check_faults(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg; + u32 fault; + + if (INTEL_GEN(gt->i915) >= 12) { + fault_reg = GEN12_RING_FAULT_REG; + fault_data0_reg = GEN12_FAULT_TLB_DATA0; + fault_data1_reg = GEN12_FAULT_TLB_DATA1; + } else { + fault_reg = GEN8_RING_FAULT_REG; + fault_data0_reg = GEN8_FAULT_TLB_DATA0; + fault_data1_reg = GEN8_FAULT_TLB_DATA1; + } + + fault = intel_uncore_read(uncore, fault_reg); + if (fault & RING_FAULT_VALID) { + u32 fault_data0, fault_data1; + u64 fault_addr; + + fault_data0 = intel_uncore_read(uncore, fault_data0_reg); + fault_data1 = intel_uncore_read(uncore, fault_data1_reg); + + fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | + ((u64)fault_data0 << 12); + + DRM_DEBUG_DRIVER("Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), + lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + GEN8_RING_FAULT_ENGINE_ID(fault), + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); + } +} + +void intel_gt_check_and_clear_faults(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + /* From GEN8 onwards we only have one 'All Engine Fault Register' */ + if (INTEL_GEN(i915) >= 8) + gen8_check_faults(gt); + else if (INTEL_GEN(i915) >= 6) + gen6_check_faults(gt); + else + return; + + intel_gt_clear_error_registers(gt, ALL_ENGINES); +} + +void intel_gt_flush_ggtt_writes(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + intel_wakeref_t wakeref; + + /* + * No actual flushing is required for the GTT write domain for reads + * from the GTT domain. Writes to it "immediately" go to main memory + * as far as we know, so there's no chipset flush. It also doesn't + * land in the GPU render cache. + * + * However, we do have to enforce the order so that all writes through + * the GTT land before any writes to the device, such as updates to + * the GATT itself. + * + * We also have to wait a bit for the writes to land from the GTT. + * An uncached read (i.e. mmio) seems to be ideal for the round-trip + * timing. 
This issue has only been observed when switching quickly + * between GTT writes and CPU reads from inside the kernel on recent hw, + * and it appears to only affect discrete GTT blocks (i.e. on LLC + * system agents we cannot reproduce this behaviour, until Cannonlake + * that was!). + */ + + wmb(); + + if (INTEL_INFO(i915)->has_coherent_ggtt) + return; + + intel_gt_chipset_flush(gt); + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + struct intel_uncore *uncore = gt->uncore; + + spin_lock_irq(&uncore->lock); + intel_uncore_posting_read_fw(uncore, + RING_HEAD(RENDER_RING_BASE)); + spin_unlock_irq(&uncore->lock); + } +} + +void intel_gt_chipset_flush(struct intel_gt *gt) +{ + wmb(); + if (INTEL_GEN(gt->i915) < 6) + intel_gtt_chipset_flush(); +} + +int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) +{ + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + obj = i915_gem_object_create_stolen(i915, size); + if (!obj) + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate scratch page\n"); + return PTR_ERR(obj); + } + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_unref; + + gt->scratch = i915_vma_make_unshrinkable(vma); + + return 0; + +err_unref: + i915_gem_object_put(obj); + return ret; +} + +void intel_gt_fini_scratch(struct intel_gt *gt) +{ + i915_vma_unpin_and_release(>->scratch, 0); +} + +void intel_gt_driver_late_release(struct intel_gt *gt) +{ + intel_uc_driver_late_release(>->uc); + intel_gt_fini_reset(gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h new file mode 100644 index 000000000000..4920cb351f10 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_GT__ +#define __INTEL_GT__ + +#include "intel_engine_types.h" +#include "intel_gt_types.h" +#include "intel_reset.h" + +struct drm_i915_private; + +static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) +{ + return container_of(uc, struct intel_gt, uc); +} + +static inline struct intel_gt *guc_to_gt(struct intel_guc *guc) +{ + return container_of(guc, struct intel_gt, uc.guc); +} + +static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) +{ + return container_of(huc, struct intel_gt, uc.huc); +} + +void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); +void intel_gt_init_hw(struct drm_i915_private *i915); + +void intel_gt_driver_late_release(struct intel_gt *gt); + +void intel_gt_check_and_clear_faults(struct intel_gt *gt); +void intel_gt_clear_error_registers(struct intel_gt *gt, + intel_engine_mask_t engine_mask); + +void intel_gt_flush_ggtt_writes(struct intel_gt *gt); +void intel_gt_chipset_flush(struct intel_gt *gt); + +void intel_gt_init_hangcheck(struct intel_gt *gt); + +int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size); +void intel_gt_fini_scratch(struct intel_gt *gt); + +static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, + enum intel_gt_scratch_field field) +{ + return i915_ggtt_offset(gt->scratch) + field; +} + +static inline bool intel_gt_is_wedged(struct intel_gt *gt) +{ + return __intel_reset_failed(>->reset); +} + +void intel_gt_queue_hangcheck(struct intel_gt *gt); + +#endif /* __INTEL_GT_H__ */ diff --git 
a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c new file mode 100644 index 000000000000..34a4fb624bf7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -0,0 +1,455 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/sched/clock.h> + +#include "i915_drv.h" +#include "i915_irq.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_uncore.h" + +static void guc_irq_handler(struct intel_guc *guc, u16 iir) +{ + if (iir & GUC_INTR_GUC2HOST) + intel_guc_to_host_event_handler(guc); +} + +static void +cs_irq_handler(struct intel_engine_cs *engine, u32 iir) +{ + bool tasklet = false; + + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) + tasklet = true; + + if (iir & GT_RENDER_USER_INTERRUPT) { + intel_engine_breadcrumbs_irq(engine); + tasklet |= intel_engine_needs_breadcrumb_tasklet(engine); + } + + if (tasklet) + tasklet_hi_schedule(&engine->execlists.tasklet); +} + +static u32 +gen11_gt_engine_identity(struct intel_gt *gt, + const unsigned int bank, const unsigned int bit) +{ + void __iomem * const regs = gt->uncore->regs; + u32 timeout_ts; + u32 ident; + + lockdep_assert_held(>->irq_lock); + + raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit)); + + /* + * NB: Specs do not specify how long to spin wait, + * so we do ~100us as an educated guess. + */ + timeout_ts = (local_clock() >> 10) + 100; + do { + ident = raw_reg_read(regs, GEN11_INTR_IDENTITY_REG(bank)); + } while (!(ident & GEN11_INTR_DATA_VALID) && + !time_after32(local_clock() >> 10, timeout_ts)); + + if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { + DRM_ERROR("INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", + bank, bit, ident); + return 0; + } + + raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank), + GEN11_INTR_DATA_VALID); + + return ident; +} + +static void +gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, + const u16 iir) +{ + if (instance == OTHER_GUC_INSTANCE) + return guc_irq_handler(>->uc.guc, iir); + + if (instance == OTHER_GTPM_INSTANCE) + return gen11_rps_irq_handler(gt, iir); + + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", + instance, iir); +} + +static void +gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, + const u8 instance, const u16 iir) +{ + struct intel_engine_cs *engine; + + if (instance <= MAX_ENGINE_INSTANCE) + engine = gt->engine_class[class][instance]; + else + engine = NULL; + + if (likely(engine)) + return cs_irq_handler(engine, iir); + + WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", + class, instance); +} + +static void +gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity) +{ + const u8 class = GEN11_INTR_ENGINE_CLASS(identity); + const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity); + const u16 intr = GEN11_INTR_ENGINE_INTR(identity); + + if (unlikely(!intr)) + return; + + if (class <= COPY_ENGINE_CLASS) + return gen11_engine_irq_handler(gt, class, instance, intr); + + if (class == OTHER_CLASS) + return gen11_other_irq_handler(gt, instance, intr); + + WARN_ONCE(1, "unknown interrupt class=0x%x, instance=0x%x, intr=0x%x\n", + class, instance, intr); +} + +static void +gen11_gt_bank_handler(struct intel_gt *gt, const unsigned int bank) +{ + void __iomem * const regs = gt->uncore->regs; + unsigned long intr_dw; + unsigned int bit; + + lockdep_assert_held(>->irq_lock); + + intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + + for_each_set_bit(bit, &intr_dw, 32) { + const u32 ident = 
gen11_gt_engine_identity(gt, bank, bit); + + gen11_gt_identity_handler(gt, ident); + } + + /* Clear must be after shared has been served for engine */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw); +} + +void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl) +{ + unsigned int bank; + + spin_lock(>->irq_lock); + + for (bank = 0; bank < 2; bank++) { + if (master_ctl & GEN11_GT_DW_IRQ(bank)) + gen11_gt_bank_handler(gt, bank); + } + + spin_unlock(>->irq_lock); +} + +bool gen11_gt_reset_one_iir(struct intel_gt *gt, + const unsigned int bank, const unsigned int bit) +{ + void __iomem * const regs = gt->uncore->regs; + u32 dw; + + lockdep_assert_held(>->irq_lock); + + dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + if (dw & BIT(bit)) { + /* + * According to the BSpec, DW_IIR bits cannot be cleared without + * first servicing the Selector & Shared IIR registers. + */ + gen11_gt_engine_identity(gt, bank, bit); + + /* + * We locked GT INT DW by reading it. If we want to (try + * to) recover from this successfully, we need to clear + * our bit, otherwise we are locking the register for + * everybody. + */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), BIT(bit)); + + return true; + } + + return false; +} + +void gen11_gt_irq_reset(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + /* Disable RCS, BCS, VCS and VECS class engines. */ + intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0); + + /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ + intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0); + + intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0); +} + +void gen11_gt_irq_postinstall(struct intel_gt *gt) +{ + const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; + struct intel_uncore *uncore = gt->uncore; + const u32 dmask = irqs << 16 | irqs; + const u32 smask = irqs << 16; + + BUILD_BUG_ON(irqs & 0xffff0000); + + /* Enable RCS, BCS, VCS and VECS class interrupts. */ + intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask); + intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask); + + /* Unmask irqs on RCS, BCS, VCS and VECS engines. */ + intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask); + intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~smask); + intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~dmask); + intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~dmask); + intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask); + + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. 
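The identity poll in gen11_gt_engine_identity() above bounds its spin with a deadline taken from local_clock(): the clock is in nanoseconds, so shifting right by 10 gives roughly 1us units and adding 100 gives the ~100us budget mentioned in its comment. A small standalone sketch of that deadline arithmetic, with fake_clock_ns() and poll_hw() standing in for local_clock() and the IIR read:

/*
 * Model of the ~100us poll budget: ">> 10" converts ns to ~1us units
 * (1024ns per unit), "+ 100" sets the deadline, and the wrap-safe
 * signed comparison mirrors what time_after32() does in the original.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static uint64_t now_ns;

static uint64_t fake_clock_ns(void)
{
        return now_ns += 5000;          /* pretend each poll costs ~5us */
}

static bool poll_hw(uint32_t *ident)
{
        (void)*ident;
        return false;                   /* never valid: force the timeout */
}

int main(void)
{
        uint32_t timeout_ts = (uint32_t)(fake_clock_ns() >> 10) + 100;
        uint32_t ident = 0;
        bool valid;

        do {
                valid = poll_hw(&ident);
        } while (!valid &&
                 (int32_t)(timeout_ts - (uint32_t)(fake_clock_ns() >> 10)) >= 0);

        assert(!valid);                 /* gave up after ~100us of fake time */
        return 0;
}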
+ */ + gt->pm_ier = 0x0; + gt->pm_imr = ~gt->pm_ier; + intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); + + /* Same thing for GuC interrupts */ + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0); +} + +void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) +{ + if (gt_iir & GT_RENDER_USER_INTERRUPT) + intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]); + if (gt_iir & ILK_BSD_USER_INTERRUPT) + intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]); +} + +static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) +{ + if (!HAS_L3_DPF(gt->i915)) + return; + + spin_lock(>->irq_lock); + gen5_gt_disable_irq(gt, GT_PARITY_ERROR(gt->i915)); + spin_unlock(>->irq_lock); + + if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1) + gt->i915->l3_parity.which_slice |= 1 << 1; + + if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) + gt->i915->l3_parity.which_slice |= 1 << 0; + + schedule_work(>->i915->l3_parity.error_work); +} + +void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) +{ + if (gt_iir & GT_RENDER_USER_INTERRUPT) + intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]); + if (gt_iir & GT_BSD_USER_INTERRUPT) + intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]); + if (gt_iir & GT_BLT_USER_INTERRUPT) + intel_engine_breadcrumbs_irq(gt->engine_class[COPY_ENGINE_CLASS][0]); + + if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | + GT_BSD_CS_ERROR_INTERRUPT | + GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) + DRM_DEBUG("Command parser error, gt_iir 0x%08x\n", gt_iir); + + if (gt_iir & GT_PARITY_ERROR(gt->i915)) + gen7_parity_error_irq_handler(gt, gt_iir); +} + +void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) +{ + void __iomem * const regs = gt->uncore->regs; + + if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { + gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0)); + if (likely(gt_iir[0])) + raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]); + } + + if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { + gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1)); + if (likely(gt_iir[1])) + raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]); + } + + if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { + gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2)); + if (likely(gt_iir[2])) + raw_reg_write(regs, GEN8_GT_IIR(2), gt_iir[2]); + } + + if (master_ctl & GEN8_GT_VECS_IRQ) { + gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3)); + if (likely(gt_iir[3])) + raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]); + } +} + +void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) +{ + if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { + cs_irq_handler(gt->engine_class[RENDER_CLASS][0], + gt_iir[0] >> GEN8_RCS_IRQ_SHIFT); + cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0], + gt_iir[0] >> GEN8_BCS_IRQ_SHIFT); + } + + if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { + cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0], + gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT); + cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1], + gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT); + } + + if (master_ctl & GEN8_GT_VECS_IRQ) { + cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0], + gt_iir[3] >> GEN8_VECS_IRQ_SHIFT); + } + + if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { + gen6_rps_irq_handler(gt->i915, gt_iir[2]); + guc_irq_handler(>->uc.guc, gt_iir[2] >> 16); + } +} + +void gen8_gt_irq_reset(struct 
intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + GEN8_IRQ_RESET_NDX(uncore, GT, 0); + GEN8_IRQ_RESET_NDX(uncore, GT, 1); + GEN8_IRQ_RESET_NDX(uncore, GT, 2); + GEN8_IRQ_RESET_NDX(uncore, GT, 3); +} + +void gen8_gt_irq_postinstall(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + /* These are interrupts we'll toggle with the ring mask register */ + u32 gt_interrupts[] = { + (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT), + + (GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT), + + 0, + + (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT) + }; + + gt->pm_ier = 0x0; + gt->pm_imr = ~gt->pm_ier; + GEN8_IRQ_INIT_NDX(uncore, GT, 0, ~gt_interrupts[0], gt_interrupts[0]); + GEN8_IRQ_INIT_NDX(uncore, GT, 1, ~gt_interrupts[1], gt_interrupts[1]); + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. Same wil be the case for GuC interrupts. + */ + GEN8_IRQ_INIT_NDX(uncore, GT, 2, gt->pm_imr, gt->pm_ier); + GEN8_IRQ_INIT_NDX(uncore, GT, 3, ~gt_interrupts[3], gt_interrupts[3]); +} + +static void gen5_gt_update_irq(struct intel_gt *gt, + u32 interrupt_mask, + u32 enabled_irq_mask) +{ + lockdep_assert_held(>->irq_lock); + + GEM_BUG_ON(enabled_irq_mask & ~interrupt_mask); + + gt->gt_imr &= ~interrupt_mask; + gt->gt_imr |= (~enabled_irq_mask & interrupt_mask); + intel_uncore_write(gt->uncore, GTIMR, gt->gt_imr); +} + +void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask) +{ + gen5_gt_update_irq(gt, mask, mask); + intel_uncore_posting_read_fw(gt->uncore, GTIMR); +} + +void gen5_gt_disable_irq(struct intel_gt *gt, u32 mask) +{ + gen5_gt_update_irq(gt, mask, 0); +} + +void gen5_gt_irq_reset(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + GEN3_IRQ_RESET(uncore, GT); + if (INTEL_GEN(gt->i915) >= 6) + GEN3_IRQ_RESET(uncore, GEN6_PM); +} + +void gen5_gt_irq_postinstall(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + u32 pm_irqs = 0; + u32 gt_irqs = 0; + + gt->gt_imr = ~0; + if (HAS_L3_DPF(gt->i915)) { + /* L3 parity interrupt is always unmasked. */ + gt->gt_imr = ~GT_PARITY_ERROR(gt->i915); + gt_irqs |= GT_PARITY_ERROR(gt->i915); + } + + gt_irqs |= GT_RENDER_USER_INTERRUPT; + if (IS_GEN(gt->i915, 5)) + gt_irqs |= ILK_BSD_USER_INTERRUPT; + else + gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT; + + GEN3_IRQ_INIT(uncore, GT, gt->gt_imr, gt_irqs); + + if (INTEL_GEN(gt->i915) >= 6) { + /* + * RPS interrupts will get enabled/disabled on demand when RPS + * itself is enabled/disabled. 
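gen5_gt_update_irq() above, and the PM variant gen6_gt_pm_update_irq() later in this patch, apply the same IMR update rule: within interrupt_mask, a bit ends up cleared (unmasked) only if it is set in enabled_irq_mask, and bits outside interrupt_mask are left alone; the ~0 reset value used by gen5_gt_irq_postinstall() is the "everything masked" starting point. A standalone check of that rule, as a sketch rather than driver code:

/*
 * Verify the IMR update used above: masked = bit set, enabled = bit clear,
 * and only bits inside `interrupt_mask` may change.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t update_imr(uint32_t imr, uint32_t interrupt_mask,
                           uint32_t enabled_irq_mask)
{
        imr &= ~interrupt_mask;
        imr |= ~enabled_irq_mask & interrupt_mask;
        return imr;
}

int main(void)
{
        uint32_t imr = ~0u;                     /* start fully masked */

        imr = update_imr(imr, 0x6, 0x6);        /* enable two sources  */
        assert((imr & 0x6) == 0);
        imr = update_imr(imr, 0x2, 0x0);        /* disable one of them */
        assert((imr & 0x6) == 0x2);
        assert((imr & ~0x7u) == (~0u & ~0x7u)); /* other bits untouched */
        return 0;
}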
+ */ + if (HAS_ENGINE(gt->i915, VECS0)) { + pm_irqs |= PM_VEBOX_USER_INTERRUPT; + gt->pm_ier |= PM_VEBOX_USER_INTERRUPT; + } + + gt->pm_imr = 0xffffffff; + GEN3_IRQ_INIT(uncore, GEN6_PM, gt->pm_imr, pm_irqs); + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h new file mode 100644 index 000000000000..8f37593712c9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_IRQ_H +#define INTEL_GT_IRQ_H + +#include <linux/types.h> + +struct intel_gt; + +#define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \ + GEN8_GT_BCS_IRQ | \ + GEN8_GT_VCS0_IRQ | \ + GEN8_GT_VCS1_IRQ | \ + GEN8_GT_VECS_IRQ | \ + GEN8_GT_PM_IRQ | \ + GEN8_GT_GUC_IRQ) + +void gen11_gt_irq_reset(struct intel_gt *gt); +void gen11_gt_irq_postinstall(struct intel_gt *gt); +void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl); + +bool gen11_gt_reset_one_iir(struct intel_gt *gt, + const unsigned int bank, + const unsigned int bit); + +void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir); + +void gen5_gt_irq_postinstall(struct intel_gt *gt); +void gen5_gt_irq_reset(struct intel_gt *gt); +void gen5_gt_disable_irq(struct intel_gt *gt, u32 mask); +void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask); + +void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir); + +void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]); +void gen8_gt_irq_reset(struct intel_gt *gt); +void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]); +void gen8_gt_irq_postinstall(struct intel_gt *gt); + +#endif /* INTEL_GT_IRQ_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 7b5967751762..1363e069ec83 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,6 +5,9 @@ */ #include "i915_drv.h" +#include "i915_params.h" +#include "intel_engine_pm.h" +#include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_pm.h" #include "intel_wakeref.h" @@ -14,10 +17,10 @@ static void pm_notify(struct drm_i915_private *i915, int state) blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915); } -static int intel_gt_unpark(struct intel_wakeref *wf) +static int __gt_unpark(struct intel_wakeref *wf) { - struct drm_i915_private *i915 = - container_of(wf, typeof(*i915), gt.wakeref); + struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); + struct drm_i915_private *i915 = gt->i915; GEM_TRACE("\n"); @@ -32,8 +35,8 @@ static int intel_gt_unpark(struct intel_wakeref *wf) * Work around it by grabbing a GT IRQ power domain whilst there is any * GT activity, preventing any DC state transitions. 
*/ - i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - GEM_BUG_ON(!i915->gt.awake); + gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!gt->awake); intel_enable_gt_powersave(i915); @@ -43,19 +46,14 @@ static int intel_gt_unpark(struct intel_wakeref *wf) i915_pmu_gt_unparked(i915); - i915_queue_hangcheck(i915); + intel_gt_queue_hangcheck(gt); pm_notify(i915, INTEL_GT_UNPARK); return 0; } -void intel_gt_pm_get(struct drm_i915_private *i915) -{ - intel_wakeref_get(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_unpark); -} - -static int intel_gt_park(struct intel_wakeref *wf) +static int __gt_park(struct intel_wakeref *wf) { struct drm_i915_private *i915 = container_of(wf, typeof(*i915), gt.wakeref); @@ -69,34 +67,39 @@ static int intel_gt_park(struct intel_wakeref *wf) if (INTEL_GEN(i915) >= 6) gen6_rps_idle(i915); + /* Everything switched off, flush any residual interrupt just in case */ + intel_synchronize_irq(i915); + GEM_BUG_ON(!wakeref); intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); return 0; } -void intel_gt_pm_put(struct drm_i915_private *i915) -{ - intel_wakeref_put(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_park); -} +static const struct intel_wakeref_ops wf_ops = { + .get = __gt_unpark, + .put = __gt_park, + .flags = INTEL_WAKEREF_PUT_ASYNC, +}; -void intel_gt_pm_init(struct drm_i915_private *i915) +void intel_gt_pm_init_early(struct intel_gt *gt) { - intel_wakeref_init(&i915->gt.wakeref); - BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications); + intel_wakeref_init(>->wakeref, >->i915->runtime_pm, &wf_ops); + + BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); } -static bool reset_engines(struct drm_i915_private *i915) +static bool reset_engines(struct intel_gt *gt) { - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) + if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) return false; - return intel_gpu_reset(i915, ALL_ENGINES) == 0; + return __intel_gt_reset(gt, ALL_ENGINES) == 0; } /** * intel_gt_sanitize: called after the GPU has lost power - * @i915: the i915 device + * @gt: the i915 GT container * @force: ignore a failed reset and sanitize engine state anyway * * Anytime we reset the GPU, either with an explicit GPU reset or through a @@ -104,24 +107,27 @@ static bool reset_engines(struct drm_i915_private *i915) * to match. Note that calling intel_gt_sanitize() if the GPU has not * been reset results in much confusion! */ -void intel_gt_sanitize(struct drm_i915_private *i915, bool force) +void intel_gt_sanitize(struct intel_gt *gt, bool force) { struct intel_engine_cs *engine; enum intel_engine_id id; GEM_TRACE("\n"); - if (!reset_engines(i915) && !force) + intel_uc_sanitize(>->uc); + + if (!reset_engines(gt) && !force) return; - for_each_engine(engine, i915, id) - intel_engine_reset(engine, false); + for_each_engine(engine, gt->i915, id) + __intel_engine_reset(engine, false); } -void intel_gt_resume(struct drm_i915_private *i915) +int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; + int err = 0; /* * After resume, we may need to poke into the pinned kernel @@ -129,15 +135,40 @@ void intel_gt_resume(struct drm_i915_private *i915) * Only the kernel contexts should remain pinned over suspend, * allowing us to fixup the user contexts on their first pin. 
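intel_wakeref_init() above wires __gt_unpark() and __gt_park() in through wf_ops, so the unpark callback runs on the first reference and the park callback on the last one (deferred, per INTEL_WAKEREF_PUT_ASYNC). The following is a standalone model of that first/last-reference behaviour only, not the intel_wakeref implementation:

/*
 * Model of the wakeref contract relied on here: ops->get fires on the
 * 0 -> 1 transition, ops->put on 1 -> 0 (possibly asynchronously in the
 * driver). Illustration only.
 */
#include <assert.h>

struct fake_wakeref {
        int count;
        int unparks;
        int parks;
};

static void fake_get(struct fake_wakeref *wf)
{
        if (wf->count++ == 0)
                wf->unparks++;          /* __gt_unpark() would run here */
}

static void fake_put(struct fake_wakeref *wf)
{
        assert(wf->count > 0);
        if (--wf->count == 0)
                wf->parks++;            /* __gt_park() would run here */
}

int main(void)
{
        struct fake_wakeref wf = {0};

        fake_get(&wf);                  /* e.g. intel_gt_resume() waking the GT */
        fake_get(&wf);                  /* nested user, no second unpark */
        fake_put(&wf);
        fake_put(&wf);                  /* last put parks */
        assert(wf.unparks == 1 && wf.parks == 1);
        return 0;
}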
*/ - for_each_engine(engine, i915, id) { + intel_gt_pm_get(gt); + for_each_engine(engine, gt->i915, id) { struct intel_context *ce; + intel_engine_pm_get(engine); + ce = engine->kernel_context; if (ce) ce->ops->reset(ce); - ce = engine->preempt_context; - if (ce) - ce->ops->reset(ce); + engine->serial++; /* kernel context lost */ + err = engine->resume(engine); + + intel_engine_pm_put(engine); + if (err) { + dev_err(gt->i915->drm.dev, + "Failed to restart %s (%d)\n", + engine->name, err); + break; + } } + intel_gt_pm_put(gt); + + return err; +} + +void intel_gt_runtime_suspend(struct intel_gt *gt) +{ + intel_uc_runtime_suspend(>->uc); +} + +int intel_gt_runtime_resume(struct intel_gt *gt) +{ + intel_gt_init_swizzling(gt); + + return intel_uc_runtime_resume(>->uc); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 7dd1130a19a4..fb39d99cd6ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -9,19 +9,44 @@ #include <linux/types.h> -struct drm_i915_private; +#include "intel_gt_types.h" +#include "intel_wakeref.h" enum { INTEL_GT_UNPARK, INTEL_GT_PARK, }; -void intel_gt_pm_get(struct drm_i915_private *i915); -void intel_gt_pm_put(struct drm_i915_private *i915); - -void intel_gt_pm_init(struct drm_i915_private *i915); - -void intel_gt_sanitize(struct drm_i915_private *i915, bool force); -void intel_gt_resume(struct drm_i915_private *i915); +static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt) +{ + return intel_wakeref_is_active(>->wakeref); +} + +static inline void intel_gt_pm_get(struct intel_gt *gt) +{ + intel_wakeref_get(>->wakeref); +} + +static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt) +{ + return intel_wakeref_get_if_active(>->wakeref); +} + +static inline void intel_gt_pm_put(struct intel_gt *gt) +{ + intel_wakeref_put(>->wakeref); +} + +static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) +{ + return intel_wakeref_wait_for_idle(>->wakeref); +} + +void intel_gt_pm_init_early(struct intel_gt *gt); + +void intel_gt_sanitize(struct intel_gt *gt, bool force); +int intel_gt_resume(struct intel_gt *gt); +void intel_gt_runtime_suspend(struct intel_gt *gt); +int intel_gt_runtime_resume(struct intel_gt *gt); #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c new file mode 100644 index 000000000000..babe866126d7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c @@ -0,0 +1,109 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_reg.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" + +static void write_pm_imr(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 mask = gt->pm_imr; + i915_reg_t reg; + + if (INTEL_GEN(i915) >= 11) { + reg = GEN11_GPM_WGBOXPERF_INTR_MASK; + mask <<= 16; /* pm is in upper half */ + } else if (INTEL_GEN(i915) >= 8) { + reg = GEN8_GT_IMR(2); + } else { + reg = GEN6_PMIMR; + } + + intel_uncore_write(uncore, reg, mask); +} + +static void gen6_gt_pm_update_irq(struct intel_gt *gt, + u32 interrupt_mask, + u32 enabled_irq_mask) +{ + u32 new_val; + + WARN_ON(enabled_irq_mask & ~interrupt_mask); + + lockdep_assert_held(>->irq_lock); + + new_val = gt->pm_imr; + new_val &= ~interrupt_mask; + new_val |= ~enabled_irq_mask & interrupt_mask; + + if (new_val != gt->pm_imr) { + gt->pm_imr = 
new_val; + write_pm_imr(gt); + } +} + +void gen6_gt_pm_unmask_irq(struct intel_gt *gt, u32 mask) +{ + gen6_gt_pm_update_irq(gt, mask, mask); +} + +void gen6_gt_pm_mask_irq(struct intel_gt *gt, u32 mask) +{ + gen6_gt_pm_update_irq(gt, mask, 0); +} + +void gen6_gt_pm_reset_iir(struct intel_gt *gt, u32 reset_mask) +{ + struct intel_uncore *uncore = gt->uncore; + i915_reg_t reg = INTEL_GEN(gt->i915) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; + + lockdep_assert_held(>->irq_lock); + + intel_uncore_write(uncore, reg, reset_mask); + intel_uncore_write(uncore, reg, reset_mask); + intel_uncore_posting_read(uncore, reg); +} + +static void write_pm_ier(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 mask = gt->pm_ier; + i915_reg_t reg; + + if (INTEL_GEN(i915) >= 11) { + reg = GEN11_GPM_WGBOXPERF_INTR_ENABLE; + mask <<= 16; /* pm is in upper half */ + } else if (INTEL_GEN(i915) >= 8) { + reg = GEN8_GT_IER(2); + } else { + reg = GEN6_PMIER; + } + + intel_uncore_write(uncore, reg, mask); +} + +void gen6_gt_pm_enable_irq(struct intel_gt *gt, u32 enable_mask) +{ + lockdep_assert_held(>->irq_lock); + + gt->pm_ier |= enable_mask; + write_pm_ier(gt); + gen6_gt_pm_unmask_irq(gt, enable_mask); +} + +void gen6_gt_pm_disable_irq(struct intel_gt *gt, u32 disable_mask) +{ + lockdep_assert_held(>->irq_lock); + + gt->pm_ier &= ~disable_mask; + gen6_gt_pm_mask_irq(gt, disable_mask); + write_pm_ier(gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h new file mode 100644 index 000000000000..b29816a04809 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h @@ -0,0 +1,22 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_PM_IRQ_H +#define INTEL_GT_PM_IRQ_H + +#include <linux/types.h> + +struct intel_gt; + +void gen6_gt_pm_unmask_irq(struct intel_gt *gt, u32 mask); +void gen6_gt_pm_mask_irq(struct intel_gt *gt, u32 mask); + +void gen6_gt_pm_enable_irq(struct intel_gt *gt, u32 enable_mask); +void gen6_gt_pm_disable_irq(struct intel_gt *gt, u32 disable_mask); + +void gen6_gt_pm_reset_iir(struct intel_gt *gt, u32 reset_mask); + +#endif /* INTEL_GT_PM_IRQ_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h new file mode 100644 index 000000000000..dc295c196d11 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_GT_TYPES__ +#define __INTEL_GT_TYPES__ + +#include <linux/ktime.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include "uc/intel_uc.h" + +#include "i915_vma.h" +#include "intel_engine_types.h" +#include "intel_reset_types.h" +#include "intel_wakeref.h" + +struct drm_i915_private; +struct i915_ggtt; +struct intel_engine_cs; +struct intel_uncore; + +struct intel_hangcheck { + /* For hangcheck timer */ +#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ +#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) + + struct delayed_work work; +}; + +struct intel_gt { + struct drm_i915_private *i915; + struct intel_uncore *uncore; + struct i915_ggtt *ggtt; + + struct intel_uc uc; + + struct intel_gt_timelines { + spinlock_t lock; /* protects active_list */ + struct list_head active_list; + + /* Pack multiple timelines' seqnos into the same page */ + 
spinlock_t hwsp_lock; + struct list_head hwsp_free_list; + } timelines; + + struct intel_wakeref wakeref; + + struct list_head closed_vma; + spinlock_t closed_lock; /* guards the list of closed_vma */ + + struct intel_hangcheck hangcheck; + struct intel_reset reset; + + /** + * Is the GPU currently considered idle, or busy executing + * userspace requests? Whilst idle, we allow runtime power + * management to power down the hardware and display clocks. + * In order to reduce the effect on performance, there + * is a slight delay before we do so. + */ + intel_wakeref_t awake; + + struct blocking_notifier_head pm_notifications; + + ktime_t last_init_time; + + struct i915_vma *scratch; + + spinlock_t irq_lock; + u32 gt_imr; + u32 pm_ier; + u32 pm_imr; + + u32 pm_guc_events; + + struct intel_engine_cs *engine[I915_NUM_ENGINES]; + struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] + [MAX_ENGINE_INSTANCE + 1]; +}; + +enum intel_gt_scratch_field { + /* 8 bytes */ + INTEL_GT_SCRATCH_FIELD_DEFAULT = 0, + + /* 8 bytes */ + INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128, + + /* 8 bytes */ + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128, + + /* 8 bytes */ + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256, + +}; + +#endif /* __INTEL_GT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 6bcfa6456c45..05d042cdefe2 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -22,8 +22,10 @@ * */ -#include "intel_reset.h" #include "i915_drv.h" +#include "intel_engine.h" +#include "intel_gt.h" +#include "intel_reset.h" struct hangcheck { u64 acthd; @@ -57,9 +59,6 @@ static bool subunits_stuck(struct intel_engine_cs *engine) int slice; int subslice; - if (engine->id != RCS0) - return true; - intel_engine_get_instdone(engine, &instdone); /* There might be unstable subunit states even when @@ -103,7 +102,6 @@ head_stuck(struct intel_engine_cs *engine, u64 acthd) static enum intel_engine_hangcheck_action engine_stuck(struct intel_engine_cs *engine, u64 acthd) { - struct drm_i915_private *dev_priv = engine->i915; enum intel_engine_hangcheck_action ha; u32 tmp; @@ -111,7 +109,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) if (ha != ENGINE_DEAD) return ha; - if (IS_GEN(dev_priv, 2)) + if (IS_GEN(engine->i915, 2)) return ENGINE_DEAD; /* Is the chip hanging on a WAIT_FOR_EVENT? @@ -121,8 +119,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = ENGINE_READ(engine, RING_CTL); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, engine->mask, 0, - "stuck wait on %s", engine->name); + intel_gt_handle_error(engine->gt, engine->mask, 0, + "stuck wait on %s", engine->name); ENGINE_WRITE(engine, RING_CTL, tmp); return ENGINE_WAIT_KICK; } @@ -222,7 +220,7 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, I915_ENGINE_WEDGED_TIMEOUT); } -static void hangcheck_declare_hang(struct drm_i915_private *i915, +static void hangcheck_declare_hang(struct intel_gt *gt, intel_engine_mask_t hung, intel_engine_mask_t stuck) { @@ -238,12 +236,12 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, hung &= ~stuck; len = scnprintf(msg, sizeof(msg), "%s on ", stuck == hung ? 
"no progress" : "hang"); - for_each_engine_masked(engine, i915, hung, tmp) + for_each_engine_masked(engine, gt->i915, hung, tmp) len += scnprintf(msg + len, sizeof(msg) - len, "%s, ", engine->name); msg[len-2] = '\0'; - return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg); + return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg); } /* @@ -254,11 +252,10 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, * we kick the ring. If we see no progress on three subsequent calls * we assume chip is wedged and try to fix it by resetting the chip. */ -static void i915_hangcheck_elapsed(struct work_struct *work) +static void hangcheck_elapsed(struct work_struct *work) { - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), - gpu_error.hangcheck_work.work); + struct intel_gt *gt = + container_of(work, typeof(*gt), hangcheck.work.work); intel_engine_mask_t hung = 0, stuck = 0, wedged = 0; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -267,13 +264,13 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (!i915_modparams.enable_hangcheck) return; - if (!READ_ONCE(dev_priv->gt.awake)) + if (!READ_ONCE(gt->awake)) return; - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(gt)) return; - wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(>->i915->runtime_pm); if (!wakeref) return; @@ -281,9 +278,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work) * periodically arm the mmio checker to see if we are triggering * any invalid access. */ - intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore); + intel_uncore_arm_unclaimed_mmio_detection(gt->uncore); - for_each_engine(engine, dev_priv, id) { + for_each_engine(engine, gt->i915, id) { struct hangcheck hc; intel_engine_signal_breadcrumbs(engine); @@ -305,7 +302,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (GEM_SHOW_DEBUG() && (hung | stuck)) { struct drm_printer p = drm_debug_printer("hangcheck"); - for_each_engine(engine, dev_priv, id) { + for_each_engine(engine, gt->i915, id) { if (intel_engine_is_idle(engine)) continue; @@ -314,20 +311,37 @@ static void i915_hangcheck_elapsed(struct work_struct *work) } if (wedged) { - dev_err(dev_priv->drm.dev, + dev_err(gt->i915->drm.dev, "GPU recovery timed out," " cancelling all in-flight rendering.\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(dev_priv); + intel_gt_set_wedged(gt); } if (hung) - hangcheck_declare_hang(dev_priv, hung, stuck); + hangcheck_declare_hang(gt, hung, stuck); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); /* Reset timer in case GPU hangs without another request being added */ - i915_queue_hangcheck(dev_priv); + intel_gt_queue_hangcheck(gt); +} + +void intel_gt_queue_hangcheck(struct intel_gt *gt) +{ + unsigned long delay; + + if (unlikely(!i915_modparams.enable_hangcheck)) + return; + + /* + * Don't continually defer the hangcheck so that it is always run at + * least once after work has been scheduled on any ring. Otherwise, + * we will ignore a hung ring if a second ring is kept busy. 
+ */ + + delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); + queue_delayed_work(system_long_wq, >->hangcheck.work, delay); } void intel_engine_init_hangcheck(struct intel_engine_cs *engine) @@ -336,10 +350,9 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine) engine->hangcheck.action_timestamp = jiffies; } -void intel_hangcheck_init(struct drm_i915_private *i915) +void intel_gt_init_hangcheck(struct intel_gt *gt) { - INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, - i915_hangcheck_elapsed); + INIT_DELAYED_WORK(>->hangcheck.work, hangcheck_elapsed); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b42b5f158295..d42584439f51 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -136,9 +136,12 @@ #include "gem/i915_gem_context.h" #include "i915_drv.h" -#include "i915_gem_render_state.h" +#include "i915_perf.h" +#include "i915_trace.h" #include "i915_vgpu.h" #include "intel_engine_pm.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" @@ -161,6 +164,15 @@ #define GEN8_CTX_STATUS_COMPLETED_MASK \ (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) + +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ +#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */ +#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15) +#define GEN12_IDLE_CTX_ID 0x7FF +#define GEN12_CSB_CTX_VALID(csb_dw) \ + (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) + /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ #define WA_TAIL_DWORDS 2 @@ -214,13 +226,34 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) return container_of(engine, struct virtual_engine, base); } -static int execlists_context_deferred_alloc(struct intel_context *ce, - struct intel_engine_cs *engine); +static int __execlists_context_alloc(struct intel_context *ce, + struct intel_engine_cs *engine); + static void execlists_init_reg_state(u32 *reg_state, struct intel_context *ce, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_PREEMPT_ADDR); +} + +static inline void +ring_set_paused(const struct intel_engine_cs *engine, int state) +{ + /* + * We inspect HWS_PREEMPT with a semaphore inside + * engine->emit_fini_breadcrumb. If the dword is true, + * the ring is paused as the semaphore will busywait + * until the dword is false. + */ + engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; + if (state) + wmb(); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -236,6 +269,17 @@ static int effective_prio(const struct i915_request *rq) int prio = rq_prio(rq); /* + * If this request is special and must not be interrupted at any + * cost, so be it. Note we are only checking the most recent request + * in the context and so may be masking an earlier vip request. It + * is hoped that under the conditions where nopreempt is used, this + * will not matter (i.e. all requests to that context will be + * nopreempt for as long as desired). 
+ */ + if (i915_request_has_nopreempt(rq)) + prio = I915_PRIORITY_UNPREEMPTABLE; + + /* * On unwinding the active request, we give it a priority bump * if it has completed waiting on any semaphore. If we know that * the request has already started, we can prevent an unwanted @@ -245,6 +289,7 @@ static int effective_prio(const struct i915_request *rq) prio |= I915_PRIORITY_NOSEMAPHORE; /* Restrict mere WAIT boosts from triggering preemption */ + BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ return prio | __NO_PREEMPTION; } @@ -271,10 +316,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, { int last_prio; - if (!engine->preempt_context) - return false; - - if (i915_request_completed(rq)) + if (!intel_engine_has_semaphores(engine)) return false; /* @@ -338,9 +380,6 @@ __maybe_unused static inline bool assert_priority_queue(const struct i915_request *prev, const struct i915_request *next) { - const struct intel_engine_execlists *execlists = - &prev->engine->execlists; - /* * Without preemption, the prev may refer to the still active element * which we refuse to let go. @@ -348,7 +387,7 @@ assert_priority_queue(const struct i915_request *prev, * Even with preemption, there are times when we think it is better not * to preempt and leave an ostensibly lower priority request in flight. */ - if (port_request(execlists->port) == prev) + if (i915_request_is_active(prev)) return true; return rq_prio(prev) >= rq_prio(next); @@ -389,13 +428,17 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); - desc = ctx->desc_template; /* bits 0-11 */ - GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); + desc = INTEL_LEGACY_32B_CONTEXT; + if (i915_vm_is_4lvl(ce->vm)) + desc = INTEL_LEGACY_64B_CONTEXT; + desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; + + desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + if (IS_GEN(engine->i915, 8)) + desc |= GEN8_CTX_L3LLC_COHERENT; desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; /* bits 12-31 */ - GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); - /* * The following 32bits are copied into the OA reports (dword 2). * Consider updating oa_get_render_ctx_id in i915_perf.c when changing @@ -442,13 +485,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) struct intel_engine_cs *owner; if (i915_request_completed(rq)) - break; + continue; /* XXX */ __i915_request_unsubmit(rq); unwind_wa_tail(rq); - GEM_BUG_ON(rq->hw_context->inflight); - /* * Push the request back into the queue for later resubmission. * If this request is not native to this physical engine (i.e. @@ -468,6 +509,19 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { + /* + * Decouple the virtual breadcrumb before moving it + * back to the virtual engine -- we don't want the + * request to complete in the background and try + * and cancel the breadcrumb on the virtual engine + * (instead of the old engine where it is linked)! 
+ */ + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags)) { + spin_lock(&rq->lock); + i915_request_cancel_breadcrumb(rq); + spin_unlock(&rq->lock); + } rq->engine = owner; owner->submit_request(rq); active = NULL; @@ -500,32 +554,45 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } -inline void -execlists_user_begin(struct intel_engine_execlists *execlists, - const struct execlist_port *port) +static inline struct intel_engine_cs * +__execlists_schedule_in(struct i915_request *rq) { - execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER); -} + struct intel_engine_cs * const engine = rq->engine; + struct intel_context * const ce = rq->hw_context; -inline void -execlists_user_end(struct intel_engine_execlists *execlists) -{ - execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); + intel_context_get(ce); + + intel_gt_pm_get(engine->gt); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + intel_engine_context_in(engine); + + return engine; } -static inline void -execlists_context_schedule_in(struct i915_request *rq) +static inline struct i915_request * +execlists_schedule_in(struct i915_request *rq, int idx) { - GEM_BUG_ON(rq->hw_context->inflight); + struct intel_context * const ce = rq->hw_context; + struct intel_engine_cs *old; - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); - intel_engine_context_in(rq->engine); - rq->hw_context->inflight = rq->engine; + GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); + trace_i915_request_in(rq, idx); + + old = READ_ONCE(ce->inflight); + do { + if (!old) { + WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq)); + break; + } + } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); + + GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); + return i915_request_get(rq); } -static void kick_siblings(struct i915_request *rq) +static void kick_siblings(struct i915_request *rq, struct intel_context *ce) { - struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine); + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); struct i915_request *next = READ_ONCE(ve->request); if (next && next->execution_mask & ~rq->execution_mask) @@ -533,29 +600,53 @@ static void kick_siblings(struct i915_request *rq) } static inline void -execlists_context_schedule_out(struct i915_request *rq, unsigned long status) +__execlists_schedule_out(struct i915_request *rq, + struct intel_engine_cs * const engine) { - rq->hw_context->inflight = NULL; - intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, status); - trace_i915_request_out(rq); + struct intel_context * const ce = rq->hw_context; + + intel_engine_context_out(engine); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + intel_gt_pm_put(engine->gt); /* - * If this is part of a virtual engine, its next request may have - * been blocked waiting for access to the active context. We have - * to kick all the siblings again in case we need to switch (e.g. - * the next request is not runnable on this engine). Hopefully, - * we will already have submitted the next request before the - * tasklet runs and do not need to rebuild each virtual tree - * and kick everyone again. + * If this is part of a virtual engine, its next request may + * have been blocked waiting for access to the active context. + * We have to kick all the siblings again in case we need to + * switch (e.g. the next request is not runnable on this + * engine). 
Hopefully, we will already have submitted the next + * request before the tasklet runs and do not need to rebuild + * each virtual tree and kick everyone again. */ - if (rq->engine != rq->hw_context->engine) - kick_siblings(rq); + if (ce->engine != engine) + kick_siblings(rq, ce); + + intel_context_put(ce); +} + +static inline void +execlists_schedule_out(struct i915_request *rq) +{ + struct intel_context * const ce = rq->hw_context; + struct intel_engine_cs *cur, *old; + + trace_i915_request_out(rq); + GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); + + old = READ_ONCE(ce->inflight); + do + cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; + while (!try_cmpxchg(&ce->inflight, &old, cur)); + if (!cur) + __execlists_schedule_out(rq, old); + + i915_request_put(rq); } -static u64 execlists_update_context(struct i915_request *rq) +static u64 execlists_update_context(const struct i915_request *rq) { struct intel_context *ce = rq->hw_context; + u64 desc; ce->lrc_reg_state[CTX_RING_TAIL + 1] = intel_ring_set_tail(rq->ring, rq->tail); @@ -576,7 +667,11 @@ static u64 execlists_update_context(struct i915_request *rq) * wmb). */ mb(); - return ce->lrc_desc; + + desc = ce->lrc_desc; + ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + + return desc; } static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) @@ -590,12 +685,65 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc } } +static __maybe_unused void +trace_ports(const struct intel_engine_execlists *execlists, + const char *msg, + struct i915_request * const *ports) +{ + const struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + + GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n", + engine->name, msg, + ports[0]->fence.context, + ports[0]->fence.seqno, + i915_request_completed(ports[0]) ? "!" : + i915_request_started(ports[0]) ? "*" : + "", + ports[1] ? ports[1]->fence.context : 0, + ports[1] ? ports[1]->fence.seqno : 0); +} + +static __maybe_unused bool +assert_pending_valid(const struct intel_engine_execlists *execlists, + const char *msg) +{ + struct i915_request * const *port, *rq; + struct intel_context *ce = NULL; + + trace_ports(execlists, msg, execlists->pending); + + if (!execlists->pending[0]) + return false; + + if (execlists->pending[execlists_num_ports(execlists)]) + return false; + + for (port = execlists->pending; (rq = *port); port++) { + if (ce == rq->hw_context) + return false; + + ce = rq->hw_context; + if (i915_request_completed(rq)) + continue; + + if (i915_active_is_idle(&ce->active)) + return false; + + if (!i915_vma_is_pinned(ce->state)) + return false; + } + + return ce; +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; - struct execlist_port *port = execlists->port; unsigned int n; + GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); + /* * We can skip acquiring intel_runtime_pm_get() here as it was taken * on our behalf by the request (see i915_gem_mark_busy()) and it will @@ -604,7 +752,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) * that all ELSP are drained i.e. we have processed the CSB, * before allowing ourselves to idle and calling intel_runtime_pm_put(). 
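execlists_schedule_in() and execlists_schedule_out() above track how many submitted requests share a context by keeping a small counter in the low bits of ce->inflight, next to the (aligned) engine pointer, updated with try_cmpxchg via the driver's ptr_inc()/ptr_dec()/ptr_unmask_bits() helpers. The sketch below shows only the packing idea in single-threaded form; the helper names in it are illustrative and the lockless cmpxchg loop is deliberately omitted:

/*
 * "Counter in the low pointer bits" sketch: an aligned pointer has zero
 * low bits, so a small count can be stored there and stripped on read.
 */
#include <assert.h>
#include <stdint.h>

#define LOW_BITS 2      /* room for a 0..3 count */

static void *pack(void *ptr, uintptr_t count)
{
        assert(((uintptr_t)ptr & ((1u << LOW_BITS) - 1)) == 0);
        assert(count < (1u << LOW_BITS));
        return (void *)((uintptr_t)ptr | count);
}

static void *unpack(void *packed, uintptr_t *count)
{
        *count = (uintptr_t)packed & ((1u << LOW_BITS) - 1);
        return (void *)((uintptr_t)packed & ~(uintptr_t)((1u << LOW_BITS) - 1));
}

int main(void)
{
        static int engine __attribute__((aligned(4)));
        uintptr_t count;
        void *inflight = pack(&engine, 0);              /* first request in flight */

        inflight = (void *)((uintptr_t)inflight + 1);   /* "ptr_inc": second request */
        assert(unpack(inflight, &count) == (void *)&engine && count == 1);

        inflight = (void *)((uintptr_t)inflight - 1);   /* "ptr_dec": one retires */
        assert(unpack(inflight, &count) == (void *)&engine && count == 0);
        return 0;
}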
*/ - GEM_BUG_ON(!intel_wakeref_active(&engine->wakeref)); + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); /* * ELSQ note: the submit queue is not cleared after being submitted @@ -613,38 +761,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) * of elsq entries, keep this in mind before changing the loop below. */ for (n = execlists_num_ports(execlists); n--; ) { - struct i915_request *rq; - unsigned int count; - u64 desc; - - rq = port_unpack(&port[n], &count); - if (rq) { - GEM_BUG_ON(count > !n); - if (!count++) - execlists_context_schedule_in(rq); - port_set(&port[n], port_pack(rq, count)); - desc = execlists_update_context(rq); - GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - - GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, n, - port[n].context_id, count, - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq), - rq_prio(rq)); - } else { - GEM_BUG_ON(!n); - desc = 0; - } + struct i915_request *rq = execlists->pending[n]; - write_desc(execlists, desc, n); + write_desc(execlists, + rq ? execlists_update_context(rq) : 0, + n); } /* we need to manually load the submit queue */ if (execlists->ctrl_reg) writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct intel_context *ce) @@ -668,6 +794,7 @@ static bool can_merge_ctx(const struct intel_context *prev, static bool can_merge_rq(const struct i915_request *prev, const struct i915_request *next) { + GEM_BUG_ON(prev == next); GEM_BUG_ON(!assert_priority_queue(prev, next)); if (!can_merge_ctx(prev->hw_context, next->hw_context)) @@ -676,58 +803,6 @@ static bool can_merge_rq(const struct i915_request *prev, return true; } -static void port_assign(struct execlist_port *port, struct i915_request *rq) -{ - GEM_BUG_ON(rq == port_request(port)); - - if (port_isset(port)) - i915_request_put(port_request(port)); - - port_set(port, port_pack(i915_request_get(rq), port_count(port))); -} - -static void inject_preempt_context(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - struct intel_context *ce = engine->preempt_context; - unsigned int n; - - GEM_BUG_ON(execlists->preempt_complete_status != - upper_32_bits(ce->lrc_desc)); - - /* - * Switch to our empty preempt context so - * the state of the GPU is known (idle). - */ - GEM_TRACE("%s\n", engine->name); - for (n = execlists_num_ports(execlists); --n; ) - write_desc(execlists, 0, n); - - write_desc(execlists, ce->lrc_desc, n); - - /* we need to manually load the submit queue */ - if (execlists->ctrl_reg) - writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); - execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); - - (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); -} - -static void complete_preempt_context(struct intel_engine_execlists *execlists) -{ - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); - - if (inject_preempt_hang(execlists)) - return; - - execlists_cancel_port_requests(execlists); - __unwind_incomplete_requests(container_of(execlists, - struct intel_engine_cs, - execlists)); -} - static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { @@ -792,7 +867,7 @@ static bool virtual_matches(const struct virtual_engine *ve, * we reuse the register offsets). This is a very small * hystersis on the greedy seelction algorithm. 
*/ - inflight = READ_ONCE(ve->context.inflight); + inflight = intel_context_inflight(&ve->context); if (inflight && inflight != engine) return false; @@ -815,13 +890,120 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, spin_unlock(&old->breadcrumbs.irq_lock); } +static struct i915_request * +last_active(const struct intel_engine_execlists *execlists) +{ + struct i915_request * const *last = execlists->active; + + while (*last && i915_request_completed(*last)) + last++; + + return *last; +} + +static void defer_request(struct i915_request *rq, struct list_head * const pl) +{ + LIST_HEAD(list); + + /* + * We want to move the interrupted request to the back of + * the round-robin list (i.e. its priority level), but + * in doing so, we must then move all requests that were in + * flight and were waiting for the interrupted request to + * be run after it again. + */ + do { + struct i915_dependency *p; + + GEM_BUG_ON(i915_request_is_active(rq)); + list_move_tail(&rq->sched.link, pl); + + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + /* No waiter should start before its signaler */ + GEM_BUG_ON(i915_request_started(w) && + !i915_request_completed(rq)); + + GEM_BUG_ON(i915_request_is_active(w)); + if (list_empty(&w->sched.link)) + continue; /* Not yet submitted; unready */ + + if (rq_prio(w) < rq_prio(rq)) + continue; + + GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static void defer_active(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = __unwind_incomplete_requests(engine); + if (!rq) + return; + + defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); +} + +static bool +need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) +{ + int hint; + + if (!intel_engine_has_semaphores(engine)) + return false; + + if (list_is_last(&rq->sched.link, &engine->active.requests)) + return false; + + hint = max(rq_prio(list_next_entry(rq, sched.link)), + engine->execlists.queue_priority_hint); + + return hint >= effective_prio(rq); +} + +static int +switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) +{ + if (list_is_last(&rq->sched.link, &engine->active.requests)) + return INT_MIN; + + return rq_prio(list_next_entry(rq, sched.link)); +} + +static bool +enable_timeslice(const struct intel_engine_execlists *execlists) +{ + const struct i915_request *rq = *execlists->active; + + if (i915_request_completed(rq)) + return false; + + return execlists->switch_priority_hint >= effective_prio(rq); +} + +static void record_preemption(struct intel_engine_execlists *execlists) +{ + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - const struct execlist_port * const last_port = - &execlists->port[execlists->port_mask]; - struct i915_request *last = port_request(port); + struct i915_request **port = execlists->pending; + struct i915_request ** const last_port = port + execlists->port_mask; + struct i915_request *last; struct rb_node *rb; bool submit = false; @@ -867,65 +1049,100 @@ static void execlists_dequeue(struct 
intel_engine_cs *engine) break; } + /* + * If the queue is higher priority than the last + * request in the currently active context, submit afresh. + * We will resubmit again afterwards in case we need to split + * the active context to interject the preemption request, + * i.e. we will retrigger preemption following the ack in case + * of trouble. + */ + last = last_active(execlists); if (last) { - /* - * Don't resubmit or switch until all outstanding - * preemptions (lite-restore) are seen. Then we - * know the next preemption status we see corresponds - * to this ELSP update. - */ - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - GEM_BUG_ON(!port_count(&port[0])); + if (need_preempt(engine, last, rb)) { + GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n", + engine->name, + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); + record_preemption(execlists); - /* - * If we write to ELSP a second time before the HW has had - * a chance to respond to the previous write, we can confuse - * the HW and hit "undefined behaviour". After writing to ELSP, - * we must then wait until we see a context-switch event from - * the HW to indicate that it has had a chance to respond. - */ - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - return; + /* + * Don't let the RING_HEAD advance past the breadcrumb + * as we unwind (and until we resubmit) so that we do + * not accidentally tell it to go backwards. + */ + ring_set_paused(engine, 1); - if (need_preempt(engine, last, rb)) { - inject_preempt_context(engine); - return; - } + /* + * Note that we have not stopped the GPU at this point, + * so we are unwinding the incomplete requests as they + * remain inflight and so by the time we do complete + * the preemption, some of the unwound requests may + * complete! + */ + __unwind_incomplete_requests(engine); - /* - * In theory, we could coalesce more requests onto - * the second port (the first port is active, with - * no preemptions pending). However, that means we - * then have to deal with the possible lite-restore - * of the second port (as we submit the ELSP, there - * may be a context-switch) but also we may complete - * the resubmission before the context-switch. Ergo, - * coalescing onto the second port will cause a - * preemption event, but we cannot predict whether - * that will affect port[0] or port[1]. - * - * If the second port is already active, we can wait - * until the next context-switch before contemplating - * new requests. The GPU will be busy and we should be - * able to resubmit the new ELSP before it idles, - * avoiding pipeline bubbles (momentary pauses where - * the driver is unable to keep up the supply of new - * work). However, we have to double check that the - * priorities of the ports haven't been switch. - */ - if (port_count(&port[1])) - return; + /* + * If we need to return to the preempted context, we + * need to skip the lite-restore and force it to + * reload the RING_TAIL. Otherwise, the HW has a + * tendency to ignore us rewinding the TAIL to the + * end of an earlier request. 
+ */ + last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; + last = NULL; + } else if (need_timeslice(engine, last) && + !timer_pending(&engine->execlists.timer)) { + GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n", + engine->name, + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); - /* - * WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_fini_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; + ring_set_paused(engine, 1); + defer_active(engine); + + /* + * Unlike for preemption, if we rewind and continue + * executing the same context as previously active, + * the order of execution will remain the same and + * the tail will only advance. We do not need to + * force a full context restore, as a lite-restore + * is sufficient to resample the monotonic TAIL. + * + * If we switch to any other context, similarly we + * will not rewind TAIL of current context, and + * normal save/restore will preserve state and allow + * us to later continue executing the same request. + */ + last = NULL; + } else { + /* + * Otherwise if we already have a request pending + * for execution after the current one, we can + * just wait until the next CS event before + * queuing more. In either case we will force a + * lite-restore preemption event, but if we wait + * we hopefully coalesce several updates into a single + * submission. + */ + if (!list_is_last(&last->sched.link, + &engine->active.requests)) + return; + + /* + * WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == rq:TAIL as we resubmit the + * request. See gen8_emit_fini_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; + } } while (rb) { /* XXX virtual is always taking precedence */ @@ -955,9 +1172,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine) continue; } + if (i915_request_completed(rq)) { + ve->request = NULL; + ve->base.execlists.queue_priority_hint = INT_MIN; + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + + rq->engine = engine; + __i915_request_submit(rq); + + spin_unlock(&ve->base.active.lock); + + rb = rb_first_cached(&execlists->virtual); + continue; + } + if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - return; /* leave this rq for another engine */ + return; /* leave this for another */ } GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n", @@ -1006,9 +1238,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - submit = true; - last = rq; + if (!i915_request_completed(rq)) { + submit = true; + last = rq; + } } spin_unlock(&ve->base.active.lock); @@ -1021,6 +1254,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { + if (i915_request_completed(rq)) + goto skip; + /* * Can we combine this request with the current port? 
* It has to be the same context/ringbuffer and not @@ -1060,19 +1296,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ctx_single_port_submission(rq->hw_context)) goto done; - - if (submit) - port_assign(port, last); + *port = execlists_schedule_in(last, port - execlists->pending); port++; - - GEM_BUG_ON(port_isset(port)); } - __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - last = rq; submit = true; +skip: + __i915_request_submit(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -1097,54 +1328,34 @@ done: * interrupt for secondary ports). */ execlists->queue_priority_hint = queue_prio(execlists); + GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n", + engine->name, execlists->queue_priority_hint, + yesno(submit)); if (submit) { - port_assign(port, last); + *port = execlists_schedule_in(last, port - execlists->pending); + memset(port + 1, 0, (last_port - port) * sizeof(*port)); + execlists->switch_priority_hint = + switch_prio(engine, *execlists->pending); execlists_submit_ports(engine); + } else { + ring_set_paused(engine, 0); } - - /* We must always keep the beast fed if we have work piled up */ - GEM_BUG_ON(rb_first_cached(&execlists->queue) && - !port_isset(execlists->port)); - - /* Re-evaluate the executing context setup after each preemptive kick */ - if (last) - execlists_user_begin(execlists, execlists->port); - - /* If the engine is now idle, so should be the flag; and vice versa. */ - GEM_BUG_ON(execlists_is_active(&engine->execlists, - EXECLISTS_ACTIVE_USER) == - !port_isset(engine->execlists.port)); } -void -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) +static void +cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct execlist_port *port = execlists->port; - unsigned int num_ports = execlists_num_ports(execlists); + struct i915_request * const *port, *rq; - while (num_ports-- && port_isset(port)) { - struct i915_request *rq = port_request(port); + for (port = execlists->pending; (rq = *port); port++) + execlists_schedule_out(rq); + memset(execlists->pending, 0, sizeof(execlists->pending)); - GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", - rq->engine->name, - (unsigned int)(port - execlists->port), - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq)); - - GEM_BUG_ON(!execlists->active); - execlists_context_schedule_out(rq, - i915_request_completed(rq) ? 
- INTEL_CONTEXT_SCHEDULE_OUT : - INTEL_CONTEXT_SCHEDULE_PREEMPTED); - - i915_request_put(rq); - - memset(port, 0, sizeof(*port)); - port++; - } - - execlists_clear_all_active(execlists); + for (port = execlists->active; (rq = *port); port++) + execlists_schedule_out(rq); + execlists->active = + memset(execlists->inflight, 0, sizeof(execlists->inflight)); } static inline void @@ -1160,15 +1371,100 @@ reset_in_progress(const struct intel_engine_execlists *execlists) return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } +enum csb_step { + CSB_NOP, + CSB_PROMOTE, + CSB_PREEMPT, + CSB_COMPLETE, +}; + +/* + * Starting with Gen12, the status has a new format: + * + * bit 0: switched to new queue + * bit 1: reserved + * bit 2: semaphore wait mode (poll or signal), only valid when + * switch detail is set to "wait on semaphore" + * bits 3-5: engine class + * bits 6-11: engine instance + * bits 12-14: reserved + * bits 15-25: sw context id of the lrc the GT switched to + * bits 26-31: sw counter of the lrc the GT switched to + * bits 32-35: context switch detail + * - 0: ctx complete + * - 1: wait on sync flip + * - 2: wait on vblank + * - 3: wait on scanline + * - 4: wait on semaphore + * - 5: context preempted (not on SEMAPHORE_WAIT or + * WAIT_FOR_EVENT) + * bit 36: reserved + * bits 37-43: wait detail (for switch detail 1 to 4) + * bits 44-46: reserved + * bits 47-57: sw context id of the lrc the GT switched away from + * bits 58-63: sw counter of the lrc the GT switched away from + */ +static inline enum csb_step +gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +{ + u32 lower_dw = csb[0]; + u32 upper_dw = csb[1]; + bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw); + bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); + bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; + + if (!ctx_away_valid && ctx_to_valid) + return CSB_PROMOTE; + + /* + * The context switch detail is not guaranteed to be 5 when a preemption + * occurs, so we can't just check for that. The check below works for + * all the cases we care about, including preemptions of WAIT + * instructions and lite-restore. Preempt-to-idle via the CTRL register + * would require some extra handling, but we don't support that. + */ + if (new_queue && ctx_away_valid) + return CSB_PREEMPT; + + /* + * switch detail = 5 is covered by the case above and we do not expect a + * context switch on an unsuccessful wait instruction since we always + * use polling mode. 
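For reference, a plain-C decode of the Gen12 CSB layout documented in the comment above. The driver's GEN12_CSB_CTX_VALID() and GEN12_CTX_SWITCH_DETAIL() helpers are assumed to test the sw-context-id and switch-detail fields respectively; their exact definitions are not part of this hunk, so only the documented bit positions are used here.

#include <stdint.h>

struct gen12_csb {
	unsigned int new_queue;     /* bit 0: switched to new queue */
	unsigned int to_ctx_id;     /* bits 15-25 */
	unsigned int to_counter;    /* bits 26-31 */
	unsigned int switch_detail; /* bits 32-35 */
	unsigned int away_ctx_id;   /* bits 47-57 */
	unsigned int away_counter;  /* bits 58-63 */
};

static struct gen12_csb gen12_csb_decode(uint32_t lower_dw, uint32_t upper_dw)
{
	uint64_t csb = ((uint64_t)upper_dw << 32) | lower_dw;
	struct gen12_csb ev = {
		.new_queue     = csb & 0x1,
		.to_ctx_id     = (csb >> 15) & 0x7ff,
		.to_counter    = (csb >> 26) & 0x3f,
		.switch_detail = (csb >> 32) & 0xf,
		.away_ctx_id   = (csb >> 47) & 0x7ff,
		.away_counter  = (csb >> 58) & 0x3f,
	};

	return ev;
}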
+ */ + GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); + + if (*execlists->active) { + GEM_BUG_ON(!ctx_away_valid); + return CSB_COMPLETE; + } + + return CSB_NOP; +} + +static inline enum csb_step +gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +{ + unsigned int status = *csb; + + if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) + return CSB_PROMOTE; + + if (status & GEN8_CTX_STATUS_PREEMPTED) + return CSB_PREEMPT; + + if (*execlists->active) + return CSB_COMPLETE; + + return CSB_NOP; +} + static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; const u32 * const buf = execlists->csb_status; const u8 num_entries = execlists->csb_size; u8 head, tail; - lockdep_assert_held(&engine->active.lock); GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); /* @@ -1198,9 +1494,7 @@ static void process_csb(struct intel_engine_cs *engine) rmb(); do { - struct i915_request *rq; - unsigned int status; - unsigned int count; + enum csb_step csb_step; if (++head == num_entries) head = 0; @@ -1223,68 +1517,43 @@ static void process_csb(struct intel_engine_cs *engine) * status notifier. */ - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n", engine->name, head, - buf[2 * head + 0], buf[2 * head + 1], - execlists->active); - - status = buf[2 * head]; - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | - GEN8_CTX_STATUS_PREEMPTED)) - execlists_set_active(execlists, - EXECLISTS_ACTIVE_HWACK); - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_HWACK); - - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) - continue; + buf[2 * head + 0], buf[2 * head + 1]); - /* We should never get a COMPLETED | IDLE_ACTIVE! */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); + if (INTEL_GEN(engine->i915) >= 12) + csb_step = gen12_csb_parse(execlists, buf + 2 * head); + else + csb_step = gen8_csb_parse(execlists, buf + 2 * head); - if (status & GEN8_CTX_STATUS_COMPLETE && - buf[2*head + 1] == execlists->preempt_complete_status) { - GEM_TRACE("%s preempt-idle\n", engine->name); - complete_preempt_context(execlists); - continue; - } + switch (csb_step) { + case CSB_PREEMPT: /* cancel old inflight, prepare for switch */ + trace_ports(execlists, "preempted", execlists->active); - if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists_is_active(execlists, - EXECLISTS_ACTIVE_PREEMPT)) - continue; + while (*execlists->active) + execlists_schedule_out(*execlists->active++); - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); + /* fallthrough */ + case CSB_PROMOTE: /* switch pending to inflight */ + GEM_BUG_ON(*execlists->active); + GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); + execlists->active = + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending)); - rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, - port->context_id, count, - rq ? rq->fence.context : 0, - rq ? rq->fence.seqno : 0, - rq ? hwsp_seqno(rq) : 0, - rq ? 
rq_prio(rq) : 0); + if (enable_timeslice(execlists)) + mod_timer(&execlists->timer, jiffies + 1); - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); + if (!inject_preempt_hang(execlists)) + ring_set_paused(engine, 0); - GEM_BUG_ON(count == 0); - if (--count == 0) { - /* - * On the final event corresponding to the - * submission of this context, we expect either - * an element-switch event or a completion - * event (and on completion, the active-idle - * marker). No more preemptions, lite-restore - * or otherwise. - */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + WRITE_ONCE(execlists->pending[0], NULL); + break; + + case CSB_COMPLETE: /* port0 completed, advanced to port1 */ + trace_ports(execlists, "completed", execlists->active); /* * We rely on the hardware being strongly @@ -1292,22 +1561,16 @@ static void process_csb(struct intel_engine_cs *engine) * coherent (visible from the CPU) before the * user interrupt and CSB is processed. */ - GEM_BUG_ON(!i915_request_completed(rq)); - - execlists_context_schedule_out(rq, - INTEL_CONTEXT_SCHEDULE_OUT); - i915_request_put(rq); + GEM_BUG_ON(!i915_request_completed(*execlists->active) && + !reset_in_progress(execlists)); + execlists_schedule_out(*execlists->active++); - GEM_TRACE("%s completed ctx=%d\n", - engine->name, port->context_id); + GEM_BUG_ON(execlists->active - execlists->inflight > + execlists_num_ports(execlists)); + break; - port = execlists_port_complete(execlists, port); - if (port_isset(port)) - execlists_user_begin(execlists, port); - else - execlists_user_end(execlists); - } else { - port_set(port, port_pack(rq, count)); + case CSB_NOP: + break; } } while (head != tail); @@ -1330,9 +1593,7 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - - process_csb(engine); - if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) + if (!engine->execlists.pending[0]) execlists_dequeue(engine); } @@ -1345,14 +1606,21 @@ static void execlists_submission_tasklet(unsigned long data) struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; unsigned long flags; - GEM_TRACE("%s awake?=%d, active=%x\n", - engine->name, - !!intel_wakeref_active(&engine->wakeref), - engine->execlists.active); + process_csb(engine); + if (!READ_ONCE(engine->execlists.pending[0])) { + spin_lock_irqsave(&engine->active.lock, flags); + __execlists_submission_tasklet(engine); + spin_unlock_irqrestore(&engine->active.lock, flags); + } +} - spin_lock_irqsave(&engine->active.lock, flags); - __execlists_submission_tasklet(engine); - spin_unlock_irqrestore(&engine->active.lock, flags); +static void execlists_submission_timer(struct timer_list *timer) +{ + struct intel_engine_cs *engine = + from_timer(engine, timer, execlists.timer); + + /* Kick the tasklet for some interrupt coalescing and reset handling */ + tasklet_hi_schedule(&engine->execlists.tasklet); } static void queue_request(struct intel_engine_cs *engine, @@ -1376,12 +1644,16 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) tasklet_hi_schedule(&execlists->tasklet); } -static void submit_queue(struct intel_engine_cs *engine, int prio) +static void submit_queue(struct intel_engine_cs *engine, + const 
struct i915_request *rq) { - if (prio > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = prio; - __submit_queue_imm(engine); - } + struct intel_engine_execlists *execlists = &engine->execlists; + + if (rq_prio(rq) <= execlists->queue_priority_hint) + return; + + execlists->queue_priority_hint = rq_prio(rq); + __submit_queue_imm(engine); } static void execlists_submit_request(struct i915_request *request) @@ -1397,7 +1669,7 @@ static void execlists_submit_request(struct i915_request *request) GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); - submit_queue(engine, rq_prio(request)); + submit_queue(engine, request); spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -1405,28 +1677,58 @@ static void execlists_submit_request(struct i915_request *request) static void __execlists_context_fini(struct intel_context *ce) { intel_ring_put(ce->ring); - - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void execlists_context_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); + GEM_BUG_ON(!i915_active_is_idle(&ce->active)); GEM_BUG_ON(intel_context_is_pinned(ce)); if (ce->state) __execlists_context_fini(ce); + intel_context_fini(ce); intel_context_free(ce); } +static void +set_redzone(void *vaddr, const struct intel_engine_cs *engine) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + vaddr += LRC_HEADER_PAGES * PAGE_SIZE; + vaddr += engine->context_size; + + memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); +} + +static void +check_redzone(const void *vaddr, const struct intel_engine_cs *engine) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + vaddr += LRC_HEADER_PAGES * PAGE_SIZE; + vaddr += engine->context_size; + + if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) + dev_err_once(engine->i915->drm.dev, + "%s context redzone overwritten!\n", + engine->name); +} + static void execlists_context_unpin(struct intel_context *ce) { + check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, + ce->engine); + i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); - intel_ring_unpin(ce->ring); + intel_ring_reset(ce->ring, ce->ring->tail); } static void @@ -1444,9 +1746,12 @@ __execlists_update_reg_state(struct intel_context *ce, regs[CTX_RING_TAIL + 1] = ring->tail; /* RPCS */ - if (engine->class == RENDER_CLASS) + if (engine->class == RENDER_CLASS) { regs[CTX_R_PWR_CLK_STATE + 1] = intel_sseu_make_rpcs(engine->i915, &ce->sseu); + + i915_oa_init_reg_state(engine, ce, regs); + } } static int @@ -1456,19 +1761,12 @@ __execlists_context_pin(struct intel_context *ce, void *vaddr; int ret; - GEM_BUG_ON(!ce->gem_context->vm); - - ret = execlists_context_deferred_alloc(ce, engine); - if (ret) - goto err; GEM_BUG_ON(!ce->state); - ret = intel_context_active_acquire(ce, - engine->i915->ggtt.pin_bias | - PIN_OFFSET_BIAS | - PIN_HIGH); + ret = intel_context_active_acquire(ce); if (ret) goto err; + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); vaddr = i915_gem_object_pin_map(ce->state->obj, i915_coherent_map_type(engine->i915) | @@ -1478,13 +1776,9 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = intel_ring_pin(ce->ring); - if (ret) - goto unpin_map; - ret = i915_gem_context_pin_hw_id(ce->gem_context); if (ret) - goto unpin_ring; + goto unpin_map; ce->lrc_desc = lrc_descriptor(ce, engine); 
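The set_redzone()/check_redzone() helpers added above poison the slack past the context image and warn if it is ever overwritten. A standalone sketch of the same idea, open-coding the kernel-internal memchr_inv() and using a placeholder poison byte (names and values here are illustrative only):

#include <string.h>
#include <stddef.h>

#define REDZONE_POISON 0x5a  /* placeholder poison byte for this sketch */

/* Poison the tail of a buffer so any overrun is detectable later. */
static void redzone_set(void *buf, size_t used, size_t redzone)
{
	memset((char *)buf + used, REDZONE_POISON, redzone);
}

/* Returns nonzero if something scribbled over the poisoned tail. */
static int redzone_check(const void *buf, size_t used, size_t redzone)
{
	const unsigned char *p = (const unsigned char *)buf + used;
	size_t i;

	for (i = 0; i < redzone; i++)
		if (p[i] != REDZONE_POISON)
			return 1;

	return 0;
}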
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; @@ -1492,8 +1786,6 @@ __execlists_context_pin(struct intel_context *ce, return 0; -unpin_ring: - intel_ring_unpin(ce->ring); unpin_map: i915_gem_object_unpin_map(ce->state->obj); unpin_active: @@ -1507,6 +1799,11 @@ static int execlists_context_pin(struct intel_context *ce) return __execlists_context_pin(ce, ce->engine); } +static int execlists_context_alloc(struct intel_context *ce) +{ + return __execlists_context_alloc(ce, ce->engine); +} + static void execlists_context_reset(struct intel_context *ce) { /* @@ -1530,6 +1827,8 @@ static void execlists_context_reset(struct intel_context *ce) } static const struct intel_context_ops execlists_context_ops = { + .alloc = execlists_context_alloc, + .pin = execlists_context_pin, .unpin = execlists_context_unpin, @@ -1575,8 +1874,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) static int emit_pdps(struct i915_request *rq) { const struct intel_engine_cs * const engine = rq->engine; - struct i915_ppgtt * const ppgtt = - i915_vm_to_ppgtt(rq->gem_context->vm); + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm); int err, i; u32 *cs; @@ -1649,7 +1947,7 @@ static int execlists_request_alloc(struct i915_request *request) */ /* Unconditionally invalidate GPU caches and TLBs. */ - if (i915_vm_is_4lvl(request->gem_context->vm)) + if (i915_vm_is_4lvl(request->hw_context->vm)) ret = request->engine->emit_flush(request, EMIT_INVALIDATE); else ret = emit_pdps(request); @@ -1682,7 +1980,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) /* NB no one else is allowed to scribble over scratch + 256! */ *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_scratch_offset(engine->i915) + 256; + *batch++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); *batch++ = 0; *batch++ = MI_LOAD_REGISTER_IMM(1); @@ -1696,12 +1995,19 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_scratch_offset(engine->i915) + 256; + *batch++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); *batch++ = 0; return batch; } +static u32 slm_offset(struct intel_engine_cs *engine) +{ + return intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA); +} + /* * Typically we only have one indirect_ctx and per_ctx batch buffer which are * initialized at the beginning and shared across all contexts but this field @@ -1733,8 +2039,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE, - i915_scratch_offset(engine->i915) + - 2 * CACHELINE_BYTES); + slm_offset(engine)); *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -1880,7 +2185,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) if (IS_ERR(obj)) return PTR_ERR(obj); - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -1920,6 +2225,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return 0; switch (INTEL_GEN(engine->i915)) { + case 12: case 11: return 0; case 10: @@ -1976,22 +2282,23 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) static void 
enable_execlists(struct intel_engine_cs *engine) { + u32 mode; + + assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); + intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ if (INTEL_GEN(engine->i915) >= 11) - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); else - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); + ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); - ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); + ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); - ENGINE_WRITE(engine, - RING_HWS_PGA, - i915_ggtt_offset(engine->status_page.vma)); + ENGINE_WRITE_FW(engine, + RING_HWS_PGA, + i915_ggtt_offset(engine->status_page.vma)); ENGINE_POSTING_READ(engine, RING_HWS_PGA); } @@ -1999,7 +2306,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine) { bool unexpected = false; - if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) { + if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) { DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n"); unexpected = true; } @@ -2047,34 +2354,32 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) __tasklet_disable_sync_once(&execlists->tasklet); GEM_BUG_ON(!reset_in_progress(execlists)); - intel_engine_stop_cs(engine); - /* And flush any current direct submission. */ spin_lock_irqsave(&engine->active.lock, flags); spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static bool lrc_regs_ok(const struct i915_request *rq) -{ - const struct intel_ring *ring = rq->ring; - const u32 *regs = rq->hw_context->lrc_reg_state; - - /* Quick spot check for the common signs of context corruption */ - if (regs[CTX_RING_BUFFER_CONTROL + 1] != - (RING_CTL_SIZE(ring->size) | RING_VALID)) - return false; - - if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma)) - return false; - - return true; + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * FIXME: Wa for more modern gens needs to be validated + */ + intel_engine_stop_cs(engine); } -static void reset_csb_pointers(struct intel_engine_execlists *execlists) +static void reset_csb_pointers(struct intel_engine_cs *engine) { + struct intel_engine_execlists * const execlists = &engine->execlists; const unsigned int reset_value = execlists->csb_size - 1; + ring_set_paused(engine, 0); + /* * After a reset, the HW starts writing into CSB entry [0]. 
We * therefore have to set our HEAD pointer back one entry so that @@ -2094,15 +2399,15 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists) static struct i915_request *active_request(struct i915_request *rq) { - const struct list_head * const list = &rq->engine->active.requests; - const struct intel_context * const context = rq->hw_context; + const struct list_head * const list = &rq->timeline->requests; + const struct intel_context * const ce = rq->hw_context; struct i915_request *active = NULL; - list_for_each_entry_from_reverse(rq, list, sched.link) { + list_for_each_entry_from_reverse(rq, list, link) { if (i915_request_completed(rq)) break; - if (rq->hw_context != context) + if (rq->hw_context != ce) break; active = rq; @@ -2121,33 +2426,27 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) process_csb(engine); /* drain preemption events */ /* Following the reset, we need to reload the CSB read/write pointers */ - reset_csb_pointers(&engine->execlists); + reset_csb_pointers(engine); /* * Save the currently executing context, even if we completed * its request, it was still running at the time of the * reset and will have been clobbered. */ - if (!port_isset(execlists->port)) - goto out_clear; + rq = execlists_active(execlists); + if (!rq) + goto unwind; - rq = port_request(execlists->port); ce = rq->hw_context; - - /* - * Catch up with any missed context-switch interrupts. - * - * Ideally we would just read the remaining CSB entries now that we - * know the gpu is idle. However, the CSB registers are sometimes^W - * often trashed across a GPU reset! Instead we have to rely on - * guessing the missed context-switch events by looking at what - * requests were completed. - */ - execlists_cancel_port_requests(execlists); - + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); rq = active_request(rq); - if (!rq) + if (!rq) { + ce->ring->head = ce->ring->tail; goto out_replay; + } + + ce->ring->head = intel_ring_wrap(ce->ring, rq->head); /* * If this request hasn't started yet, e.g. it is waiting on a @@ -2161,7 +2460,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * Otherwise, if we have not started yet, the request should replay * perfectly and we do not need to flag the result as being erroneous. */ - if (!i915_request_started(rq) && lrc_regs_ok(rq)) + if (!i915_request_started(rq)) goto out_replay; /* @@ -2175,8 +2474,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * and have to at least restore the RING register in the context * image back to the expected values to skip over the guilty request. */ - i915_reset_request(rq, stalled); - if (!stalled && lrc_regs_ok(rq)) + __i915_request_reset(rq, stalled); + if (!stalled) goto out_replay; /* @@ -2196,17 +2495,15 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) execlists_init_reg_state(regs, ce, engine, ce->ring); out_replay: - /* Rerun the request; its payload has been neutered (if guilty). */ - ce->ring->head = - rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail; + GEM_TRACE("%s replay {head:%04x, tail:%04x\n", + engine->name, ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); +unwind: /* Push back any incomplete requests for replay after the reset. 
*/ + cancel_port_requests(execlists); __unwind_incomplete_requests(engine); - -out_clear: - execlists_clear_all_active(execlists); } static void execlists_reset(struct intel_engine_cs *engine, bool stalled) @@ -2302,7 +2599,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; - GEM_BUG_ON(port_isset(execlists->port)); GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.func = nop_submission_tasklet; @@ -2440,7 +2736,8 @@ static int gen8_emit_flush_render(struct i915_request *request, { struct intel_engine_cs *engine = request->engine; u32 scratch_addr = - i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES; + intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); bool vf_flush_wa = false, dc_flush_wa = false; u32 *cs, flags = 0; int len; @@ -2505,6 +2802,63 @@ static int gen8_emit_flush_render(struct i915_request *request, return 0; } +static int gen11_emit_flush_render(struct i915_request *request, + u32 mode) +{ + struct intel_engine_cs *engine = request->engine; + const u32 scratch_addr = + intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + + if (mode & EMIT_FLUSH) { + u32 *cs; + u32 flags = 0; + + flags |= PIPE_CONTROL_CS_STALL; + + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + intel_ring_advance(request, cs); + } + + if (mode & EMIT_INVALIDATE) { + u32 *cs; + u32 flags = 0; + + flags |= PIPE_CONTROL_CS_STALL; + + flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + intel_ring_advance(request, cs); + } + + return 0; +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -2520,15 +2874,28 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) return cs; } -static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - request->timeline->hwsp_offset, - 0); + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = intel_hws_preempt_address(request->engine); + *cs++ = 0; + return cs; +} + +static __always_inline u32* +gen8_emit_fini_breadcrumb_footer(struct i915_request *request, + u32 *cs) +{ *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + if (intel_engine_has_semaphores(request->engine)) + cs = emit_preempt_busywait(request, cs); request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ 
-2536,51 +2903,53 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + request->timeline->hwsp_offset, + 0); + + return gen8_emit_fini_breadcrumb_footer(request, cs); +} + static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, request->timeline->hwsp_offset, PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE); + + /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL, 0); - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - request->tail = intel_ring_offset(request, cs); - assert_ring_tail_valid(request->ring, request->tail); - - return gen8_emit_wa_tail(request, cs); + return gen8_emit_fini_breadcrumb_footer(request, cs); } -static int gen8_init_rcs_context(struct i915_request *rq) +static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, + u32 *cs) { - int ret; - - ret = intel_engine_emit_ctx_wa(rq); - if (ret) - return ret; - - ret = intel_rcs_context_init_mocs(rq); - /* - * Failing to program the MOCS is non-fatal.The system will not - * run at peak performance. So generate an error and carry on. - */ - if (ret) - DRM_ERROR("MOCS failed to program: expect performance issues.\n"); + cs = gen8_emit_ggtt_write_rcs(cs, + request->fence.seqno, + request->timeline->hwsp_offset, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); - return i915_gem_render_state_emit(rq); + return gen8_emit_fini_breadcrumb_footer(request, cs); } static void execlists_park(struct intel_engine_cs *engine) { - intel_engine_park(engine); + del_timer(&engine->execlists.timer); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) @@ -2598,11 +2967,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; - if (!intel_vgpu_active(engine->i915)) + if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (engine->preempt_context && - HAS_LOGICAL_RING_PREEMPTION(engine->i915)) - engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + engine->flags |= I915_ENGINE_HAS_PREEMPTION; + } } static void execlists_destroy(struct intel_engine_cs *engine) @@ -2671,22 +3040,32 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } -int intel_execlists_submission_setup(struct intel_engine_cs *engine) +static void rcs_submission_override(struct intel_engine_cs *engine) { - /* Intentionally left blank. 
*/ - engine->buffer = NULL; + switch (INTEL_GEN(engine->i915)) { + case 12: + case 11: + engine->emit_flush = gen11_emit_flush_render; + engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; + break; + default: + engine->emit_flush = gen8_emit_flush_render; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + break; + } +} +int intel_execlists_submission_setup(struct intel_engine_cs *engine) +{ tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); + timer_setup(&engine->execlists.timer, execlists_submission_timer, 0); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); - if (engine->class == RENDER_CLASS) { - engine->init_context = gen8_init_rcs_context; - engine->emit_flush = gen8_emit_flush_render; - engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; - } + if (engine->class == RENDER_CLASS) + rcs_submission_override(engine); return 0; } @@ -2703,9 +3082,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) if (ret) return ret; - intel_engine_init_workarounds(engine); - intel_engine_init_whitelist(engine); - if (intel_init_workaround_bb(engine)) /* * We continue even if we fail to initialize WA batch @@ -2724,11 +3100,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_ELSP(base)); } - execlists->preempt_complete_status = ~0u; - if (engine->preempt_context) - execlists->preempt_complete_status = - upper_32_bits(engine->preempt_context->lrc_desc); - execlists->csb_status = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; @@ -2740,7 +3111,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; - reset_csb_pointers(execlists); + reset_csb_pointers(engine); return 0; } @@ -2753,6 +3124,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) default: MISSING_CASE(INTEL_GEN(engine->i915)); /* fall through */ + case 12: + indirect_ctx_offset = + GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; case 11: indirect_ctx_offset = GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; @@ -2779,7 +3154,7 @@ static void execlists_init_reg_state(u32 *regs, struct intel_engine_cs *engine, struct intel_ring *ring) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm); + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm); bool rcs = engine->class == RENDER_CLASS; u32 base = engine->mmio_base; @@ -2870,8 +3245,6 @@ static void execlists_init_reg_state(u32 *regs, if (rcs) { regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); - - i915_oa_init_reg_state(engine, ce, regs); } regs[CTX_END] = MI_BATCH_BUFFER_END; @@ -2896,6 +3269,8 @@ populate_lr_context(struct intel_context *ce, return ret; } + set_redzone(vaddr, engine); + if (engine->default_state) { /* * We only want to copy over the template context state; @@ -2923,11 +3298,6 @@ populate_lr_context(struct intel_context *ce, if (!engine->default_state) regs[CTX_CONTEXT_CONTROL + 1] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (ce->gem_context == engine->i915->preempt_context && - INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL + 1] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); ret = 0; err_unpin_ctx: @@ -2938,27 +3308,16 @@ err_unpin_ctx: return ret; } -static struct i915_timeline *get_timeline(struct i915_gem_context *ctx) -{ - if (ctx->timeline) - return 
i915_timeline_get(ctx->timeline); - else - return i915_timeline_create(ctx->i915, NULL); -} - -static int execlists_context_deferred_alloc(struct intel_context *ce, - struct intel_engine_cs *engine) +static int __execlists_context_alloc(struct intel_context *ce, + struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; + struct intel_ring *ring; struct i915_vma *vma; u32 context_size; - struct intel_ring *ring; - struct i915_timeline *timeline; int ret; - if (ce->state) - return 0; - + GEM_BUG_ON(ce->state); context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); /* @@ -2966,27 +3325,32 @@ static int execlists_context_deferred_alloc(struct intel_context *ce, * for our own use and for sharing with the GuC. */ context_size += LRC_HEADER_PAGES * PAGE_SIZE; + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + context_size += I915_GTT_PAGE_SIZE; /* for redzone */ ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); if (IS_ERR(ctx_obj)) return PTR_ERR(ctx_obj); - vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto error_deref_obj; } - timeline = get_timeline(ce->gem_context); - if (IS_ERR(timeline)) { - ret = PTR_ERR(timeline); - goto error_deref_obj; + if (!ce->timeline) { + struct intel_timeline *tl; + + tl = intel_timeline_create(engine->gt, NULL); + if (IS_ERR(tl)) { + ret = PTR_ERR(tl); + goto error_deref_obj; + } + + ce->timeline = tl; } - ring = intel_engine_create_ring(engine, - timeline, - ce->gem_context->ring_size); - i915_timeline_put(timeline); + ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; @@ -3044,6 +3408,7 @@ static void virtual_context_destroy(struct kref *kref) if (ve->context.state) __execlists_context_fini(&ve->context); + intel_context_fini(&ve->context); kfree(ve->bonds); kfree(ve); @@ -3096,6 +3461,8 @@ static void virtual_context_enter(struct intel_context *ce) for (n = 0; n < ve->num_siblings; n++) intel_engine_pm_get(ve->siblings[n]); + + intel_timeline_enter(ce->timeline); } static void virtual_context_exit(struct intel_context *ce) @@ -3103,6 +3470,8 @@ static void virtual_context_exit(struct intel_context *ce) struct virtual_engine *ve = container_of(ce, typeof(*ve), context); unsigned int n; + intel_timeline_exit(ce->timeline); + for (n = 0; n < ve->num_siblings; n++) intel_engine_pm_put(ve->siblings[n]); } @@ -3296,11 +3665,11 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, return ERR_PTR(-ENOMEM); ve->base.i915 = ctx->i915; + ve->base.gt = siblings[0]->gt; ve->base.id = -1; ve->base.class = OTHER_CLASS; ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; - ve->base.flags = I915_ENGINE_IS_VIRTUAL; /* * The decision on whether to submit a request using semaphores @@ -3397,8 +3766,18 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb; ve->base.emit_fini_breadcrumb_dw = sibling->emit_fini_breadcrumb_dw; + + ve->base.flags = sibling->flags; } + ve->base.flags |= I915_ENGINE_IS_VIRTUAL; + + err = __execlists_context_alloc(&ve->context, siblings[0]); + if (err) + goto err_put; + + __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags); + return &ve->context; err_put: diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 6bf34738b4e5..b8f20ad71169 100644 
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -64,5 +64,6 @@ #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 #define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 #define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A +#define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0xD #endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 1f9db50b1869..728704bbbe18 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,6 +23,7 @@ #include "i915_drv.h" #include "intel_engine.h" +#include "intel_gt.h" #include "intel_mocs.h" #include "intel_lrc.h" @@ -61,6 +62,10 @@ struct drm_i915_mocs_table { #define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ /* (e)LLC caching options */ +/* + * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means + * the same as LE_UC + */ #define LE_0_PAGETABLE _LE_CACHEABILITY(0) #define LE_1_UC _LE_CACHEABILITY(1) #define LE_2_WT _LE_CACHEABILITY(2) @@ -99,8 +104,9 @@ struct drm_i915_mocs_table { * of bspec. * * Entries not part of the following tables are undefined as far as - * userspace is concerned and shouldn't be relied upon. For the time - * being they will be initialized to PTE. + * userspace is concerned and shouldn't be relied upon. For Gen < 12 + * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for + * PTE and will be initialized to an invalid value. * * The last two entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older @@ -136,14 +142,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { }; #define GEN11_MOCS_ENTRIES \ - /* Base - Uncached (Deprecated) */ \ - MOCS_ENTRY(I915_MOCS_UNCACHED, \ - LE_1_UC | LE_TC_1_LLC, \ - L3_1_UC), \ - /* Base - L3 + LeCC:PAT (Deprecated) */ \ - MOCS_ENTRY(I915_MOCS_PTE, \ - LE_0_PAGETABLE | LE_TC_1_LLC, \ - L3_3_WB), \ + /* Entries 0 and 1 are defined per-platform */ \ /* Base - L3 + LLC */ \ MOCS_ENTRY(2, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ @@ -241,49 +240,86 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ L3_1_UC) +static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = { + /* Base - Error (Reserved for Non-Use) */ + MOCS_ENTRY(0, 0x0, 0x0), + /* Base - Reserved */ + MOCS_ENTRY(1, 0x0, 0x0), + + GEN11_MOCS_ENTRIES, + + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC | LE_SCF(1), + L3_3_WB), +}; + static const struct drm_i915_mocs_entry icelake_mocs_table[] = { + /* Base - Uncached (Deprecated) */ + MOCS_ENTRY(I915_MOCS_UNCACHED, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* Base - L3 + LeCC:PAT (Deprecated) */ + MOCS_ENTRY(I915_MOCS_PTE, + LE_0_PAGETABLE | LE_TC_1_LLC, + L3_3_WB), + GEN11_MOCS_ENTRIES }; -/** - * get_mocs_settings() - * @dev_priv: i915 device. 
- * @table: Output table that will be made to point at appropriate - * MOCS values for the device. - * - * This function will return the values of the MOCS table that needs to - * be programmed for the platform. It will return the values that need - * to be programmed and if they need to be programmed. - * - * Return: true if there are applicable MOCS settings for the device. - */ -static bool get_mocs_settings(struct drm_i915_private *dev_priv, +static bool get_mocs_settings(struct intel_gt *gt, struct drm_i915_mocs_table *table) { + struct drm_i915_private *i915 = gt->i915; bool result = false; - if (INTEL_GEN(dev_priv) >= 11) { + if (INTEL_GEN(i915) >= 12) { + table->size = ARRAY_SIZE(tigerlake_mocs_table); + table->table = tigerlake_mocs_table; + table->n_entries = GEN11_NUM_MOCS_ENTRIES; + result = true; + } else if (IS_GEN(i915, 11)) { table->size = ARRAY_SIZE(icelake_mocs_table); table->table = icelake_mocs_table; table->n_entries = GEN11_NUM_MOCS_ENTRIES; result = true; - } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skylake_mocs_table; result = true; - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEN9_LP(i915)) { table->size = ARRAY_SIZE(broxton_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; result = true; } else { - WARN_ONCE(INTEL_GEN(dev_priv) >= 9, + WARN_ONCE(INTEL_GEN(i915) >= 9, "Platform that should have a MOCS table does not.\n"); } /* WaDisableSkipCaching:skl,bxt,kbl,glk */ - if (IS_GEN(dev_priv, 9)) { + if (IS_GEN(i915, 9)) { int i; for (i = 0; i < table->size; i++) @@ -338,12 +374,20 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table, */ void intel_mocs_init_engine(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; + struct intel_gt *gt = engine->gt; + struct intel_uncore *uncore = gt->uncore; struct drm_i915_mocs_table table; unsigned int index; u32 unused_value; - if (!get_mocs_settings(dev_priv, &table)) + /* Platforms with global MOCS do not need per-engine initialization. */ + if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) + return; + + /* Called under a blanket forcewake */ + assert_forcewakes_active(uncore, FORCEWAKE_ALL); + + if (!get_mocs_settings(gt, &table)) return; /* Set unused values to PTE */ @@ -352,24 +396,48 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) for (index = 0; index < table.size; index++) { u32 value = get_entry_control(&table, index); - I915_WRITE(mocs_register(engine->id, index), value); + intel_uncore_write_fw(uncore, + mocs_register(engine->id, index), + value); } /* All remaining entries are also unused */ for (; index < table.n_entries; index++) - I915_WRITE(mocs_register(engine->id, index), unused_value); + intel_uncore_write_fw(uncore, + mocs_register(engine->id, index), + unused_value); +} + +static void intel_mocs_init_global(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + struct drm_i915_mocs_table table; + unsigned int index; + + GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); + + if (!get_mocs_settings(gt, &table)) + return; + + if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) + return; + + for (index = 0; index < table.size; index++) + intel_uncore_write(uncore, + GEN12_GLOBAL_MOCS(index), + table.table[index].control_value); + + /* + * Ok, now set the unused entries to the invalid entry (index 0). 
These + * entries are officially undefined and no contract for the contents and + * settings is given for these entries. + */ + for (; index < table.n_entries; index++) + intel_uncore_write(uncore, + GEN12_GLOBAL_MOCS(index), + table.table[0].control_value); } -/** - * emit_mocs_control_table() - emit the mocs control table - * @rq: Request to set up the MOCS table for. - * @table: The values to program into the control regs. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. - * - * Return: 0 on success, otherwise the error status. - */ static int emit_mocs_control_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { @@ -429,17 +497,6 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, return low | high << 16; } -/** - * emit_mocs_l3cc_table() - emit the mocs control table - * @rq: Request to set up the MOCS table for. - * @table: The values to program into the control regs. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. This register set is - * programmed in pairs. - * - * Return: 0 on success, otherwise the error status. - */ static int emit_mocs_l3cc_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { @@ -488,27 +545,14 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, return 0; } -/** - * intel_mocs_init_l3cc_table() - program the mocs control table - * @dev_priv: i915 device private - * - * This function simply programs the mocs registers for the given table - * starting at the given address. This register set is programmed in pairs. - * - * These registers may get programmed more than once, it is simpler to - * re-program 32 registers than maintain the state of when they were programmed. - * We are always reprogramming with the same values and this only on context - * start. - * - * Return: Nothing. - */ -void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) +static void intel_mocs_init_l3cc_table(struct intel_gt *gt) { + struct intel_uncore *uncore = gt->uncore; struct drm_i915_mocs_table table; unsigned int i; u16 unused_value; - if (!get_mocs_settings(dev_priv, &table)) + if (!get_mocs_settings(gt, &table)) return; /* Set unused values to PTE */ @@ -518,28 +562,32 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) u16 low = get_entry_l3cc(&table, 2 * i); u16 high = get_entry_l3cc(&table, 2 * i + 1); - I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, high)); + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(&table, low, high)); } /* Odd table size - 1 left over */ if (table.size & 0x01) { u16 low = get_entry_l3cc(&table, 2 * i); - I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, unused_value)); + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(&table, low, unused_value)); i++; } /* All remaining entries are also unused */ for (; i < table.n_entries / 2; i++) - I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, unused_value, unused_value)); + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(&table, unused_value, + unused_value)); } /** - * intel_rcs_context_init_mocs() - program the MOCS register. - * @rq: Request to set up the MOCS tables for. + * intel_mocs_emit() - program the MOCS register. + * @rq: Request to use to set up the MOCS tables. 
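Illustrative aside, not part of the patch: intel_mocs_init_global() just above pads every index beyond the defined table with entry 0's control value, so undefined indices land on the reserved/error setting on Gen12. A standalone sketch of that fill pattern; the table contents and entry count are made up.

#include <stdint.h>
#include <stdio.h>

struct mocs_entry { uint32_t control_value; };

/* Toy table: entry 0 is the reserved/error entry, as on Tigerlake. */
static const struct mocs_entry table[] = {
	{ 0x00000000 },		/* 0: reserved / error */
	{ 0x00000000 },		/* 1: reserved */
	{ 0x00000037 },		/* 2: a cacheable setting */
};

int main(void)
{
	enum { N_ENTRIES = 8 };	/* stand-in for GEN11_NUM_MOCS_ENTRIES */
	const unsigned int size = sizeof(table) / sizeof(table[0]);
	uint32_t regs[N_ENTRIES];
	unsigned int i;

	for (i = 0; i < size; i++)	/* defined entries */
		regs[i] = table[i].control_value;
	for (; i < N_ENTRIES; i++)	/* undefined entries fall back to entry 0 */
		regs[i] = table[0].control_value;

	for (i = 0; i < N_ENTRIES; i++)
		printf("GLOBAL_MOCS[%u] = 0x%08x\n", i, regs[i]);
	return 0;
}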
* * This function will emit a batch buffer with the values required for * programming the MOCS register values for all the currently supported @@ -553,12 +601,16 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) * * Return: 0 on success, otherwise the error status. */ -int intel_rcs_context_init_mocs(struct i915_request *rq) +int intel_mocs_emit(struct i915_request *rq) { struct drm_i915_mocs_table t; int ret; - if (get_mocs_settings(rq->i915, &t)) { + if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915) || + rq->engine->class != RENDER_CLASS) + return 0; + + if (get_mocs_settings(rq->engine->gt, &t)) { /* Program the RCS control registers */ ret = emit_mocs_control_table(rq, &t); if (ret) @@ -572,3 +624,11 @@ int intel_rcs_context_init_mocs(struct i915_request *rq) return 0; } + +void intel_mocs_init(struct intel_gt *gt) +{ + intel_mocs_init_l3cc_table(gt); + + if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) + intel_mocs_init_global(gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index 0913704a1af2..2ae816b7ca19 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -49,12 +49,13 @@ * context handling keep the MOCS in step. */ -struct drm_i915_private; struct i915_request; struct intel_engine_cs; +struct intel_gt; -int intel_rcs_context_init_mocs(struct i915_request *rq); -void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); +void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); +int intel_mocs_emit(struct i915_request *rq); + #endif diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c new file mode 100644 index 000000000000..6d05f9c64178 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -0,0 +1,236 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
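Illustrative aside, not part of the patch: the l3cc programming earlier in this hunk packs two 16-bit table entries per GEN9_LNCFCMOCS register through l3cc_combine() (low | high << 16), and an odd-sized table leaves one half to be filled with the unused value. A standalone sketch of that pairing; the values are made up.

#include <stdint.h>
#include <stdio.h>

static uint32_t l3cc_combine(uint16_t low, uint16_t high)
{
	return (uint32_t)low | (uint32_t)high << 16;	/* two entries per register */
}

int main(void)
{
	const uint16_t l3cc[] = { 0x0010, 0x0030, 0x0030 };	/* odd-sized toy table */
	const uint16_t unused = 0x0010;				/* filler for the leftover half */
	const unsigned int n = sizeof(l3cc) / sizeof(l3cc[0]);
	unsigned int i;

	for (i = 0; i < n / 2; i++)
		printf("LNCFCMOCS[%u] = 0x%08x\n",
		       i, l3cc_combine(l3cc[2 * i], l3cc[2 * i + 1]));

	if (n & 1)	/* odd table size - 1 left over */
		printf("LNCFCMOCS[%u] = 0x%08x\n",
		       i, l3cc_combine(l3cc[2 * i], unused));
	return 0;
}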
+ * + * Authors: + * Mika Kuoppala <mika.kuoppala@intel.com> + * + */ + +#include "i915_drv.h" +#include "intel_renderstate.h" + +struct intel_renderstate { + const struct intel_renderstate_rodata *rodata; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 batch_offset; + u32 batch_size; + u32 aux_offset; + u32 aux_size; +}; + +static const struct intel_renderstate_rodata * +render_state_get_rodata(const struct intel_engine_cs *engine) +{ + if (engine->class != RENDER_CLASS) + return NULL; + + switch (INTEL_GEN(engine->i915)) { + case 6: + return &gen6_null_state; + case 7: + return &gen7_null_state; + case 8: + return &gen8_null_state; + case 9: + return &gen9_null_state; + } + + return NULL; +} + +/* + * Macro to add commands to auxiliary batch. + * This macro only checks for page overflow before inserting the commands, + * this is sufficient as the null state generator makes the final batch + * with two passes to build command and state separately. At this point + * the size of both are known and it compacts them by relocating the state + * right after the commands taking care of alignment so we should sufficient + * space below them for adding new commands. + */ +#define OUT_BATCH(batch, i, val) \ + do { \ + if ((i) >= PAGE_SIZE / sizeof(u32)) \ + goto err; \ + (batch)[(i)++] = (val); \ + } while(0) + +static int render_state_setup(struct intel_renderstate *so, + struct drm_i915_private *i915) +{ + const struct intel_renderstate_rodata *rodata = so->rodata; + unsigned int i = 0, reloc_index = 0; + unsigned int needs_clflush; + u32 *d; + int ret; + + ret = i915_gem_object_prepare_write(so->obj, &needs_clflush); + if (ret) + return ret; + + d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0)); + + while (i < rodata->batch_items) { + u32 s = rodata->batch[i]; + + if (i * 4 == rodata->reloc[reloc_index]) { + u64 r = s + so->vma->node.start; + s = lower_32_bits(r); + if (HAS_64BIT_RELOC(i915)) { + if (i + 1 >= rodata->batch_items || + rodata->batch[i + 1] != 0) + goto err; + + d[i++] = s; + s = upper_32_bits(r); + } + + reloc_index++; + } + + d[i++] = s; + } + + if (rodata->reloc[reloc_index] != -1) { + DRM_ERROR("only %d relocs resolved\n", reloc_index); + goto err; + } + + so->batch_offset = i915_ggtt_offset(so->vma); + so->batch_size = rodata->batch_items * sizeof(u32); + + while (i % CACHELINE_DWORDS) + OUT_BATCH(d, i, MI_NOOP); + + so->aux_offset = i * sizeof(u32); + + if (HAS_POOLED_EU(i915)) { + /* + * We always program 3x6 pool config but depending upon which + * subslice is disabled HW drops down to appropriate config + * shown below. + * + * In the below table 2x6 config always refers to + * fused-down version, native 2x6 is not available and can + * be ignored + * + * SNo subslices config eu pool configuration + * ----------------------------------------------------------- + * 1 3 subslices enabled (3x6) - 0x00777000 (9+9) + * 2 ss0 disabled (2x6) - 0x00777000 (3+9) + * 3 ss1 disabled (2x6) - 0x00770000 (6+6) + * 4 ss2 disabled (2x6) - 0x00007000 (9+3) + */ + u32 eu_pool_config = 0x00777000; + + OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE); + OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE); + OUT_BATCH(d, i, eu_pool_config); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); + } + + OUT_BATCH(d, i, MI_BATCH_BUFFER_END); + so->aux_size = i * sizeof(u32) - so->aux_offset; + so->aux_offset += so->batch_offset; + /* + * Since we are sending length, we need to strictly conform to + * all requirements. For Gen2 this must be a multiple of 8. 
+ */ + so->aux_size = ALIGN(so->aux_size, 8); + + if (needs_clflush) + drm_clflush_virt_range(d, i * sizeof(u32)); + kunmap_atomic(d); + + ret = 0; +out: + i915_gem_object_finish_access(so->obj); + return ret; + +err: + kunmap_atomic(d); + ret = -EINVAL; + goto out; +} + +#undef OUT_BATCH + +int intel_renderstate_emit(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + struct intel_renderstate so = {}; /* keep the compiler happy */ + int err; + + so.rodata = render_state_get_rodata(engine); + if (!so.rodata) + return 0; + + if (so.rodata->batch_items * 4 > PAGE_SIZE) + return -EINVAL; + + so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(so.obj)) + return PTR_ERR(so.obj); + + so.vma = i915_vma_instance(so.obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(so.vma)) { + err = PTR_ERR(so.vma); + goto err_obj; + } + + err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err_vma; + + err = render_state_setup(&so, rq->i915); + if (err) + goto err_unpin; + + err = engine->emit_bb_start(rq, + so.batch_offset, so.batch_size, + I915_DISPATCH_SECURE); + if (err) + goto err_unpin; + + if (so.aux_size > 8) { + err = engine->emit_bb_start(rq, + so.aux_offset, so.aux_size, + I915_DISPATCH_SECURE); + if (err) + goto err_unpin; + } + + i915_vma_lock(so.vma); + err = i915_request_await_object(rq, so.vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(so.vma, rq, 0); + i915_vma_unlock(so.vma); +err_unpin: + i915_vma_unpin(so.vma); +err_vma: + i915_vma_close(so.vma); +err_obj: + i915_gem_object_put(so.obj); + return err; +} diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h new file mode 100644 index 000000000000..8d5079145054 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -0,0 +1,51 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
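Illustrative aside, not part of the patch: render_state_setup() above walks the generated null-state batch and patches each byte offset listed in rodata->reloc[] (terminated by -1) with the batch's GGTT address, widening to 64 bits on platforms with 64-bit relocations. A standalone sketch of the 32-bit case; the batch words, reloc offsets and base address are made up.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Toy batch: relocs list the byte offsets that need the base address added. */
	uint32_t batch[] = { 0x61010008, 0x00000000, 0x00000001, 0x00000001 };
	const int32_t relocs[] = { 0x8, 0xc, -1 };	/* -1 terminates the list */
	const uint32_t ggtt_base = 0x100000;		/* where the batch is pinned */
	unsigned int i, r = 0;

	for (i = 0; i < sizeof(batch) / sizeof(batch[0]); i++) {
		if (relocs[r] != -1 && i * 4 == (uint32_t)relocs[r]) {
			batch[i] += ggtt_base;		/* 32-bit reloc: add the base */
			r++;
		}
		printf("dw[%u] = 0x%08x\n", i, batch[i]);
	}
	return 0;
}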
+ */ + +#ifndef _INTEL_RENDERSTATE_H_ +#define _INTEL_RENDERSTATE_H_ + +#include <linux/types.h> + +struct i915_request; + +struct intel_renderstate_rodata { + const u32 *reloc; + const u32 *batch; + const u32 batch_items; +}; + +#define RO_RENDERSTATE(_g) \ + const struct intel_renderstate_rodata gen ## _g ## _null_state = { \ + .reloc = gen ## _g ## _null_state_relocs, \ + .batch = gen ## _g ## _null_state_batch, \ + .batch_items = sizeof(gen ## _g ## _null_state_batch)/4, \ + } + +extern const struct intel_renderstate_rodata gen6_null_state; +extern const struct intel_renderstate_rodata gen7_null_state; +extern const struct intel_renderstate_rodata gen8_null_state; +extern const struct intel_renderstate_rodata gen9_null_state; + +int intel_renderstate_emit(struct i915_request *rq); + +#endif /* _INTEL_RENDERSTATE_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 4c478b38e420..b9d84d52e986 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -7,6 +7,7 @@ #include <linux/sched/mm.h> #include <linux/stop_machine.h> +#include "display/intel_display_types.h" #include "display/intel_overlay.h" #include "gem/i915_gem_context.h" @@ -15,26 +16,17 @@ #include "i915_gpu_error.h" #include "i915_irq.h" #include "intel_engine_pm.h" +#include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_reset.h" -#include "intel_guc.h" +#include "uc/intel_guc.h" #define RESET_MAX_RETRIES 3 /* XXX How to handle concurrent GGTT updates using tiling registers? */ #define RESET_UNDER_STOP_MACHINE 0 -static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) -{ - intel_uncore_rmw(uncore, reg, 0, set); -} - -static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) -{ - intel_uncore_rmw(uncore, reg, clr, 0); -} - static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set) { intel_uncore_rmw_fw(uncore, reg, 0, set); @@ -123,7 +115,7 @@ static void context_mark_innocent(struct i915_gem_context *ctx) atomic_inc(&ctx->active_count); } -void i915_reset_request(struct i915_request *rq, bool guilty) +void __i915_request_reset(struct i915_request *rq, bool guilty) { GEM_TRACE("%s rq=%llx:%lld, guilty? 
%s\n", rq->engine->name, @@ -144,48 +136,6 @@ void i915_reset_request(struct i915_request *rq, bool guilty) } } -static void gen3_stop_engine(struct intel_engine_cs *engine) -{ - struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; - - GEM_TRACE("%s\n", engine->name); - - if (intel_engine_stop_cs(engine)) - GEM_TRACE("%s: timed out on STOP_RING\n", engine->name); - - intel_uncore_write_fw(uncore, - RING_HEAD(base), - intel_uncore_read_fw(uncore, RING_TAIL(base))); - intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ - - intel_uncore_write_fw(uncore, RING_HEAD(base), 0); - intel_uncore_write_fw(uncore, RING_TAIL(base), 0); - intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); - - /* The ring must be empty before it is disabled */ - intel_uncore_write_fw(uncore, RING_CTL(base), 0); - - /* Check acts as a post */ - if (intel_uncore_read_fw(uncore, RING_HEAD(base))) - GEM_TRACE("%s: ring head [%x] not parked\n", - engine->name, - intel_uncore_read_fw(uncore, RING_HEAD(base))); -} - -static void i915_stop_engines(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (INTEL_GEN(i915) < 3) - return; - - for_each_engine_masked(engine, i915, engine_mask, tmp) - gen3_stop_engine(engine); -} - static bool i915_in_reset(struct pci_dev *pdev) { u8 gdrst; @@ -194,11 +144,11 @@ static bool i915_in_reset(struct pci_dev *pdev) return gdrst & GRDOM_RESET_STATUS; } -static int i915_do_reset(struct drm_i915_private *i915, +static int i915_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct pci_dev *pdev = i915->drm.pdev; + struct pci_dev *pdev = gt->i915->drm.pdev; int err; /* Assert reset for at least 20 usec, and wait for acknowledgement. 
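Illustrative aside, not part of the patch: the legacy reset paths here write GRDOM_RESET_ENABLE into the GDRST config byte and then poll until the hardware drops the bit (i915_in_reset()/g4x_reset_complete()). A standalone sketch of that poll-with-timeout shape; the helper names and timings are hypothetical and not the kernel's wait_for() implementation.

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static unsigned int fake_gdrst = 0x1;		/* bit 0: reset still asserted */

static bool reset_complete(void)
{
	fake_gdrst = 0;				/* pretend the HW acks on the first poll */
	return (fake_gdrst & 0x1) == 0;
}

/* Poll a condition every 10us until it holds or the budget runs out. */
static int wait_for_ack(bool (*cond)(void), unsigned int timeout_us)
{
	unsigned int waited = 0;

	while (!cond()) {
		if (waited >= timeout_us)
			return -1;		/* -ETIMEDOUT in the real code */
		usleep(10);
		waited += 10;
	}
	return 0;
}

int main(void)
{
	printf("reset %s\n", wait_for_ack(reset_complete, 50) ? "timed out" : "acked");
	return 0;
}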
*/ @@ -223,22 +173,22 @@ static bool g4x_reset_complete(struct pci_dev *pdev) return (gdrst & GRDOM_RESET_ENABLE) == 0; } -static int g33_do_reset(struct drm_i915_private *i915, +static int g33_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct pci_dev *pdev = i915->drm.pdev; + struct pci_dev *pdev = gt->i915->drm.pdev; pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); return wait_for_atomic(g4x_reset_complete(pdev), 50); } -static int g4x_do_reset(struct drm_i915_private *i915, +static int g4x_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct pci_dev *pdev = i915->drm.pdev; - struct intel_uncore *uncore = &i915->uncore; + struct pci_dev *pdev = gt->i915->drm.pdev; + struct intel_uncore *uncore = gt->uncore; int ret; /* WaVcpClkGateDisableForMediaReset:ctg,elk */ @@ -270,11 +220,11 @@ out: return ret; } -static int ironlake_do_reset(struct drm_i915_private *i915, +static int ironlake_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->uncore; int ret; intel_uncore_write_fw(uncore, ILK_GDSR, @@ -306,10 +256,9 @@ out: } /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ -static int gen6_hw_domain_reset(struct drm_i915_private *i915, - u32 hw_domain_mask) +static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) { - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->uncore; int err; /* @@ -331,7 +280,7 @@ static int gen6_hw_domain_reset(struct drm_i915_private *i915, return err; } -static int gen6_reset_engines(struct drm_i915_private *i915, +static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { @@ -351,13 +300,13 @@ static int gen6_reset_engines(struct drm_i915_private *i915, intel_engine_mask_t tmp; hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; } } - return gen6_hw_domain_reset(i915, hw_mask); + return gen6_hw_domain_reset(gt, hw_mask); } static u32 gen11_lock_sfc(struct intel_engine_cs *engine) @@ -455,7 +404,7 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); } -static int gen11_reset_engines(struct drm_i915_private *i915, +static int gen11_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { @@ -478,17 +427,17 @@ static int gen11_reset_engines(struct drm_i915_private *i915, hw_mask = GEN11_GRDOM_FULL; } else { hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; hw_mask |= gen11_lock_sfc(engine); } } - ret = gen6_hw_domain_reset(i915, hw_mask); + ret = gen6_hw_domain_reset(gt, hw_mask); if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, i915, engine_mask, tmp) + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) gen11_unlock_sfc(engine); return ret; @@ -538,7 +487,7 @@ static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); } -static int gen8_reset_engines(struct drm_i915_private *i915, +static int gen8_reset_engines(struct 
intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { @@ -547,7 +496,7 @@ static int gen8_reset_engines(struct drm_i915_private *i915, intel_engine_mask_t tmp; int ret; - for_each_engine_masked(engine, i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) goto skip_reset; @@ -563,23 +512,23 @@ static int gen8_reset_engines(struct drm_i915_private *i915, * We rather take context corruption instead of * failed reset with a wedged driver/gpu. And * active bb execution case should be covered by - * i915_stop_engines we have before the reset. + * stop_engines() we have before the reset. */ } - if (INTEL_GEN(i915) >= 11) - ret = gen11_reset_engines(i915, engine_mask, retry); + if (INTEL_GEN(gt->i915) >= 11) + ret = gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(i915, engine_mask, retry); + ret = gen6_reset_engines(gt, engine_mask, retry); skip_reset: - for_each_engine_masked(engine, i915, engine_mask, tmp) + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) gen8_engine_reset_cancel(engine); return ret; } -typedef int (*reset_func)(struct drm_i915_private *, +typedef int (*reset_func)(struct intel_gt *, intel_engine_mask_t engine_mask, unsigned int retry); @@ -601,15 +550,14 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) return NULL; } -int intel_gpu_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) +int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) { const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; reset_func reset; int ret = -ETIMEDOUT; int retry; - reset = intel_get_gpu_reset(i915); + reset = intel_get_gpu_reset(gt->i915); if (!reset) return -ENODEV; @@ -617,31 +565,14 @@ int intel_gpu_reset(struct drm_i915_private *i915, * If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). */ - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { - /* - * We stop engines, otherwise we might get failed reset and a - * dead gpu (on elk). Also as modern gpu as kbl can suffer - * from system hang if batchbuffer is progressing when - * the reset is issued, regardless of READY_TO_RESET ack. - * Thus assume it is best to stop engines on all gens - * where we have a gpu reset. - * - * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) - * - * WaMediaResetMainRingCleanup:ctg,elk (presumably) - * - * FIXME: Wa for more modern gens needs to be validated - */ - if (retry) - i915_stop_engines(i915, engine_mask); - GEM_TRACE("engine_mask=%x\n", engine_mask); preempt_disable(); - ret = reset(i915, engine_mask, retry); + ret = reset(gt, engine_mask, retry); preempt_enable(); } - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); return ret; } @@ -659,17 +590,17 @@ bool intel_has_reset_engine(struct drm_i915_private *i915) return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; } -int intel_reset_guc(struct drm_i915_private *i915) +int intel_reset_guc(struct intel_gt *gt) { u32 guc_domain = - INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; + INTEL_GEN(gt->i915) >= 11 ? 
GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; int ret; - GEM_BUG_ON(!HAS_GUC(i915)); + GEM_BUG_ON(!HAS_GT_UC(gt->i915)); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - ret = gen6_hw_domain_reset(i915, guc_domain); - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + ret = gen6_hw_domain_reset(gt, guc_domain); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); return ret; } @@ -687,56 +618,59 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * written to the powercontext is undefined and so we may lose * GPU state upon resume, i.e. fail to restart after a reset. */ - intel_engine_pm_get(engine); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); engine->reset.prepare(engine); } -static void revoke_mmaps(struct drm_i915_private *i915) +static void revoke_mmaps(struct intel_gt *gt) { int i; - for (i = 0; i < i915->ggtt.num_fences; i++) { + for (i = 0; i < gt->ggtt->num_fences; i++) { struct drm_vma_offset_node *node; struct i915_vma *vma; u64 vma_offset; - vma = READ_ONCE(i915->ggtt.fence_regs[i].vma); + vma = READ_ONCE(gt->ggtt->fence_regs[i].vma); if (!vma) continue; if (!i915_vma_has_userfault(vma)) continue; - GEM_BUG_ON(vma->fence != &i915->ggtt.fence_regs[i]); + GEM_BUG_ON(vma->fence != >->ggtt->fence_regs[i]); node = &vma->obj->base.vma_node; vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; - unmap_mapping_range(i915->drm.anon_inode->i_mapping, + unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, vma->size, 1); } } -static void reset_prepare(struct drm_i915_private *i915) +static intel_engine_mask_t reset_prepare(struct intel_gt *gt) { struct intel_engine_cs *engine; + intel_engine_mask_t awake = 0; enum intel_engine_id id; - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) + for_each_engine(engine, gt->i915, id) { + if (intel_engine_pm_get_if_awake(engine)) + awake |= engine->mask; reset_prepare_engine(engine); + } + + intel_uc_reset_prepare(>->uc); - intel_uc_reset_prepare(i915); + return awake; } -static void gt_revoke(struct drm_i915_private *i915) +static void gt_revoke(struct intel_gt *gt) { - revoke_mmaps(i915); + revoke_mmaps(gt); } -static int gt_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask) +static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -746,14 +680,14 @@ static int gt_reset(struct drm_i915_private *i915, * Everything depends on having the GTT running, so we need to start * there. 
*/ - err = i915_ggtt_enable_hw(i915); + err = i915_ggtt_enable_hw(gt->i915); if (err) return err; - for_each_engine(engine, i915, id) - intel_engine_reset(engine, stalled_mask & engine->mask); + for_each_engine(engine, gt->i915, id) + __intel_engine_reset(engine, stalled_mask & engine->mask); - i915_gem_restore_fences(i915); + i915_gem_restore_fences(gt->i915); return err; } @@ -761,20 +695,21 @@ static int gt_reset(struct drm_i915_private *i915, static void reset_finish_engine(struct intel_engine_cs *engine) { engine->reset.finish(engine); - intel_engine_pm_put(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); + + intel_engine_signal_breadcrumbs(engine); } -static void reset_finish(struct drm_i915_private *i915) +static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { reset_finish_engine(engine); - intel_engine_signal_breadcrumbs(engine); + if (awake & engine->mask) + intel_engine_pm_put(engine); } - intel_gt_pm_put(i915); } static void nop_submit_request(struct i915_request *request) @@ -794,19 +729,19 @@ static void nop_submit_request(struct i915_request *request) intel_engine_queue_breadcrumbs(engine); } -static void __i915_gem_set_wedged(struct drm_i915_private *i915) +static void __intel_gt_set_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; + intel_engine_mask_t awake; enum intel_engine_id id; - if (test_bit(I915_WEDGED, &error->flags)) + if (test_bit(I915_WEDGED, >->reset.flags)) return; - if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { + if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) { struct drm_printer p = drm_debug_printer(__func__); - for_each_engine(engine, i915, id) + for_each_engine(engine, gt->i915, id) intel_engine_dump(engine, &p, "%s\n", engine->name); } @@ -817,17 +752,14 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * rolling the global seqno forward (since this would complete requests * for which we haven't set the fence error to EIO yet). */ - reset_prepare(i915); + awake = reset_prepare(gt); /* Even if the GPU reset fails, it should still stop the engines */ - if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_gpu_reset(i915, ALL_ENGINES); + if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + __intel_gt_reset(gt, ALL_ENGINES); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) engine->submit_request = nop_submit_request; - engine->schedule = NULL; - } - i915->caps.scheduler = 0; /* * Make sure no request can slip through without getting completed by @@ -835,37 +767,37 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * in nop_submit_request. 
*/ synchronize_rcu_expedited(); - set_bit(I915_WEDGED, &error->flags); + set_bit(I915_WEDGED, >->reset.flags); /* Mark all executing requests as skipped */ - for_each_engine(engine, i915, id) + for_each_engine(engine, gt->i915, id) engine->cancel_requests(engine); - reset_finish(i915); + reset_finish(gt, awake); GEM_TRACE("end\n"); } -void i915_gem_set_wedged(struct drm_i915_private *i915) +void intel_gt_set_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; intel_wakeref_t wakeref; - mutex_lock(&error->wedge_mutex); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - __i915_gem_set_wedged(i915); - mutex_unlock(&error->wedge_mutex); + mutex_lock(>->reset.mutex); + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + __intel_gt_set_wedged(gt); + mutex_unlock(>->reset.mutex); } -static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) +static bool __intel_gt_unset_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; - struct i915_timeline *tl; + struct intel_gt_timelines *timelines = >->timelines; + struct intel_timeline *tl; + unsigned long flags; - if (!test_bit(I915_WEDGED, &error->flags)) + if (!test_bit(I915_WEDGED, >->reset.flags)) return true; - if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ + if (!gt->scratch) /* Never full initialised, recovery impossible */ return false; GEM_TRACE("start\n"); @@ -880,14 +812,16 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) * * No more can be submitted until we reset the wedged bit. */ - mutex_lock(&i915->gt.timelines.mutex); - list_for_each_entry(tl, &i915->gt.timelines.active_list, link) { + spin_lock_irqsave(&timelines->lock, flags); + list_for_each_entry(tl, &timelines->active_list, link) { struct i915_request *rq; rq = i915_active_request_get_unlocked(&tl->last_request); if (!rq) continue; + spin_unlock_irqrestore(&timelines->lock, flags); + /* * All internal dependencies (i915_requests) will have * been flushed by the set-wedge, but we may be stuck waiting @@ -897,10 +831,14 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) */ dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT); i915_request_put(rq); + + /* Restart iteration after droping lock */ + spin_lock_irqsave(&timelines->lock, flags); + tl = list_entry(&timelines->active_list, typeof(*tl), link); } - mutex_unlock(&i915->gt.timelines.mutex); + spin_unlock_irqrestore(&timelines->lock, flags); - intel_gt_sanitize(i915, false); + intel_gt_sanitize(gt, false); /* * Undo nop_submit_request. We prevent all new i915 requests from @@ -911,49 +849,62 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) * the nop_submit_request on reset, we can do this from normal * context and do not require stop_machine(). 
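Illustrative aside, not part of the patch: __intel_gt_unset_wedged() above takes a reference on each active request, drops the timeline spinlock to wait on the fence, then re-takes the lock and restarts the walk because the list may have changed underneath it. A standalone sketch of that drop-lock/wait/restart pattern using a plain mutex; the data structures are made up.

#include <pthread.h>
#include <stdio.h>

struct node { int id; int busy; struct node *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node c = { 3, 0, 0 }, b = { 2, 1, &c }, a = { 1, 1, &b };
static struct node *head = &a;

static void wait_on(struct node *n)
{
	/* Stand-in for dma_fence_default_wait(): may sleep, so the lock must
	 * not be held.  The real code also pins the element with a reference
	 * before dropping the lock. */
	n->busy = 0;
}

int main(void)
{
	struct node *n;

restart:
	pthread_mutex_lock(&lock);
	for (n = head; n; n = n->next) {
		if (!n->busy)
			continue;
		pthread_mutex_unlock(&lock);	/* never sleep under the lock */
		wait_on(n);
		goto restart;			/* list may have changed: start over */
	}
	pthread_mutex_unlock(&lock);

	for (n = head; n; n = n->next)
		printf("node %d idle\n", n->id);
	return 0;
}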
*/ - intel_engines_reset_default_submission(i915); + intel_engines_reset_default_submission(gt); GEM_TRACE("end\n"); smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ - clear_bit(I915_WEDGED, &i915->gpu_error.flags); + clear_bit(I915_WEDGED, >->reset.flags); return true; } -bool i915_gem_unset_wedged(struct drm_i915_private *i915) +bool intel_gt_unset_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; bool result; - mutex_lock(&error->wedge_mutex); - result = __i915_gem_unset_wedged(i915); - mutex_unlock(&error->wedge_mutex); + mutex_lock(>->reset.mutex); + result = __intel_gt_unset_wedged(gt); + mutex_unlock(>->reset.mutex); return result; } -static int do_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask) +static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) { int err, i; - gt_revoke(i915); + gt_revoke(gt); - err = intel_gpu_reset(i915, ALL_ENGINES); + err = __intel_gt_reset(gt, ALL_ENGINES); for (i = 0; err && i < RESET_MAX_RETRIES; i++) { msleep(10 * (i + 1)); - err = intel_gpu_reset(i915, ALL_ENGINES); + err = __intel_gt_reset(gt, ALL_ENGINES); } if (err) return err; - return gt_reset(i915, stalled_mask); + return gt_reset(gt, stalled_mask); +} + +static int resume(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + for_each_engine(engine, gt->i915, id) { + ret = engine->resume(engine); + if (ret) + return ret; + } + + return 0; } /** - * i915_reset - reset chip after a hang - * @i915: #drm_i915_private to reset + * intel_gt_reset - reset chip after a hang + * @gt: #intel_gt to reset * @stalled_mask: mask of the stalled engines with the guilty requests * @reason: user error message for why we are resetting * @@ -968,49 +919,50 @@ static int do_reset(struct drm_i915_private *i915, * - re-init interrupt state * - re-init display */ -void i915_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask, - const char *reason) +void intel_gt_reset(struct intel_gt *gt, + intel_engine_mask_t stalled_mask, + const char *reason) { - struct i915_gpu_error *error = &i915->gpu_error; + intel_engine_mask_t awake; int ret; - GEM_TRACE("flags=%lx\n", error->flags); + GEM_TRACE("flags=%lx\n", gt->reset.flags); might_sleep(); - GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - mutex_lock(&error->wedge_mutex); + GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, >->reset.flags)); + mutex_lock(>->reset.mutex); /* Clear any previous failed attempts at recovery. Time to try again. 
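Illustrative aside, not part of the patch: do_reset() above retries the full-GT reset up to RESET_MAX_RETRIES times, sleeping 10 ms, 20 ms, 30 ms between attempts via msleep(10 * (i + 1)). A standalone sketch of that linear back-off loop; the failing reset stub is made up.

#include <stdio.h>
#include <unistd.h>

#define RESET_MAX_RETRIES 3

static int try_reset(void)
{
	static int attempts;
	return ++attempts < 3 ? -1 : 0;		/* pretend the third attempt succeeds */
}

int main(void)
{
	int err, i;

	err = try_reset();
	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
		usleep(10000 * (i + 1));	/* msleep(10 * (i + 1)) in the kernel */
		err = try_reset();
	}
	printf("reset %s after %d retries\n", err ? "failed" : "succeeded", i);
	return 0;
}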
*/ - if (!__i915_gem_unset_wedged(i915)) + if (!__intel_gt_unset_wedged(gt)) goto unlock; if (reason) - dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); - error->reset_count++; + dev_notice(gt->i915->drm.dev, + "Resetting chip for %s\n", reason); + atomic_inc(>->i915->gpu_error.reset_count); - reset_prepare(i915); + awake = reset_prepare(gt); - if (!intel_has_gpu_reset(i915)) { + if (!intel_has_gpu_reset(gt->i915)) { if (i915_modparams.reset) - dev_err(i915->drm.dev, "GPU reset not supported\n"); + dev_err(gt->i915->drm.dev, "GPU reset not supported\n"); else DRM_DEBUG_DRIVER("GPU reset disabled\n"); goto error; } - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_runtime_pm_disable_interrupts(i915); + if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + intel_runtime_pm_disable_interrupts(gt->i915); - if (do_reset(i915, stalled_mask)) { - dev_err(i915->drm.dev, "Failed to reset chip\n"); + if (do_reset(gt, stalled_mask)) { + dev_err(gt->i915->drm.dev, "Failed to reset chip\n"); goto taint; } - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_runtime_pm_enable_interrupts(i915); + if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + intel_runtime_pm_enable_interrupts(gt->i915); - intel_overlay_reset(i915); + intel_overlay_reset(gt->i915); /* * Next we need to restore the context, but we don't use those @@ -1020,19 +972,23 @@ void i915_reset(struct drm_i915_private *i915, * was running at the time of the reset (i.e. we weren't VT * switched away). */ - ret = i915_gem_init_hw(i915); + ret = i915_gem_init_hw(gt->i915); if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); - goto error; + goto taint; } - i915_queue_hangcheck(i915); + ret = resume(gt); + if (ret) + goto taint; + + intel_gt_queue_hangcheck(gt); finish: - reset_finish(i915); + reset_finish(gt, awake); unlock: - mutex_unlock(&error->wedge_mutex); + mutex_unlock(>->reset.mutex); return; taint: @@ -1050,18 +1006,17 @@ taint: */ add_taint_for_CI(TAINT_WARN); error: - __i915_gem_set_wedged(i915); + __intel_gt_set_wedged(gt); goto finish; } -static inline int intel_gt_reset_engine(struct drm_i915_private *i915, - struct intel_engine_cs *engine) +static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) { - return intel_gpu_reset(i915, engine->mask); + return __intel_gt_reset(engine->gt, engine->mask); } /** - * i915_reset_engine - reset GPU engine to recover from a hang + * intel_engine_reset - reset GPU engine to recover from a hang * @engine: engine to reset * @msg: reason for GPU reset; or NULL for no dev_notice() * @@ -1073,15 +1028,15 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915, * - reset engine (which will force the engine to idle) * - re-init/configure engine */ -int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) +int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) { - struct i915_gpu_error *error = &engine->i915->gpu_error; + struct intel_gt *gt = engine->gt; int ret; - GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); - GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags); + GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)); - if (!intel_wakeref_active(&engine->wakeref)) + if (!intel_engine_pm_get_if_awake(engine)) return 0; reset_prepare_engine(engine); @@ -1089,16 +1044,16 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) if (msg) 
dev_notice(engine->i915->drm.dev, "Resetting %s for %s\n", engine->name, msg); - error->reset_engine_count[engine->id]++; + atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (!engine->i915->guc.execbuf_client) - ret = intel_gt_reset_engine(engine->i915, engine); + if (!engine->gt->uc.guc.execbuf_client) + ret = intel_gt_reset_engine(engine); else - ret = intel_guc_reset_engine(&engine->i915->guc, engine); + ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - engine->i915->guc.execbuf_client ? "GuC " : "", + engine->gt->uc.guc.execbuf_client ? "GuC " : "", engine->name, ret); goto out; } @@ -1108,7 +1063,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * active request and can drop it, adjust head to skip the offending * request to resume executing remaining requests in the queue. */ - intel_engine_reset(engine, true); + __intel_engine_reset(engine, true); /* * The engine and its registers (and workarounds in case of render) @@ -1116,25 +1071,23 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * process to program RING_MODE, HWSP and re-enable submission. */ ret = engine->resume(engine); - if (ret) - goto out; out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); + intel_engine_pm_put(engine); return ret; } -static void i915_reset_device(struct drm_i915_private *i915, - u32 engine_mask, - const char *reason) +static void intel_gt_reset_global(struct intel_gt *gt, + u32 engine_mask, + const char *reason) { - struct i915_gpu_error *error = &i915->gpu_error; - struct kobject *kobj = &i915->drm.primary->kdev->kobj; + struct kobject *kobj = >->i915->drm.primary->kdev->kobj; char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; - struct i915_wedge_me w; + struct intel_wedge_me w; kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); @@ -1142,137 +1095,24 @@ static void i915_reset_device(struct drm_i915_private *i915, kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); /* Use a watchdog to ensure that our reset completes */ - i915_wedge_on_timeout(&w, i915, 5 * HZ) { - intel_prepare_reset(i915); + intel_wedge_on_timeout(&w, gt, 5 * HZ) { + intel_prepare_reset(gt->i915); /* Flush everyone using a resource about to be clobbered */ - synchronize_srcu_expedited(&error->reset_backoff_srcu); + synchronize_srcu_expedited(>->reset.backoff_srcu); - i915_reset(i915, engine_mask, reason); + intel_gt_reset(gt, engine_mask, reason); - intel_finish_reset(i915); + intel_finish_reset(gt->i915); } - if (!test_bit(I915_WEDGED, &error->flags)) + if (!test_bit(I915_WEDGED, >->reset.flags)) kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); } -static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) -{ - intel_uncore_rmw(uncore, reg, 0, 0); -} - -static void gen8_clear_engine_error_register(struct intel_engine_cs *engine) -{ - GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); - GEN6_RING_FAULT_REG_POSTING_READ(engine); -} - -static void clear_error_registers(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) -{ - struct intel_uncore *uncore = &i915->uncore; - u32 eir; - - if (!IS_GEN(i915, 2)) - clear_register(uncore, PGTBL_ER); - - if (INTEL_GEN(i915) < 4) - clear_register(uncore, IPEIR(RENDER_RING_BASE)); - else - 
clear_register(uncore, IPEIR_I965); - - clear_register(uncore, EIR); - eir = intel_uncore_read(uncore, EIR); - if (eir) { - /* - * some errors might have become stuck, - * mask them. - */ - DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); - rmw_set(uncore, EMR, eir); - intel_uncore_write(uncore, GEN2_IIR, - I915_MASTER_ERROR_INTERRUPT); - } - - if (INTEL_GEN(i915) >= 8) { - rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); - intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); - } else if (INTEL_GEN(i915) >= 6) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine_masked(engine, i915, engine_mask, id) - gen8_clear_engine_error_register(engine); - } -} - -static void gen6_check_faults(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 fault; - - for_each_engine(engine, dev_priv, id) { - fault = GEN6_RING_FAULT_REG_READ(engine); - if (fault & RING_FAULT_VALID) { - DRM_DEBUG_DRIVER("Unexpected fault\n" - "\tAddr: 0x%08lx\n" - "\tAddress space: %s\n" - "\tSource ID: %d\n" - "\tType: %d\n", - fault & PAGE_MASK, - fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); - } - } -} - -static void gen8_check_faults(struct drm_i915_private *dev_priv) -{ - u32 fault = I915_READ(GEN8_RING_FAULT_REG); - - if (fault & RING_FAULT_VALID) { - u32 fault_data0, fault_data1; - u64 fault_addr; - - fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0); - fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1); - fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | - ((u64)fault_data0 << 12); - - DRM_DEBUG_DRIVER("Unexpected fault\n" - "\tAddr: 0x%08x_%08x\n" - "\tAddress space: %s\n" - "\tEngine ID: %d\n" - "\tSource ID: %d\n" - "\tType: %d\n", - upper_32_bits(fault_addr), - lower_32_bits(fault_addr), - fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", - GEN8_RING_FAULT_ENGINE_ID(fault), - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); - } -} - -void i915_check_and_clear_faults(struct drm_i915_private *i915) -{ - /* From GEN8 onwards we only have one 'All Engine Fault Register' */ - if (INTEL_GEN(i915) >= 8) - gen8_check_faults(i915); - else if (INTEL_GEN(i915) >= 6) - gen6_check_faults(i915); - else - return; - - clear_error_registers(i915, ALL_ENGINES); -} - /** - * i915_handle_error - handle a gpu error - * @i915: i915 device private + * intel_gt_handle_error - handle a gpu error + * @gt: the intel_gt * @engine_mask: mask representing engines that are hung * @flags: control flags * @fmt: Error message format string @@ -1283,12 +1123,11 @@ void i915_check_and_clear_faults(struct drm_i915_private *i915) * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). */ -void i915_handle_error(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned long flags, - const char *fmt, ...) +void intel_gt_handle_error(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned long flags, + const char *fmt, ...) { - struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; intel_wakeref_t wakeref; intel_engine_mask_t tmp; @@ -1312,33 +1151,31 @@ void i915_handle_error(struct drm_i915_private *i915, * isn't the case at least when we get here by doing a * simulated reset via debugfs, so get an RPM reference. 
*/ - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + wakeref = intel_runtime_pm_get(>->i915->runtime_pm); - engine_mask &= INTEL_INFO(i915)->engine_mask; + engine_mask &= INTEL_INFO(gt->i915)->engine_mask; if (flags & I915_ERROR_CAPTURE) { - i915_capture_error_state(i915, engine_mask, msg); - clear_error_registers(i915, engine_mask); + i915_capture_error_state(gt->i915, engine_mask, msg); + intel_gt_clear_error_registers(gt, engine_mask); } /* * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(i915) && !__i915_wedged(error)) { - for_each_engine_masked(engine, i915, engine_mask, tmp) { + if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) { + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &error->flags)) + >->reset.flags)) continue; - if (i915_reset_engine(engine, msg) == 0) + if (intel_engine_reset(engine, msg) == 0) engine_mask &= ~engine->mask; - clear_bit(I915_RESET_ENGINE + engine->id, - &error->flags); - wake_up_bit(&error->flags, - I915_RESET_ENGINE + engine->id); + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags); } } @@ -1346,9 +1183,9 @@ void i915_handle_error(struct drm_i915_private *i915, goto out; /* Full reset needs the mutex, stop any other user trying to do so. */ - if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) { - wait_event(error->reset_queue, - !test_bit(I915_RESET_BACKOFF, &error->flags)); + if (test_and_set_bit(I915_RESET_BACKOFF, >->reset.flags)) { + wait_event(gt->reset.queue, + !test_bit(I915_RESET_BACKOFF, >->reset.flags)); goto out; /* piggy-back on the other reset */ } @@ -1356,113 +1193,119 @@ void i915_handle_error(struct drm_i915_private *i915, synchronize_rcu_expedited(); /* Prevent any other reset-engine attempt. 
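Illustrative aside, not part of the patch: the loop above first attempts a per-engine reset for every hung engine and clears each engine that recovers from the mask; only if something is still stuck does the code fall through to the full-GT reset. A standalone sketch of that mask whittling; the engine names and the stuck engine are made up.

#include <stdio.h>

int main(void)
{
	const char *name[] = { "rcs0", "vcs0", "vecs0" };
	unsigned int hung = 0x5;		/* rcs0 and vecs0 reported hung */
	unsigned int i;

	for (i = 0; i < 3; i++) {
		if (!(hung & (1u << i)))
			continue;
		/* Pretend the per-engine reset works for everything but vecs0. */
		if (i != 2) {
			printf("engine reset of %s ok\n", name[i]);
			hung &= ~(1u << i);	/* drop it from the mask */
		}
	}

	if (hung)
		printf("falling back to full-GT reset for mask 0x%x\n", hung);
	return 0;
}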
*/ - for_each_engine(engine, i915, tmp) { + for_each_engine(engine, gt->i915, tmp) { while (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &error->flags)) - wait_on_bit(&error->flags, + >->reset.flags)) + wait_on_bit(>->reset.flags, I915_RESET_ENGINE + engine->id, TASK_UNINTERRUPTIBLE); } - i915_reset_device(i915, engine_mask, msg); + intel_gt_reset_global(gt, engine_mask, msg); - for_each_engine(engine, i915, tmp) { - clear_bit(I915_RESET_ENGINE + engine->id, - &error->flags); - } - - clear_bit(I915_RESET_BACKOFF, &error->flags); - wake_up_all(&error->reset_queue); + for_each_engine(engine, gt->i915, tmp) + clear_bit_unlock(I915_RESET_ENGINE + engine->id, + >->reset.flags); + clear_bit_unlock(I915_RESET_BACKOFF, >->reset.flags); + smp_mb__after_atomic(); + wake_up_all(>->reset.queue); out: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); } -int i915_reset_trylock(struct drm_i915_private *i915) +int intel_gt_reset_trylock(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; int srcu; - might_lock(&error->reset_backoff_srcu); + might_lock(>->reset.backoff_srcu); might_sleep(); rcu_read_lock(); - while (test_bit(I915_RESET_BACKOFF, &error->flags)) { + while (test_bit(I915_RESET_BACKOFF, >->reset.flags)) { rcu_read_unlock(); - if (wait_event_interruptible(error->reset_queue, + if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, - &error->flags))) + >->reset.flags))) return -EINTR; rcu_read_lock(); } - srcu = srcu_read_lock(&error->reset_backoff_srcu); + srcu = srcu_read_lock(>->reset.backoff_srcu); rcu_read_unlock(); return srcu; } -void i915_reset_unlock(struct drm_i915_private *i915, int tag) -__releases(&i915->gpu_error.reset_backoff_srcu) +void intel_gt_reset_unlock(struct intel_gt *gt, int tag) +__releases(>->reset.backoff_srcu) { - struct i915_gpu_error *error = &i915->gpu_error; - - srcu_read_unlock(&error->reset_backoff_srcu, tag); + srcu_read_unlock(>->reset.backoff_srcu, tag); } -int i915_terminally_wedged(struct drm_i915_private *i915) +int intel_gt_terminally_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; - might_sleep(); - if (!__i915_wedged(error)) + if (!intel_gt_is_wedged(gt)) return 0; /* Reset still in progress? Maybe we will recover? */ - if (!test_bit(I915_RESET_BACKOFF, &error->flags)) + if (!test_bit(I915_RESET_BACKOFF, >->reset.flags)) return -EIO; /* XXX intel_reset_finish() still takes struct_mutex!!! */ - if (mutex_is_locked(&i915->drm.struct_mutex)) + if (mutex_is_locked(>->i915->drm.struct_mutex)) return -EAGAIN; - if (wait_event_interruptible(error->reset_queue, + if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, - &error->flags))) + >->reset.flags))) return -EINTR; - return __i915_wedged(error) ? -EIO : 0; + return intel_gt_is_wedged(gt) ? 
-EIO : 0; +} + +void intel_gt_init_reset(struct intel_gt *gt) +{ + init_waitqueue_head(>->reset.queue); + mutex_init(>->reset.mutex); + init_srcu_struct(>->reset.backoff_srcu); +} + +void intel_gt_fini_reset(struct intel_gt *gt) +{ + cleanup_srcu_struct(>->reset.backoff_srcu); } -static void i915_wedge_me(struct work_struct *work) +static void intel_wedge_me(struct work_struct *work) { - struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); + struct intel_wedge_me *w = container_of(work, typeof(*w), work.work); - dev_err(w->i915->drm.dev, + dev_err(w->gt->i915->drm.dev, "%s timed out, cancelling all in-flight rendering.\n", w->name); - i915_gem_set_wedged(w->i915); + intel_gt_set_wedged(w->gt); } -void __i915_init_wedge(struct i915_wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name) +void __intel_init_wedge(struct intel_wedge_me *w, + struct intel_gt *gt, + long timeout, + const char *name) { - w->i915 = i915; + w->gt = gt; w->name = name; - INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me); + INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me); schedule_delayed_work(&w->work, timeout); } -void __i915_fini_wedge(struct i915_wedge_me *w) +void __intel_fini_wedge(struct intel_wedge_me *w) { cancel_delayed_work_sync(&w->work); destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; + w->gt = NULL; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 580ebdb59eca..37a987b17108 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -11,58 +11,67 @@ #include <linux/types.h> #include <linux/srcu.h> -#include "gt/intel_engine_types.h" +#include "intel_engine_types.h" +#include "intel_reset_types.h" struct drm_i915_private; struct i915_request; struct intel_engine_cs; +struct intel_gt; struct intel_guc; +void intel_gt_init_reset(struct intel_gt *gt); +void intel_gt_fini_reset(struct intel_gt *gt); + __printf(4, 5) -void i915_handle_error(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned long flags, - const char *fmt, ...); +void intel_gt_handle_error(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned long flags, + const char *fmt, ...); #define I915_ERROR_CAPTURE BIT(0) -void i915_check_and_clear_faults(struct drm_i915_private *i915); - -void i915_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask, - const char *reason); -int i915_reset_engine(struct intel_engine_cs *engine, - const char *reason); - -void i915_reset_request(struct i915_request *rq, bool guilty); +void intel_gt_reset(struct intel_gt *gt, + intel_engine_mask_t stalled_mask, + const char *reason); +int intel_engine_reset(struct intel_engine_cs *engine, + const char *reason); -int __must_check i915_reset_trylock(struct drm_i915_private *i915); -void i915_reset_unlock(struct drm_i915_private *i915, int tag); +void __i915_request_reset(struct i915_request *rq, bool guilty); -int i915_terminally_wedged(struct drm_i915_private *i915); +int __must_check intel_gt_reset_trylock(struct intel_gt *gt); +void intel_gt_reset_unlock(struct intel_gt *gt, int tag); -bool intel_has_gpu_reset(struct drm_i915_private *i915); -bool intel_has_reset_engine(struct drm_i915_private *i915); +void intel_gt_set_wedged(struct intel_gt *gt); +bool intel_gt_unset_wedged(struct intel_gt *gt); +int intel_gt_terminally_wedged(struct intel_gt *gt); -int intel_gpu_reset(struct drm_i915_private *i915, - intel_engine_mask_t 
engine_mask); +int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask); -int intel_reset_guc(struct drm_i915_private *i915); +int intel_reset_guc(struct intel_gt *gt); -struct i915_wedge_me { +struct intel_wedge_me { struct delayed_work work; - struct drm_i915_private *i915; + struct intel_gt *gt; const char *name; }; -void __i915_init_wedge(struct i915_wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name); -void __i915_fini_wedge(struct i915_wedge_me *w); +void __intel_init_wedge(struct intel_wedge_me *w, + struct intel_gt *gt, + long timeout, + const char *name); +void __intel_fini_wedge(struct intel_wedge_me *w); -#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \ - (W)->i915; \ - __i915_fini_wedge((W))) +#define intel_wedge_on_timeout(W, GT, TIMEOUT) \ + for (__intel_init_wedge((W), (GT), (TIMEOUT), __func__); \ + (W)->gt; \ + __intel_fini_wedge((W))) + +static inline bool __intel_reset_failed(const struct intel_reset *reset) +{ + return unlikely(test_bit(I915_WEDGED, &reset->flags)); +} + +bool intel_has_gpu_reset(struct drm_i915_private *i915); +bool intel_has_reset_engine(struct drm_i915_private *i915); #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h new file mode 100644 index 000000000000..31968356e0c0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_RESET_TYPES_H_ +#define __INTEL_RESET_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/wait.h> +#include <linux/srcu.h> + +struct intel_reset { + /** + * flags: Control various stages of the GPU reset + * + * #I915_RESET_BACKOFF - When we start a global reset, we need to + * serialise with any other users attempting to do the same, and + * any global resources that may be clobber by the reset (such as + * FENCE registers). + * + * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to + * acquire the struct_mutex to reset an engine, we need an explicit + * flag to prevent two concurrent reset attempts in the same engine. + * As the number of engines continues to grow, allocate the flags from + * the most significant bits. + * + * #I915_WEDGED - If reset fails and we can no longer use the GPU, + * we set the #I915_WEDGED bit. Prior to command submission, e.g. + * i915_request_alloc(), this bit is checked and the sequence + * aborted (with -EIO reported to userspace) if set. + */ + unsigned long flags; +#define I915_RESET_BACKOFF 0 +#define I915_RESET_MODESET 1 +#define I915_RESET_ENGINE 2 +#define I915_WEDGED (BITS_PER_LONG - 1) + + struct mutex mutex; /* serialises wedging/unwedging */ + + /** + * Waitqueue to signal when the reset has completed. Used by clients + * that wait for dev_priv->mm.wedged to settle. 
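Illustrative aside, not part of the patch: the flags word documented in intel_reset_types.h above multiplexes the back-off bit, the modeset bit, one bit per engine starting at I915_RESET_ENGINE, and the wedged bit parked in the most significant position. A standalone sketch of how those bit positions are used; the engine id is made up.

#include <limits.h>
#include <stdio.h>

#define I915_RESET_BACKOFF	0
#define I915_RESET_MODESET	1
#define I915_RESET_ENGINE	2
#define I915_WEDGED		(sizeof(unsigned long) * CHAR_BIT - 1)	/* BITS_PER_LONG - 1 */

int main(void)
{
	unsigned long flags = 0;
	unsigned int engine_id = 1;	/* one bit per engine, allocated from bit 2 up */

	flags |= 1ul << I915_RESET_BACKOFF;			/* global reset in progress */
	flags |= 1ul << (I915_RESET_ENGINE + engine_id);	/* engine 1 being reset */

	printf("backoff=%lu engine1=%lu wedged=%lu\n",
	       (flags >> I915_RESET_BACKOFF) & 1,
	       (flags >> (I915_RESET_ENGINE + engine_id)) & 1,
	       (flags >> I915_WEDGED) & 1);
	return 0;
}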
+ */ + wait_queue_head_t queue; + + struct srcu_struct backoff_srcu; +}; + +#endif /* _INTEL_RESET_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index c6023bc9452d..601c16239fdf 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -34,9 +34,11 @@ #include "gem/i915_gem_context.h" #include "i915_drv.h" -#include "i915_gem_render_state.h" #include "i915_trace.h" #include "intel_context.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" #include "intel_reset.h" #include "intel_workarounds.h" @@ -75,7 +77,8 @@ gen2_render_ring_flush(struct i915_request *rq, u32 mode) *cs++ = cmd; while (num_store_dw--) { *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = i915_scratch_offset(rq->i915); + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = 0; } *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; @@ -148,7 +151,9 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) */ if (mode & EMIT_INVALIDATE) { *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; *cs++ = 0; @@ -156,7 +161,9 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) *cs++ = MI_FLUSH; *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; *cs++ = 0; } @@ -208,7 +215,9 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) static int gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) { - u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); u32 *cs; cs = intel_ring_begin(rq, 6); @@ -241,7 +250,9 @@ gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) static int gen6_render_ring_flush(struct i915_request *rq, u32 mode) { - u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); u32 *cs, flags = 0; int ret; @@ -299,7 +310,9 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; /* Finally we can flush and with it emit the breadcrumb */ @@ -342,7 +355,9 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq) static int gen7_render_ring_flush(struct i915_request *rq, u32 mode) { - u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); u32 *cs, flags = 0; /* @@ -623,7 +638,7 @@ static bool stop_ring(struct intel_engine_cs *engine) static int xcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct intel_ring *ring = engine->buffer; + struct intel_ring *ring = engine->legacy.ring; int ret = 0; GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n", @@ -631,6 +646,7 @@ static int 
xcs_resume(struct intel_engine_cs *engine) intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); + /* WaClearRingBufHeadRegAtInit:ctg,elk */ if (!stop_ring(engine)) { /* G45 ring initialization often fails to reset head to zero */ DRM_DEBUG_DRIVER("%s head not reset to zero " @@ -662,19 +678,16 @@ static int xcs_resume(struct intel_engine_cs *engine) intel_engine_reset_breadcrumbs(engine); /* Enforce ordering by reading HEAD register back */ - ENGINE_READ(engine, RING_HEAD); + ENGINE_POSTING_READ(engine, RING_HEAD); - /* Initialize the ring. This must happen _after_ we've cleared the ring + /* + * Initialize the ring. This must happen _after_ we've cleared the ring * registers with the above sequence (the readback of the HEAD registers * also enforces ordering), otherwise the hw might lose the new ring - * register values. */ + * register values. + */ ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma)); - /* WaClearRingBufHeadRegAtInit:ctg,elk */ - if (ENGINE_READ(engine, RING_HEAD)) - DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n", - engine->name, ENGINE_READ(engine, RING_HEAD)); - /* Check that the ring offsets point within the ring! */ GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); @@ -725,7 +738,45 @@ out: static void reset_prepare(struct intel_engine_cs *engine) { - intel_engine_stop_cs(engine); + struct intel_uncore *uncore = engine->uncore; + const u32 base = engine->mmio_base; + + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * + * FIXME: Wa for more modern gens needs to be validated + */ + GEM_TRACE("%s\n", engine->name); + + if (intel_engine_stop_cs(engine)) + GEM_TRACE("%s: timed out on STOP_RING\n", engine->name); + + intel_uncore_write_fw(uncore, + RING_HEAD(base), + intel_uncore_read_fw(uncore, RING_TAIL(base))); + intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ + + intel_uncore_write_fw(uncore, RING_HEAD(base), 0); + intel_uncore_write_fw(uncore, RING_TAIL(base), 0); + intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); + + /* The ring must be empty before it is disabled */ + intel_uncore_write_fw(uncore, RING_CTL(base), 0); + + /* Check acts as a post */ + if (intel_uncore_read_fw(uncore, RING_HEAD(base))) + GEM_TRACE("%s: ring head [%x] not parked\n", + engine->name, + intel_uncore_read_fw(uncore, RING_HEAD(base))); } static void reset_ring(struct intel_engine_cs *engine, bool stalled) @@ -781,14 +832,14 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) * If the request was innocent, we try to replay the request * with the restored context. 
*/ - i915_reset_request(rq, stalled); + __i915_request_reset(rq, stalled); - GEM_BUG_ON(rq->ring != engine->buffer); + GEM_BUG_ON(rq->ring != engine->legacy.ring); head = rq->head; } else { - head = engine->buffer->tail; + head = engine->legacy.ring->tail; } - engine->buffer->head = intel_ring_wrap(engine->buffer, head); + engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head); spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -797,21 +848,6 @@ static void reset_finish(struct intel_engine_cs *engine) { } -static int intel_rcs_ctx_init(struct i915_request *rq) -{ - int ret; - - ret = intel_engine_emit_ctx_wa(rq); - if (ret != 0) - return ret; - - ret = i915_gem_render_state_emit(rq); - if (ret) - return ret; - - return 0; -} - static int rcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -948,13 +984,13 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) static void gen5_irq_enable(struct intel_engine_cs *engine) { - gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); + gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); } static void gen5_irq_disable(struct intel_engine_cs *engine) { - gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); + gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); } static void @@ -1015,14 +1051,14 @@ gen6_irq_enable(struct intel_engine_cs *engine) /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ ENGINE_POSTING_READ(engine, RING_IMR); - gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); + gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); } static void gen6_irq_disable(struct intel_engine_cs *engine) { ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); - gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); + gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); } static void @@ -1033,14 +1069,14 @@ hsw_vebox_irq_enable(struct intel_engine_cs *engine) /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ ENGINE_POSTING_READ(engine, RING_IMR); - gen6_unmask_pm_irq(engine->i915, engine->irq_enable_mask); + gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask); } static void hsw_vebox_irq_disable(struct intel_engine_cs *engine) { ENGINE_WRITE(engine, RING_IMR, ~0); - gen6_mask_pm_irq(engine->i915, engine->irq_enable_mask); + gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask); } static int @@ -1071,9 +1107,11 @@ i830_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { - u32 *cs, cs_offset = i915_scratch_offset(rq->i915); + u32 *cs, cs_offset = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); - GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE); + GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE); cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -1100,7 +1138,7 @@ i830_emit_bb_start(struct i915_request *rq, * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... 
*/ - *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; *cs++ = cs_offset; @@ -1149,16 +1187,12 @@ i915_emit_bb_start(struct i915_request *rq, int intel_ring_pin(struct intel_ring *ring) { struct i915_vma *vma = ring->vma; - enum i915_map_type map = i915_coherent_map_type(vma->vm->i915); unsigned int flags; void *addr; int ret; - GEM_BUG_ON(ring->vaddr); - - ret = i915_timeline_pin(ring->timeline); - if (ret) - return ret; + if (atomic_fetch_inc(&ring->pin_count)) + return 0; flags = PIN_GLOBAL; @@ -1172,33 +1206,35 @@ int intel_ring_pin(struct intel_ring *ring) ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - goto unpin_timeline; + goto err_unpin; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else - addr = i915_gem_object_pin_map(vma->obj, map); + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); if (IS_ERR(addr)) { ret = PTR_ERR(addr); - goto unpin_ring; + goto err_ring; } - vma->obj->pin_global++; + i915_vma_make_unshrinkable(vma); + GEM_BUG_ON(ring->vaddr); ring->vaddr = addr; + return 0; -unpin_ring: +err_ring: i915_vma_unpin(vma); -unpin_timeline: - i915_timeline_unpin(ring->timeline); +err_unpin: + atomic_dec(&ring->pin_count); return ret; } void intel_ring_reset(struct intel_ring *ring, u32 tail) { - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); - + tail = intel_ring_wrap(ring, tail); ring->tail = tail; ring->head = tail; ring->emit = tail; @@ -1207,34 +1243,37 @@ void intel_ring_reset(struct intel_ring *ring, u32 tail) void intel_ring_unpin(struct intel_ring *ring) { - GEM_BUG_ON(!ring->vma); - GEM_BUG_ON(!ring->vaddr); + struct i915_vma *vma = ring->vma; + + if (!atomic_dec_and_test(&ring->pin_count)) + return; /* Discard any unused bytes beyond that submitted to hw. 
*/ - intel_ring_reset(ring, ring->tail); + intel_ring_reset(ring, ring->emit); - if (i915_vma_is_map_and_fenceable(ring->vma)) - i915_vma_unpin_iomap(ring->vma); + i915_vma_unset_ggtt_write(vma); + if (i915_vma_is_map_and_fenceable(vma)) + i915_vma_unpin_iomap(vma); else - i915_gem_object_unpin_map(ring->vma->obj); - ring->vaddr = NULL; + i915_gem_object_unpin_map(vma->obj); - ring->vma->obj->pin_global--; - i915_vma_unpin(ring->vma); + GEM_BUG_ON(!ring->vaddr); + ring->vaddr = NULL; - i915_timeline_unpin(ring->timeline); + i915_vma_unpin(vma); + i915_vma_make_purgeable(vma); } -static struct i915_vma * -intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) { - struct i915_address_space *vm = &dev_priv->ggtt.vm; + struct i915_address_space *vm = &ggtt->vm; + struct drm_i915_private *i915 = vm->i915; struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = i915_gem_object_create_stolen(dev_priv, size); + obj = i915_gem_object_create_stolen(i915, size); if (!obj) - obj = i915_gem_object_create_internal(dev_priv, size); + obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) return ERR_CAST(obj); @@ -1257,10 +1296,9 @@ err: } struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, - int size) +intel_engine_create_ring(struct intel_engine_cs *engine, int size) { + struct drm_i915_private *i915 = engine->i915; struct intel_ring *ring; struct i915_vma *vma; @@ -1272,8 +1310,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, return ERR_PTR(-ENOMEM); kref_init(&ring->ref); - INIT_LIST_HEAD(&ring->request_list); - ring->timeline = i915_timeline_get(timeline); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1281,12 +1317,12 @@ intel_engine_create_ring(struct intel_engine_cs *engine, * of the buffer. 
*/ ring->effective_size = size; - if (IS_I830(engine->i915) || IS_I845G(engine->i915)) + if (IS_I830(i915) || IS_I845G(i915)) ring->effective_size -= 2 * CACHELINE_BYTES; intel_ring_update_space(ring); - vma = intel_ring_create_vma(engine->i915, size); + vma = create_ring_vma(engine->gt->ggtt, size); if (IS_ERR(vma)) { kfree(ring); return ERR_CAST(vma); @@ -1303,13 +1339,11 @@ void intel_ring_free(struct kref *ref) i915_vma_close(ring->vma); i915_vma_put(ring->vma); - i915_timeline_put(ring->timeline); kfree(ring); } static void __ring_context_fini(struct intel_context *ce) { - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); i915_gem_object_put(ce->state->obj); } @@ -1322,33 +1356,45 @@ static void ring_context_destroy(struct kref *ref) if (ce->state) __ring_context_fini(ce); + intel_context_fini(ce); intel_context_free(ce); } -static int __context_pin_ppgtt(struct i915_gem_context *ctx) +static struct i915_address_space *vm_alias(struct intel_context *ce) +{ + struct i915_address_space *vm; + + vm = ce->vm; + if (i915_is_ggtt(vm)) + vm = &i915_vm_to_ggtt(vm)->alias->vm; + + return vm; +} + +static int __context_pin_ppgtt(struct intel_context *ce) { struct i915_address_space *vm; int err = 0; - vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm; + vm = vm_alias(ce); if (vm) err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm))); return err; } -static void __context_unpin_ppgtt(struct i915_gem_context *ctx) +static void __context_unpin_ppgtt(struct intel_context *ce) { struct i915_address_space *vm; - vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm; + vm = vm_alias(ce); if (vm) gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm)); } static void ring_context_unpin(struct intel_context *ce) { - __context_unpin_ppgtt(ce->gem_context); + __context_unpin_ppgtt(ce); } static struct i915_vma * @@ -1404,7 +1450,7 @@ alloc_context_vma(struct intel_engine_cs *engine) i915_gem_object_unpin_map(obj); } - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -1419,16 +1465,17 @@ err_obj: return ERR_PTR(err); } -static int ring_context_pin(struct intel_context *ce) +static int ring_context_alloc(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; - int err; /* One ringbuffer to rule them all */ - GEM_BUG_ON(!engine->buffer); - ce->ring = engine->buffer; + GEM_BUG_ON(!engine->legacy.ring); + ce->ring = engine->legacy.ring; + ce->timeline = intel_timeline_get(engine->legacy.timeline); - if (!ce->state && engine->context_size) { + GEM_BUG_ON(ce->state); + if (engine->context_size) { struct i915_vma *vma; vma = alloc_context_vma(engine); @@ -1438,11 +1485,18 @@ static int ring_context_pin(struct intel_context *ce) ce->state = vma; } - err = intel_context_active_acquire(ce, PIN_HIGH); + return 0; +} + +static int ring_context_pin(struct intel_context *ce) +{ + int err; + + err = intel_context_active_acquire(ce); if (err) return err; - err = __context_pin_ppgtt(ce->gem_context); + err = __context_pin_ppgtt(ce); if (err) goto err_active; @@ -1459,6 +1513,8 @@ static void ring_context_reset(struct intel_context *ce) } static const struct intel_context_ops ring_context_ops = { + .alloc = ring_context_alloc, + .pin = ring_context_pin, .unpin = ring_context_unpin, @@ -1484,7 +1540,7 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = 
ppgtt->pd->base.ggtt_offset << 10; + *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10; intel_ring_advance(rq, cs); @@ -1503,7 +1559,8 @@ static int flush_pd_dir(struct i915_request *rq) /* Stall until the page table load is complete */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = i915_scratch_offset(rq->i915); + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = MI_NOOP; intel_ring_advance(rq, cs); @@ -1619,7 +1676,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* Insert a delay before the next switch! */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = i915_scratch_offset(rq->i915); + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = MI_NOOP; } *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -1632,7 +1690,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) return 0; } -static int remap_l3(struct i915_request *rq, int slice) +static int remap_l3_slice(struct i915_request *rq, int slice) { u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; int i; @@ -1660,15 +1718,34 @@ static int remap_l3(struct i915_request *rq, int slice) return 0; } +static int remap_l3(struct i915_request *rq) +{ + struct i915_gem_context *ctx = rq->gem_context; + int i, err; + + if (!ctx->remap_slice) + return 0; + + for (i = 0; i < MAX_L3_SLICES; i++) { + if (!(ctx->remap_slice & BIT(i))) + continue; + + err = remap_l3_slice(rq, i); + if (err) + return err; + } + + ctx->remap_slice = 0; + return 0; +} + static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *ctx = rq->gem_context; - struct i915_address_space *vm = - ctx->vm ?: &rq->i915->mm.aliasing_ppgtt->vm; + struct i915_address_space *vm = vm_alias(rq->hw_context); unsigned int unwind_mm = 0; u32 hw_flags = 0; - int ret, i; + int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); @@ -1712,7 +1789,7 @@ static int switch_context(struct i915_request *rq) * as nothing actually executes using the kernel context; it * is purely used for flushing user contexts. */ - if (i915_gem_context_is_kernel(ctx)) + if (i915_gem_context_is_kernel(rq->gem_context)) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); @@ -1746,18 +1823,9 @@ static int switch_context(struct i915_request *rq) goto err_mm; } - if (ctx->remap_slice) { - for (i = 0; i < MAX_L3_SLICES; i++) { - if (!(ctx->remap_slice & BIT(i))) - continue; - - ret = remap_l3(rq, i); - if (ret) - goto err_mm; - } - - ctx->remap_slice = 0; - } + ret = remap_l3(rq); + if (ret) + goto err_mm; return 0; @@ -1795,7 +1863,10 @@ static int ring_request_alloc(struct i915_request *request) return 0; } -static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) +static noinline int +wait_for_space(struct intel_ring *ring, + struct intel_timeline *tl, + unsigned int bytes) { struct i915_request *target; long timeout; @@ -1803,15 +1874,18 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) if (intel_ring_update_space(ring) >= bytes) return 0; - GEM_BUG_ON(list_empty(&ring->request_list)); - list_for_each_entry(target, &ring->request_list, ring_link) { + GEM_BUG_ON(list_empty(&tl->requests)); + list_for_each_entry(target, &tl->requests, link) { + if (target->ring != ring) + continue; + /* Would completion of this request free enough space? 
*/ if (bytes <= __intel_ring_space(target->postfix, ring->emit, ring->size)) break; } - if (WARN_ON(&target->ring_link == &ring->request_list)) + if (GEM_WARN_ON(&target->link == &tl->requests)) return -ENOSPC; timeout = i915_request_wait(target, @@ -1878,7 +1952,7 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) */ GEM_BUG_ON(!rq->reserved_space); - ret = wait_for_space(ring, total_bytes); + ret = wait_for_space(ring, rq->timeline, total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } @@ -2081,10 +2155,14 @@ static void ring_destroy(struct intel_engine_cs *engine) WARN_ON(INTEL_GEN(dev_priv) > 2 && (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); - intel_ring_unpin(engine->buffer); - intel_ring_put(engine->buffer); - intel_engine_cleanup_common(engine); + + intel_ring_unpin(engine->legacy.ring); + intel_ring_put(engine->legacy.ring); + + intel_timeline_unpin(engine->legacy.timeline); + intel_timeline_put(engine->legacy.timeline); + kfree(engine); } @@ -2157,11 +2235,9 @@ static void setup_rcs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (INTEL_GEN(i915) >= 7) { - engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; } else if (IS_GEN(i915, 6)) { - engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen6_render_ring_flush; engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; } else if (IS_GEN(i915, 5)) { @@ -2258,43 +2334,51 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) int intel_ring_submission_init(struct intel_engine_cs *engine) { - struct i915_timeline *timeline; + struct intel_timeline *timeline; struct intel_ring *ring; int err; - timeline = i915_timeline_create(engine->i915, engine->status_page.vma); + timeline = intel_timeline_create(engine->gt, engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; } GEM_BUG_ON(timeline->has_initial_breadcrumb); - ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); - i915_timeline_put(timeline); + err = intel_timeline_pin(timeline); + if (err) + goto err_timeline; + + ring = intel_engine_create_ring(engine, SZ_16K); if (IS_ERR(ring)) { err = PTR_ERR(ring); - goto err; + goto err_timeline_unpin; } err = intel_ring_pin(ring); if (err) goto err_ring; - GEM_BUG_ON(engine->buffer); - engine->buffer = ring; + GEM_BUG_ON(engine->legacy.ring); + engine->legacy.ring = ring; + engine->legacy.timeline = timeline; err = intel_engine_init_common(engine); if (err) - goto err_unpin; + goto err_ring_unpin; - GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); + GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); return 0; -err_unpin: +err_ring_unpin: intel_ring_unpin(ring); err_ring: intel_ring_put(ring); +err_timeline_unpin: + intel_timeline_unpin(timeline); +err_timeline: + intel_timeline_put(timeline); err: intel_engine_cleanup_common(engine); return err; diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index a0756f006f5f..6bf2d87da109 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -49,7 +49,7 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, * cases which disable slices for functional, apart for performance * reasons. So in this case we select a known stable subset. 
*/ - if (!i915->perf.oa.exclusive_stream) { + if (!i915->perf.exclusive_stream) { ctx_sseu = *req_sseu; } else { ctx_sseu = intel_sseu_from_device_info(sseu); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c new file mode 100644 index 000000000000..9cb01d9828f1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -0,0 +1,563 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016-2018 Intel Corporation + */ + +#include "gt/intel_gt_types.h" + +#include "i915_drv.h" + +#include "i915_active.h" +#include "i915_syncmap.h" +#include "gt/intel_timeline.h" + +#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) +#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) + +struct intel_timeline_hwsp { + struct intel_gt *gt; + struct intel_gt_timelines *gt_timelines; + struct list_head free_link; + struct i915_vma *vma; + u64 free_bitmap; +}; + +struct intel_timeline_cacheline { + struct i915_active active; + struct intel_timeline_hwsp *hwsp; + void *vaddr; +#define CACHELINE_BITS 6 +#define CACHELINE_FREE CACHELINE_BITS +}; + +static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + +static struct i915_vma * +hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline) +{ + struct intel_gt_timelines *gt = &timeline->gt->timelines; + struct intel_timeline_hwsp *hwsp; + + BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); + + spin_lock_irq(&gt->hwsp_lock); + + /* hwsp_free_list only contains HWSP that have available cachelines */ + hwsp = list_first_entry_or_null(&gt->hwsp_free_list, + typeof(*hwsp), free_link); + if (!hwsp) { + struct i915_vma *vma; + + spin_unlock_irq(&gt->hwsp_lock); + + hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); + if (!hwsp) + return ERR_PTR(-ENOMEM); + + vma = __hwsp_alloc(timeline->gt); + if (IS_ERR(vma)) { + kfree(hwsp); + return vma; + } + + vma->private = hwsp; + hwsp->gt = timeline->gt; + hwsp->vma = vma; + hwsp->free_bitmap = ~0ull; + hwsp->gt_timelines = gt; + + spin_lock_irq(&gt->hwsp_lock); + list_add(&hwsp->free_link, &gt->hwsp_free_list); + } + + GEM_BUG_ON(!hwsp->free_bitmap); + *cacheline = __ffs64(hwsp->free_bitmap); + hwsp->free_bitmap &= ~BIT_ULL(*cacheline); + if (!hwsp->free_bitmap) + list_del(&hwsp->free_link); + + spin_unlock_irq(&gt->hwsp_lock); + + GEM_BUG_ON(hwsp->vma->private != hwsp); + return hwsp->vma; +} + +static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline) +{ + struct intel_gt_timelines *gt = hwsp->gt_timelines; + unsigned long flags; + + spin_lock_irqsave(&gt->hwsp_lock, flags); + + /* As a cacheline becomes available, publish the HWSP on the freelist */ + if (!hwsp->free_bitmap) + list_add_tail(&hwsp->free_link, &gt->hwsp_free_list); + + GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap)); + hwsp->free_bitmap |= BIT_ULL(cacheline); + + /* And if no one is left using it, give the page back to the system */ + if (hwsp->free_bitmap == ~0ull) { + i915_vma_put(hwsp->vma); + list_del(&hwsp->free_link); + kfree(hwsp); + } + + spin_unlock_irqrestore(&gt->hwsp_lock, flags); +} + +static void __idle_cacheline_free(struct
intel_timeline_cacheline *cl) +{ + GEM_BUG_ON(!i915_active_is_idle(&cl->active)); + + i915_gem_object_unpin_map(cl->hwsp->vma->obj); + i915_vma_put(cl->hwsp->vma); + __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); + + i915_active_fini(&cl->active); + kfree(cl); +} + +static void __cacheline_retire(struct i915_active *active) +{ + struct intel_timeline_cacheline *cl = + container_of(active, typeof(*cl), active); + + i915_vma_unpin(cl->hwsp->vma); + if (ptr_test_bit(cl->vaddr, CACHELINE_FREE)) + __idle_cacheline_free(cl); +} + +static int __cacheline_active(struct i915_active *active) +{ + struct intel_timeline_cacheline *cl = + container_of(active, typeof(*cl), active); + + __i915_vma_pin(cl->hwsp->vma); + return 0; +} + +static struct intel_timeline_cacheline * +cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) +{ + struct intel_timeline_cacheline *cl; + void *vaddr; + + GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS)); + + cl = kmalloc(sizeof(*cl), GFP_KERNEL); + if (!cl) + return ERR_PTR(-ENOMEM); + + vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + kfree(cl); + return ERR_CAST(vaddr); + } + + i915_vma_get(hwsp->vma); + cl->hwsp = hwsp; + cl->vaddr = page_pack_bits(vaddr, cacheline); + + i915_active_init(hwsp->gt->i915, &cl->active, + __cacheline_active, __cacheline_retire); + + return cl; +} + +static void cacheline_acquire(struct intel_timeline_cacheline *cl) +{ + if (cl) + i915_active_acquire(&cl->active); +} + +static void cacheline_release(struct intel_timeline_cacheline *cl) +{ + if (cl) + i915_active_release(&cl->active); +} + +static void cacheline_free(struct intel_timeline_cacheline *cl) +{ + GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); + cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); + + if (i915_active_is_idle(&cl->active)) + __idle_cacheline_free(cl); +} + +int intel_timeline_init(struct intel_timeline *timeline, + struct intel_gt *gt, + struct i915_vma *hwsp) +{ + void *vaddr; + + kref_init(&timeline->kref); + atomic_set(&timeline->pin_count, 0); + + timeline->gt = gt; + + timeline->has_initial_breadcrumb = !hwsp; + timeline->hwsp_cacheline = NULL; + + if (!hwsp) { + struct intel_timeline_cacheline *cl; + unsigned int cacheline; + + hwsp = hwsp_alloc(timeline, &cacheline); + if (IS_ERR(hwsp)) + return PTR_ERR(hwsp); + + cl = cacheline_alloc(hwsp->private, cacheline); + if (IS_ERR(cl)) { + __idle_hwsp_free(hwsp->private, cacheline); + return PTR_ERR(cl); + } + + timeline->hwsp_cacheline = cl; + timeline->hwsp_offset = cacheline * CACHELINE_BYTES; + + vaddr = page_mask_bits(cl->vaddr); + } else { + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + } + + timeline->hwsp_seqno = + memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); + + timeline->hwsp_ggtt = i915_vma_get(hwsp); + GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size); + + timeline->fence_context = dma_fence_context_alloc(1); + + mutex_init(&timeline->mutex); + + INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); + + return 0; +} + +static void timelines_init(struct intel_gt *gt) +{ + struct intel_gt_timelines *timelines = >->timelines; + + spin_lock_init(&timelines->lock); + INIT_LIST_HEAD(&timelines->active_list); + + spin_lock_init(&timelines->hwsp_lock); + INIT_LIST_HEAD(&timelines->hwsp_free_list); +} + +void 
intel_timelines_init(struct drm_i915_private *i915) +{ + timelines_init(&i915->gt); +} + +void intel_timeline_fini(struct intel_timeline *timeline) +{ + GEM_BUG_ON(atomic_read(&timeline->pin_count)); + GEM_BUG_ON(!list_empty(&timeline->requests)); + + if (timeline->hwsp_cacheline) + cacheline_free(timeline->hwsp_cacheline); + else + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); + + i915_vma_put(timeline->hwsp_ggtt); +} + +struct intel_timeline * +intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) +{ + struct intel_timeline *timeline; + int err; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + err = intel_timeline_init(timeline, gt, global_hwsp); + if (err) { + kfree(timeline); + return ERR_PTR(err); + } + + return timeline; +} + +int intel_timeline_pin(struct intel_timeline *tl) +{ + int err; + + if (atomic_add_unless(&tl->pin_count, 1, 0)) + return 0; + + err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + return err; + + tl->hwsp_offset = + i915_ggtt_offset(tl->hwsp_ggtt) + + offset_in_page(tl->hwsp_offset); + + cacheline_acquire(tl->hwsp_cacheline); + if (atomic_fetch_inc(&tl->pin_count)) { + cacheline_release(tl->hwsp_cacheline); + __i915_vma_unpin(tl->hwsp_ggtt); + } + + return 0; +} + +void intel_timeline_enter(struct intel_timeline *tl) +{ + struct intel_gt_timelines *timelines = &tl->gt->timelines; + unsigned long flags; + + lockdep_assert_held(&tl->mutex); + + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + if (tl->active_count++) + return; + GEM_BUG_ON(!tl->active_count); /* overflow? */ + + spin_lock_irqsave(&timelines->lock, flags); + list_add(&tl->link, &timelines->active_list); + spin_unlock_irqrestore(&timelines->lock, flags); +} + +void intel_timeline_exit(struct intel_timeline *tl) +{ + struct intel_gt_timelines *timelines = &tl->gt->timelines; + unsigned long flags; + + lockdep_assert_held(&tl->mutex); + + GEM_BUG_ON(!tl->active_count); + if (--tl->active_count) + return; + + spin_lock_irqsave(&timelines->lock, flags); + list_del(&tl->link); + spin_unlock_irqrestore(&timelines->lock, flags); + + /* + * Since this timeline is idle, all barriers upon which we were waiting + * must also be complete and so we can discard the last used barriers + * without loss of information. + */ + i915_syncmap_free(&tl->sync); +} + +static u32 timeline_advance(struct intel_timeline *tl) +{ + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); + + return tl->seqno += 1 + tl->has_initial_breadcrumb; +} + +static void timeline_rollback(struct intel_timeline *tl) +{ + tl->seqno -= 1 + tl->has_initial_breadcrumb; +} + +static noinline int +__intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno) +{ + struct intel_timeline_cacheline *cl; + unsigned int cacheline; + struct i915_vma *vma; + void *vaddr; + int err; + + /* + * If there is an outstanding GPU reference to this cacheline, + * such as it being sampled by a HW semaphore on another timeline, + * we cannot wraparound our seqno value (the HW semaphore does + * a strict greater-than-or-equals compare, not i915_seqno_passed). + * So if the cacheline is still busy, we must detach ourselves + * from it and leave it inflight alongside its users. + * + * However, if nobody is watching and we can guarantee that nobody + * will, we could simply reuse the same cacheline.
+ * + * if (i915_active_request_is_signaled(&tl->last_request) && + * i915_active_is_signaled(&tl->hwsp_cacheline->active)) + * return 0; + * + * That seems unlikely for a busy timeline that needed to wrap in + * the first place, so just replace the cacheline. + */ + + vma = hwsp_alloc(tl, &cacheline); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_rollback; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) { + __idle_hwsp_free(vma->private, cacheline); + goto err_rollback; + } + + cl = cacheline_alloc(vma->private, cacheline); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + __idle_hwsp_free(vma->private, cacheline); + goto err_unpin; + } + GEM_BUG_ON(cl->hwsp->vma != vma); + + /* + * Attach the old cacheline to the current request, so that we only + * free it after the current request is retired, which ensures that + * all writes into the cacheline from previous requests are complete. + */ + err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq); + if (err) + goto err_cacheline; + + cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */ + cacheline_free(tl->hwsp_cacheline); + + i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */ + i915_vma_put(tl->hwsp_ggtt); + + tl->hwsp_ggtt = i915_vma_get(vma); + + vaddr = page_mask_bits(cl->vaddr); + tl->hwsp_offset = cacheline * CACHELINE_BYTES; + tl->hwsp_seqno = + memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); + + tl->hwsp_offset += i915_ggtt_offset(vma); + + cacheline_acquire(cl); + tl->hwsp_cacheline = cl; + + *seqno = timeline_advance(tl); + GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)); + return 0; + +err_cacheline: + cacheline_free(cl); +err_unpin: + i915_vma_unpin(vma); +err_rollback: + timeline_rollback(tl); + return err; +} + +int intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno) +{ + *seqno = timeline_advance(tl); + + /* Replace the HWSP on wraparound for HW semaphores */ + if (unlikely(!*seqno && tl->hwsp_cacheline)) + return __intel_timeline_get_seqno(tl, rq, seqno); + + return 0; +} + +static int cacheline_ref(struct intel_timeline_cacheline *cl, + struct i915_request *rq) +{ + return i915_active_ref(&cl->active, rq->timeline, rq); +} + +int intel_timeline_read_hwsp(struct i915_request *from, + struct i915_request *to, + u32 *hwsp) +{ + struct intel_timeline_cacheline *cl = from->hwsp_cacheline; + struct intel_timeline *tl = from->timeline; + int err; + + GEM_BUG_ON(to->timeline == tl); + + mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); + err = i915_request_completed(from); + if (!err) + err = cacheline_ref(cl, to); + if (!err) { + if (likely(cl == tl->hwsp_cacheline)) { + *hwsp = tl->hwsp_offset; + } else { /* across a seqno wrap, recover the original offset */ + *hwsp = i915_ggtt_offset(cl->hwsp->vma) + + ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * + CACHELINE_BYTES; + } + } + mutex_unlock(&tl->mutex); + + return err; +} + +void intel_timeline_unpin(struct intel_timeline *tl) +{ + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + if (!atomic_dec_and_test(&tl->pin_count)) + return; + + cacheline_release(tl->hwsp_cacheline); + + __i915_vma_unpin(tl->hwsp_ggtt); +} + +void __intel_timeline_free(struct kref *kref) +{ + struct intel_timeline *timeline = + container_of(kref, typeof(*timeline), kref); + + intel_timeline_fini(timeline); + kfree(timeline); +} + +static void timelines_fini(struct intel_gt *gt) +{ + struct intel_gt_timelines *timelines = &gt->timelines; + + GEM_BUG_ON(!list_empty(&timelines->active_list));
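+ /* Every HWSP page must also have been handed back via __idle_hwsp_free() by now; a non-empty freelist here would mean a leaked cacheline. */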
+ GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); +} + +void intel_timelines_fini(struct drm_i915_private *i915) +{ + timelines_fini(&i915->gt); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "gt/selftests/mock_timeline.c" +#include "gt/selftest_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h new file mode 100644 index 000000000000..f583af1ba18d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -0,0 +1,94 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef I915_TIMELINE_H +#define I915_TIMELINE_H + +#include <linux/lockdep.h> + +#include "i915_active.h" +#include "i915_syncmap.h" +#include "gt/intel_timeline_types.h" + +int intel_timeline_init(struct intel_timeline *tl, + struct intel_gt *gt, + struct i915_vma *hwsp); +void intel_timeline_fini(struct intel_timeline *tl); + +struct intel_timeline * +intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); + +static inline struct intel_timeline * +intel_timeline_get(struct intel_timeline *timeline) +{ + kref_get(&timeline->kref); + return timeline; +} + +void __intel_timeline_free(struct kref *kref); +static inline void intel_timeline_put(struct intel_timeline *timeline) +{ + kref_put(&timeline->kref, __intel_timeline_free); +} + +static inline int __intel_timeline_sync_set(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_set(&tl->sync, context, seqno); +} + +static inline int intel_timeline_sync_set(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno); +} + +static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_is_later(&tl->sync, context, seqno); +} + +static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); +} + +int intel_timeline_pin(struct intel_timeline *tl); +void intel_timeline_enter(struct intel_timeline *tl); +int intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno); +void intel_timeline_exit(struct intel_timeline *tl); +void intel_timeline_unpin(struct intel_timeline *tl); + +int intel_timeline_read_hwsp(struct i915_request *from, + struct i915_request 
*until, + u32 *hwsp_offset); + +void intel_timelines_init(struct drm_i915_private *i915); +void intel_timelines_fini(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h new file mode 100644 index 000000000000..2b1baf2fcc8e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_TIMELINE_TYPES_H__ +#define __I915_TIMELINE_TYPES_H__ + +#include <linux/list.h> +#include <linux/kref.h> +#include <linux/mutex.h> +#include <linux/types.h> + +#include "i915_active_types.h" + +struct drm_i915_private; +struct i915_vma; +struct intel_timeline_cacheline; +struct i915_syncmap; + +struct intel_timeline { + u64 fence_context; + u32 seqno; + + struct mutex mutex; /* protects the flow of requests */ + + /* + * pin_count and active_count track essentially the same thing: + * How many requests are in flight or may be under construction. + * + * We need two distinct counters so that we can assign different + * lifetimes to the events for different use-cases. For example, + * we want to permanently keep the timeline pinned for the kernel + * context so that we can issue requests at any time without having + * to acquire space in the GGTT. However, we want to keep tracking + * the activity (to be able to detect when we become idle) along that + * permanently pinned timeline and so end up requiring two counters. + * + * Note that the active_count is protected by the intel_timeline.mutex, + * but the pin_count is protected by a combination of serialisation + * from the intel_context caller plus internal atomicity. + */ + atomic_t pin_count; + unsigned int active_count; + + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + + struct intel_timeline_cacheline *hwsp_cacheline; + + bool has_initial_breadcrumb; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. + */ + struct list_head requests; + + /* Contains an RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_active_request_get_request_rcu(), or hold the + * struct_mutex. + */ + struct i915_active_request last_request; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. + */ + struct i915_syncmap *sync; + + struct list_head link; + struct intel_gt *gt; + + struct kref kref; +}; + +#endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 15e90fd2cfdc..45481eb1fa3c 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "intel_context.h" +#include "intel_gt.h" #include "intel_workarounds.h" /** @@ -49,9 +50,10 @@ * - Public functions to init or apply the given workaround type. 
*/ -static void wa_init_start(struct i915_wa_list *wal, const char *name) +static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name) { wal->name = name; + wal->engine_name = engine_name; } #define WA_LIST_CHUNK (1 << 4) @@ -73,8 +75,8 @@ static void wa_init_finish(struct i915_wa_list *wal) if (!wal->count) return; - DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n", - wal->wa_count, wal->name); + DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n", + wal->wa_count, wal->name, wal->engine_name); } static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) @@ -175,19 +177,6 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_write_masked_or(wal, reg, val, val); } -static void -ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val, - /* Bonkers HW, skip verifying */ - }; - - _wa_add(wal, &wa); -} - #define WA_SET_BIT_MASKED(addr, mask) \ wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) @@ -308,11 +297,6 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, FLOW_CONTROL_ENABLE | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - if (!IS_COFFEELAKE(i915)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, @@ -536,12 +520,6 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | GEN8_ERRDETBCTRL); - /* WaDisableBankHangMode:icl */ - wa_write(wal, - GEN8_L3CNTLREG, - intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | - GEN8_ERRDETBCTRL); - /* Wa_1604370585:icl (pre-prod) * Formerly known as WaPushConstantDereferenceHoldDisable */ @@ -586,6 +564,11 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN11_SAMPLER_ENABLE_HEADLESS_MSG); } +static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -596,9 +579,11 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) return; - wa_init_start(wal, name); + wa_init_start(wal, name, engine->name); - if (IS_GEN(i915, 11)) + if (IS_GEN(i915, 12)) + tgl_ctx_workarounds_init(engine, wal); + else if (IS_GEN(i915, 11)) icl_ctx_workarounds_init(engine, wal); else if (IS_CANNONLAKE(i915)) cnl_ctx_workarounds_init(engine, wal); @@ -766,7 +751,10 @@ static void wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - u32 mcr_slice_subslice_mask; + unsigned int slice, subslice; + u32 l3_en, mcr, mcr_mask; + + GEM_BUG_ON(INTEL_GEN(i915) < 10); /* * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl @@ -774,42 +762,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) * the case, we might need to program MCR select to a valid L3Bank * by default, to make sure we correctly read certain registers * later on (in the range 0xB100 - 0xB3FF). - * This might be incompatible with - * WaProgramMgsrForCorrectSliceSpecificMmioReads. 
- * Fortunately, this should not happen in production hardware, so - * we only assert that this is the case (instead of implementing - * something more complex that requires checking the range of every - * MMIO read). - */ - if (INTEL_GEN(i915) >= 10 && - is_power_of_2(sseu->slice_mask)) { - /* - * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches - * enabled subslice, no need to redirect MCR packet - */ - u32 slice = fls(sseu->slice_mask); - u32 fuse3 = - intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3); - u8 ss_mask = sseu->subslice_mask[slice]; - - u8 enabled_mask = (ss_mask | ss_mask >> - GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK; - u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK; - - /* - * Production silicon should have matched L3Bank and - * subslice enabled - */ - WARN_ON((enabled_mask & disabled_mask) != enabled_mask); - } - - if (INTEL_GEN(i915) >= 11) - mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | - GEN11_MCR_SUBSLICE_MASK; - else - mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | - GEN8_MCR_SUBSLICE_MASK; - /* + * * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl * Before any MMIO read into slice/subslice specific registers, MCR * packet control register needs to be programmed to point to any @@ -819,11 +772,51 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) * are consistent across s/ss in almost all cases. In the rare * occasions, such as INSTDONE, where this value is dependent * on s/ss combo, the read should be done with read_subslice_reg. + * + * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both + * to which subslice, or to which L3 bank, the respective mmio reads + * will go, we have to find a common index which works for both + * accesses. + * + * Case where we cannot find a common index fortunately should not + * happen in production hardware, so we only emit a warning instead of + * implementing something more complex that requires checking the range + * of every MMIO read. 
*/ - wa_write_masked_or(wal, - GEN8_MCR_SELECTOR, - mcr_slice_subslice_mask, - intel_calculate_mcr_s_ss_select(i915)); + + if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { + u32 l3_fuse = + intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & + GEN10_L3BANK_MASK; + + DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse); + l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); + } else { + l3_en = ~0; + } + + slice = fls(sseu->slice_mask) - 1; + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); + subslice = fls(l3_en & sseu->subslice_mask[slice]); + if (!subslice) { + DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n", + sseu->subslice_mask[slice], l3_en); + subslice = fls(l3_en); + WARN_ON(!subslice); + } + subslice--; + + if (INTEL_GEN(i915) >= 11) { + mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + } else { + mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + } + + DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr); + + wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); } static void @@ -900,9 +893,16 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void +tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ +} + +static void gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) { - if (IS_GEN(i915, 11)) + if (IS_GEN(i915, 12)) + tgl_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 11)) icl_gt_workarounds_init(i915, wal); else if (IS_CANNONLAKE(i915)) cnl_gt_workarounds_init(i915, wal); @@ -926,7 +926,7 @@ void intel_gt_init_workarounds(struct drm_i915_private *i915) { struct i915_wa_list *wal = &i915->gt_wa_list; - wa_init_start(wal, "GT"); + wa_init_start(wal, "GT", "global"); gt_init_workarounds(i915, wal); wa_init_finish(wal); } @@ -990,9 +990,9 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) spin_unlock_irqrestore(&uncore->lock, flags); } -void intel_gt_apply_workarounds(struct drm_i915_private *i915) +void intel_gt_apply_workarounds(struct intel_gt *gt) { - wa_list_apply(&i915->uncore, &i915->gt_wa_list); + wa_list_apply(gt->uncore, &gt->i915->gt_wa_list); } static bool wa_list_verify(struct intel_uncore *uncore, @@ -1011,10 +1011,23 @@ static bool wa_list_verify(struct intel_uncore *uncore, return ok; } -bool intel_gt_verify_workarounds(struct drm_i915_private *i915, - const char *from) +bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) { - return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from); + return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from); +} + +static inline bool is_nonpriv_flags_valid(u32 flags) +{ + /* Check only valid flag bits are set */ + if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID) + return false; + + /* NB: Only 3 out of 4 enum values are valid for access field */ + if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == + RING_FORCE_TO_NONPRIV_ACCESS_INVALID) + return false; + + return true; } static void @@ -1027,6 +1040,9 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) return; + if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) + return; + wa.reg.reg |= flags; _wa_add(wal, &wa); } @@ -1034,7 +1050,7 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) static void whitelist_reg(struct
i915_wa_list *wal, i915_reg_t reg) { - whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW); + whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); } static void gen9_whitelist_build(struct i915_wa_list *w) @@ -1098,10 +1114,25 @@ static void glk_whitelist_build(struct intel_engine_cs *engine) static void cfl_whitelist_build(struct intel_engine_cs *engine) { + struct i915_wa_list *w = &engine->whitelist; + if (engine->class != RENDER_CLASS) return; - gen9_whitelist_build(&engine->whitelist); + gen9_whitelist_build(w); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); } static void cnl_whitelist_build(struct intel_engine_cs *engine) @@ -1129,18 +1160,31 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* WaEnableStateCacheRedirectToCS:icl */ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); break; case VIDEO_DECODE_CLASS: /* hucStatusRegOffset */ whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base), - RING_FORCE_TO_NONPRIV_RD); + RING_FORCE_TO_NONPRIV_ACCESS_RD); /* hucUKernelHdrInfoRegOffset */ whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base), - RING_FORCE_TO_NONPRIV_RD); + RING_FORCE_TO_NONPRIV_ACCESS_RD); /* hucStatus2RegOffset */ whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), - RING_FORCE_TO_NONPRIV_RD); + RING_FORCE_TO_NONPRIV_ACCESS_RD); break; default: @@ -1148,14 +1192,20 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) } } +static void tgl_whitelist_build(struct intel_engine_cs *engine) +{ +} + void intel_engine_init_whitelist(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; struct i915_wa_list *w = &engine->whitelist; - wa_init_start(w, "whitelist"); + wa_init_start(w, "whitelist", engine->name); - if (IS_GEN(i915, 11)) + if (IS_GEN(i915, 12)) + tgl_whitelist_build(engine); + else if (IS_GEN(i915, 11)) icl_whitelist_build(engine); else if (IS_CANNONLAKE(i915)) cnl_whitelist_build(engine); @@ -1212,10 +1262,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); /* WaPipelineFlushCoherentLines:icl */ - ignore_wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES, - GEN8_LQSC_FLUSH_COHERENT_LINES); + wa_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES); /* * Wa_1405543622:icl @@ -1242,10 +1291,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1405733216:icl * Formerly known as WaDisableCleanEvicts */ - ignore_wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE, - GEN11_LQSC_CLEAN_EVICT_DISABLE); + wa_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE); /* WaForwardProgressSoftReset:icl */ wa_write_or(wal, @@ -1258,8 +1306,18 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) wa_write_or(wal, GEN7_SARCHKMD, - 
GEN7_DISABLE_DEMAND_PREFETCH | - GEN7_DISABLE_SAMPLER_PREFETCH); + GEN7_DISABLE_DEMAND_PREFETCH); + + /* Wa_1606682166:icl */ + wa_write_or(wal, + GEN7_SARCHKMD, + GEN7_DISABLE_SAMPLER_PREFETCH); + + /* Wa_1409178092:icl */ + wa_write_masked_or(wal, + GEN11_SCRATCH2, + GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, + 0); } if (IS_GEN_RANGE(i915, 9, 11)) { @@ -1328,7 +1386,7 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8)) return; - if (engine->id == RCS0) + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else xcs_engine_wa_init(engine, wal); @@ -1338,10 +1396,10 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) { struct i915_wa_list *wal = &engine->wa_list; - if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8)) + if (INTEL_GEN(engine->i915) < 8) return; - wa_init_start(wal, engine->name); + wa_init_start(wal, "engine", engine->name); engine_init_workarounds(engine, wal); wa_init_finish(wal); } @@ -1384,26 +1442,50 @@ err_obj: return ERR_PTR(err); } +static bool mcr_range(struct drm_i915_private *i915, u32 offset) +{ + /* + * Registers in this range are affected by the MCR selector + * which only controls CPU initiated MMIO. Routing does not + * work for CS access so we cannot verify them on this path. + */ + if (INTEL_GEN(i915) >= 8 && (offset >= 0xb100 && offset <= 0xb3ff)) + return true; + + return false; +} + static int wa_list_srm(struct i915_request *rq, const struct i915_wa_list *wal, struct i915_vma *vma) { + struct drm_i915_private *i915 = rq->i915; + unsigned int i, count = 0; const struct i915_wa *wa; - unsigned int i; u32 srm, *cs; srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - if (INTEL_GEN(rq->i915) >= 8) + if (INTEL_GEN(i915) >= 8) srm++; - cs = intel_ring_begin(rq, 4 * wal->count); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg))) + count++; + } + + cs = intel_ring_begin(rq, 4 * count); if (IS_ERR(cs)) return PTR_ERR(cs); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + u32 offset = i915_mmio_reg_offset(wa->reg); + + if (mcr_range(i915, offset)) + continue; + *cs++ = srm; - *cs++ = i915_mmio_reg_offset(wa->reg); + *cs++ = offset; *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; *cs++ = 0; } @@ -1426,7 +1508,7 @@ static int engine_wa_list_verify(struct intel_context *ce, if (!wal->count) return 0; - vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count); + vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -1453,9 +1535,13 @@ static int engine_wa_list_verify(struct intel_context *ce, } err = 0; - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg))) + continue; + if (!wa_verify(wa, results[i], wal->name, from)) err = -ENXIO; + } i915_gem_object_unpin_map(vma->obj); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h index 3761a6ee58bb..8c9c769c2204 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h @@ -14,6 +14,7 @@ struct drm_i915_private; struct i915_request; struct intel_engine_cs; +struct intel_gt; static inline void intel_wa_list_free(struct i915_wa_list *wal) { @@ -25,9 +26,8 @@ void intel_engine_init_ctx_wa(struct intel_engine_cs *engine); int intel_engine_emit_ctx_wa(struct i915_request *rq); 
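/*
 * Illustrative sketch (not part of this patch): one way the GT-scoped entry
 * points declared around here could be driven together after the conversion
 * from drm_i915_private to intel_gt. The function name and the use of -ENXIO
 * on verification failure are assumptions for illustration only.
 */
static inline int example_setup_gt_workarounds(struct intel_gt *gt)
{
	/* Build the list once; it still lives in i915->gt_wa_list for now. */
	intel_gt_init_workarounds(gt->i915);

	/* Program the registers through this GT's own uncore... */
	intel_gt_apply_workarounds(gt);

	/* ...then read them back to check that the values stuck. */
	if (!intel_gt_verify_workarounds(gt, "example"))
		return -ENXIO;

	return 0;
}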
void intel_gt_init_workarounds(struct drm_i915_private *i915); -void intel_gt_apply_workarounds(struct drm_i915_private *i915); -bool intel_gt_verify_workarounds(struct drm_i915_private *i915, - const char *from); +void intel_gt_apply_workarounds(struct intel_gt *gt); +bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from); void intel_engine_init_whitelist(struct intel_engine_cs *engine); void intel_engine_apply_whitelist(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index 42ac1fb99572..e27ab1b710b3 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -20,6 +20,7 @@ struct i915_wa { struct i915_wa_list { const char *name; + const char *engine_name; struct i915_wa *list; unsigned int count; unsigned int wa_count; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 086801b51441..5d43cbc3f345 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -27,58 +27,40 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_engine_pool.h" #include "mock_engine.h" #include "selftests/mock_request.h" -struct mock_ring { - struct intel_ring base; - struct i915_timeline timeline; -}; - -static void mock_timeline_pin(struct i915_timeline *tl) +static void mock_timeline_pin(struct intel_timeline *tl) { - tl->pin_count++; + atomic_inc(&tl->pin_count); } -static void mock_timeline_unpin(struct i915_timeline *tl) +static void mock_timeline_unpin(struct intel_timeline *tl) { - GEM_BUG_ON(!tl->pin_count); - tl->pin_count--; + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + atomic_dec(&tl->pin_count); } static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; - struct mock_ring *ring; + struct intel_ring *ring; ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); if (!ring) return NULL; - if (i915_timeline_init(engine->i915, &ring->timeline, NULL)) { - kfree(ring); - return NULL; - } - - kref_init(&ring->base.ref); - ring->base.size = sz; - ring->base.effective_size = sz; - ring->base.vaddr = (void *)(ring + 1); - ring->base.timeline = &ring->timeline; + kref_init(&ring->ref); + ring->size = sz; + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + atomic_set(&ring->pin_count, 1); - INIT_LIST_HEAD(&ring->base.request_list); - intel_ring_update_space(&ring->base); + intel_ring_update_space(ring); - return &ring->base; -} - -static void mock_ring_free(struct intel_ring *base) -{ - struct mock_ring *ring = container_of(base, typeof(*ring), base); - - i915_timeline_fini(&ring->timeline); - kfree(ring); + return ring; } static struct i915_request *first_request(struct mock_engine *engine) @@ -129,7 +111,6 @@ static void hw_delay_complete(struct timer_list *t) static void mock_context_unpin(struct intel_context *ce) { - mock_timeline_unpin(ce->ring->timeline); } static void mock_context_destroy(struct kref *ref) @@ -138,31 +119,41 @@ static void mock_context_destroy(struct kref *ref) GEM_BUG_ON(intel_context_is_pinned(ce)); - if (ce->ring) - mock_ring_free(ce->ring); + if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + kfree(ce->ring); + mock_timeline_unpin(ce->timeline); + } + intel_context_fini(ce); intel_context_free(ce); } -static int mock_context_pin(struct intel_context *ce) +static int mock_context_alloc(struct intel_context *ce) { - int ret; - - 
if (!ce->ring) { - ce->ring = mock_ring(ce->engine); - if (!ce->ring) - return -ENOMEM; + ce->ring = mock_ring(ce->engine); + if (!ce->ring) + return -ENOMEM; + + GEM_BUG_ON(ce->timeline); + ce->timeline = intel_timeline_create(ce->engine->gt, NULL); + if (IS_ERR(ce->timeline)) { + kfree(ce->engine); + return PTR_ERR(ce->timeline); } - ret = intel_context_active_acquire(ce, PIN_HIGH); - if (ret) - return ret; + mock_timeline_pin(ce->timeline); - mock_timeline_pin(ce->ring->timeline); return 0; } +static int mock_context_pin(struct intel_context *ce) +{ + return intel_context_active_acquire(ce); +} + static const struct intel_context_ops mock_context_ops = { + .alloc = mock_context_alloc, + .pin = mock_context_pin, .unpin = mock_context_unpin, @@ -256,9 +247,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; + engine->base.gt = &i915->gt; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; engine->base.mask = BIT(id); + engine->base.instance = id; engine->base.status_page.addr = (void *)(engine + 1); engine->base.cops = &mock_context_ops; @@ -277,29 +270,26 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); + intel_engine_add_user(&engine->base); + return &engine->base; } int mock_engine_init(struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = engine->i915; - int err; + struct intel_context *ce; intel_engine_init_active(engine, ENGINE_MOCK); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); + intel_engine_pool_init(&engine->pool); - engine->kernel_context = - i915_gem_context_get_engine(i915->kernel_context, engine->id); - if (IS_ERR(engine->kernel_context)) - goto err_breadcrumbs; - - err = intel_context_pin(engine->kernel_context); - intel_context_put(engine->kernel_context); - if (err) + ce = create_kernel_context(engine); + if (IS_ERR(ce)) goto err_breadcrumbs; + engine->kernel_context = ce; return 0; err_breadcrumbs: @@ -333,6 +323,7 @@ void mock_engine_free(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); intel_context_unpin(engine->kernel_context); + intel_context_put(engine->kernel_context); intel_engine_fini_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c new file mode 100644 index 000000000000..9d1ea26c7a2d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -0,0 +1,456 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_selftest.h" +#include "intel_engine_pm.h" +#include "intel_gt.h" + +#include "gem/selftests/mock_context.h" +#include "selftests/igt_flush_test.h" +#include "selftests/mock_drm.h" + +static int request_sync(struct i915_request *rq) +{ + long timeout; + int err = 0; + + i915_request_get(rq); + + i915_request_add(rq); + timeout = i915_request_wait(rq, 0, HZ / 10); + if (timeout < 0) { + err = timeout; + } else { + mutex_lock(&rq->timeline->mutex); + i915_request_retire_upto(rq); + mutex_unlock(&rq->timeline->mutex); + } + + i915_request_put(rq); + + return err; +} + +static int context_sync(struct intel_context *ce) +{ + struct intel_timeline *tl = ce->timeline; + int err = 0; + + mutex_lock(&tl->mutex); + do { + struct i915_request *rq; + long timeout; + + rcu_read_lock(); + rq = 
rcu_dereference(tl->last_request.request); + if (rq) + rq = i915_request_get_rcu(rq); + rcu_read_unlock(); + if (!rq) + break; + + timeout = i915_request_wait(rq, 0, HZ / 10); + if (timeout < 0) + err = timeout; + else + i915_request_retire_upto(rq); + + i915_request_put(rq); + } while (!err); + mutex_unlock(&tl->mutex); + + return err; +} + +static int __live_context_size(struct intel_engine_cs *engine, + struct i915_gem_context *fixme) +{ + struct intel_context *ce; + struct i915_request *rq; + void *vaddr; + int err; + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) + goto err; + + vaddr = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(engine->i915)); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + intel_context_unpin(ce); + goto err; + } + + /* + * Note that execlists also applies a redzone which it checks on + * context unpin when debugging. We are using the same location + * and same poison value so that our checks overlap. Despite the + * redundancy, we want to keep this little selftest so that we + * get coverage of any and all submission backends, and we can + * always extend this test to ensure we trick the HW into a + * compromising position wrt to the various sections that need + * to be written into the context state. + * + * TLDR; this overlaps with the execlists redzone. + */ + if (HAS_EXECLISTS(engine->i915)) + vaddr += LRC_HEADER_PAGES * PAGE_SIZE; + + vaddr += engine->context_size - I915_GTT_PAGE_SIZE; + memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); + + rq = intel_context_create_request(ce); + intel_context_unpin(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + err = request_sync(rq); + if (err) + goto err_unpin; + + /* Force the context switch */ + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + err = request_sync(rq); + if (err) + goto err_unpin; + + if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) { + pr_err("%s context overwrote trailing red-zone!", engine->name); + err = -EINVAL; + } + +err_unpin: + i915_gem_object_unpin_map(ce->state->obj); +err: + intel_context_put(ce); + return err; +} + +static int live_context_size(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + enum intel_engine_id id; + int err = 0; + + /* + * Check that our context sizes are correct by seeing if the + * HW tries to write past the end of one. + */ + + mutex_lock(>->i915->drm.struct_mutex); + + fixme = kernel_context(gt->i915); + if (IS_ERR(fixme)) { + err = PTR_ERR(fixme); + goto unlock; + } + + for_each_engine(engine, gt->i915, id) { + struct { + struct drm_i915_gem_object *state; + void *pinned; + } saved; + + if (!engine->context_size) + continue; + + intel_engine_pm_get(engine); + + /* + * Hide the old default state -- we lie about the context size + * and get confused when the default state is smaller than + * expected. For our do nothing request, inheriting the + * active state is sufficient, we are only checking that we + * don't use more than we planned. 
+ */ + saved.state = fetch_and_zero(&engine->default_state); + saved.pinned = fetch_and_zero(&engine->pinned_default_state); + + /* Overlaps with the execlists redzone */ + engine->context_size += I915_GTT_PAGE_SIZE; + + err = __live_context_size(engine, fixme); + + engine->context_size -= I915_GTT_PAGE_SIZE; + + engine->pinned_default_state = saved.pinned; + engine->default_state = saved.state; + + intel_engine_pm_put(engine); + + if (err) + break; + } + + kernel_context_close(fixme); +unlock: + mutex_unlock(>->i915->drm.struct_mutex); + return err; +} + +static int __live_active_context(struct intel_engine_cs *engine, + struct i915_gem_context *fixme) +{ + struct intel_context *ce; + int pass; + int err; + + /* + * We keep active contexts alive until after a subsequent context + * switch as the final write from the context-save will be after + * we retire the final request. We track when we unpin the context, + * under the presumption that the final pin is from the last request, + * and instead of immediately unpinning the context, we add a task + * to unpin the context from the next idle-barrier. + * + * This test makes sure that the context is kept alive until a + * subsequent idle-barrier (emitted when the engine wakeref hits 0 + * with no more outstanding requests). + */ + + if (intel_engine_pm_is_awake(engine)) { + pr_err("%s is awake before starting %s!\n", + engine->name, __func__); + return -EINVAL; + } + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + for (pass = 0; pass <= 2; pass++) { + struct i915_request *rq; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err; + } + + err = request_sync(rq); + if (err) + goto err; + + /* Context will be kept active until after an idle-barrier. 
*/ + if (i915_active_is_idle(&ce->active)) { + pr_err("context is not active; expected idle-barrier (%s pass %d)\n", + engine->name, pass); + err = -EINVAL; + goto err; + } + + if (!intel_engine_pm_is_awake(engine)) { + pr_err("%s is asleep before idle-barrier\n", + engine->name); + err = -EINVAL; + goto err; + } + } + + /* Now make sure our idle-barriers are flushed */ + err = context_sync(engine->kernel_context); + if (err) + goto err; + + if (!i915_active_is_idle(&ce->active)) { + pr_err("context is still active!"); + err = -EINVAL; + } + + if (intel_engine_pm_is_awake(engine)) { + struct drm_printer p = drm_debug_printer(__func__); + + intel_engine_dump(engine, &p, + "%s is still awake after idle-barriers\n", + engine->name); + GEM_TRACE_DUMP(); + + err = -EINVAL; + goto err; + } + +err: + intel_context_put(ce); + return err; +} + +static int live_active_context(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + enum intel_engine_id id; + struct drm_file *file; + int err = 0; + + file = mock_file(gt->i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(>->i915->drm.struct_mutex); + + fixme = live_context(gt->i915, file); + if (IS_ERR(fixme)) { + err = PTR_ERR(fixme); + goto unlock; + } + + for_each_engine(engine, gt->i915, id) { + err = __live_active_context(engine, fixme); + if (err) + break; + + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + if (err) + break; + } + +unlock: + mutex_unlock(>->i915->drm.struct_mutex); + mock_file_free(gt->i915, file); + return err; +} + +static int __remote_sync(struct intel_context *ce, struct intel_context *remote) +{ + struct i915_request *rq; + int err; + + err = intel_context_pin(remote); + if (err) + return err; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto unpin; + } + + err = intel_context_prepare_remote_request(remote, rq); + if (err) { + i915_request_add(rq); + goto unpin; + } + + err = request_sync(rq); + +unpin: + intel_context_unpin(remote); + return err; +} + +static int __live_remote_context(struct intel_engine_cs *engine, + struct i915_gem_context *fixme) +{ + struct intel_context *local, *remote; + int pass; + int err; + + /* + * Check that our idle barriers do not interfere with normal + * activity tracking. In particular, check that operating + * on the context image remotely (intel_context_prepare_remote_request), + * which inserts foreign fences into intel_context.active, does not + * clobber the idle-barrier. 
+ */ + + remote = intel_context_create(fixme, engine); + if (IS_ERR(remote)) + return PTR_ERR(remote); + + local = intel_context_create(fixme, engine); + if (IS_ERR(local)) { + err = PTR_ERR(local); + goto err_remote; + } + + for (pass = 0; pass <= 2; pass++) { + err = __remote_sync(local, remote); + if (err) + break; + + err = __remote_sync(engine->kernel_context, remote); + if (err) + break; + + if (i915_active_is_idle(&remote->active)) { + pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n", + engine->name, pass); + err = -EINVAL; + break; + } + } + + intel_context_put(local); +err_remote: + intel_context_put(remote); + return err; +} + +static int live_remote_context(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + enum intel_engine_id id; + struct drm_file *file; + int err = 0; + + file = mock_file(gt->i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(>->i915->drm.struct_mutex); + + fixme = live_context(gt->i915, file); + if (IS_ERR(fixme)) { + err = PTR_ERR(fixme); + goto unlock; + } + + for_each_engine(engine, gt->i915, id) { + err = __live_remote_context(engine, fixme); + if (err) + break; + + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + if (err) + break; + } + +unlock: + mutex_unlock(>->i915->drm.struct_mutex); + mock_file_free(gt->i915, file); + return err; +} + +int intel_context_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_context_size), + SUBTEST(live_active_context), + SUBTEST(live_remote_context), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.c b/drivers/gpu/drm/i915/gt/selftest_engine.c new file mode 100644 index 000000000000..f65b118e261d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine.c @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2018 Intel Corporation + */ + +#include "i915_selftest.h" +#include "selftest_engine.h" + +int intel_engine_live_selftests(struct drm_i915_private *i915) +{ + static int (* const tests[])(struct intel_gt *) = { + live_engine_pm_selftests, + NULL, + }; + struct intel_gt *gt = &i915->gt; + typeof(*tests) *fn; + + for (fn = tests; *fn; fn++) { + int err; + + err = (*fn)(gt); + if (err) + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.h b/drivers/gpu/drm/i915/gt/selftest_engine.h new file mode 100644 index 000000000000..ab32d09ec5a1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef SELFTEST_ENGINE_H +#define SELFTEST_ENGINE_H + +struct intel_gt; + +int live_engine_pm_selftests(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index cfaa6b296835..3880f07c29b8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -12,19 +12,18 @@ static int intel_mmio_bases_check(void *arg) for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { const struct engine_info *info = &intel_engines[i]; - char name[INTEL_ENGINE_CS_MAX_NAME]; u8 prev = U8_MAX; - __sprint_engine_name(name, info); - for (j = 0; j < MAX_MMIO_BASES; j++) { u8 gen = info->mmio_bases[j].gen; u32 base = info->mmio_bases[j].base; if (gen >= 
prev) { - pr_err("%s: %s: mmio base for gen %x " - "is before the one for gen %x\n", - __func__, name, prev, gen); + pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n", + __func__, + intel_engine_class_repr(info->class), + info->class, info->instance, + prev, gen); return -EINVAL; } @@ -32,17 +31,22 @@ static int intel_mmio_bases_check(void *arg) break; if (!base) { - pr_err("%s: %s: invalid mmio base (%x) " - "for gen %x at entry %u\n", - __func__, name, base, gen, j); + pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n", + __func__, + intel_engine_class_repr(info->class), + info->class, info->instance, + base, gen, j); return -EINVAL; } prev = gen; } - pr_info("%s: min gen supported for %s = %d\n", - __func__, name, prev); + pr_debug("%s: min gen supported for %s%d is %d\n", + __func__, + intel_engine_class_repr(info->class), + info->instance, + prev); } return 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c new file mode 100644 index 000000000000..3a1419376912 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2018 Intel Corporation + */ + +#include "i915_selftest.h" +#include "selftest_engine.h" +#include "selftests/igt_atomic.h" + +static int live_engine_pm(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Check we can call intel_engine_pm_put from any context. No + * failures are reported directly, but if we mess up lockdep should + * tell us. + */ + if (intel_gt_pm_wait_for_idle(gt)) { + pr_err("Unable to flush GT pm before test\n"); + return -EBUSY; + } + + GEM_BUG_ON(intel_gt_pm_is_awake(gt)); + for_each_engine(engine, gt->i915, id) { + const typeof(*igt_atomic_phases) *p; + + for (p = igt_atomic_phases; p->name; p++) { + /* + * Acquisition is always synchronous, except if we + * know that the engine is already awake, in which + * case we should use intel_engine_pm_get_if_awake() + * to atomically grab the wakeref. + * + * In practice, + * intel_engine_pm_get(); + * intel_engine_pm_put(); + * occurs in one thread, while simultaneously + * intel_engine_pm_get_if_awake(); + * intel_engine_pm_put(); + * occurs from atomic context in another. 
+ */ + GEM_BUG_ON(intel_engine_pm_is_awake(engine)); + intel_engine_pm_get(engine); + + p->critical_section_begin(); + if (!intel_engine_pm_get_if_awake(engine)) + pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n", + engine->name, p->name); + else + intel_engine_pm_put(engine); + intel_engine_pm_put(engine); + p->critical_section_end(); + + /* engine wakeref is sync (instant) */ + if (intel_engine_pm_is_awake(engine)) { + pr_err("%s is still awake after flushing pm\n", + engine->name); + return -EINVAL; + } + + /* gt wakeref is async (deferred to workqueue) */ + if (intel_gt_pm_wait_for_idle(gt)) { + pr_err("GT failed to idle\n"); + return -EINVAL; + } + } + } + + return 0; +} + +int live_engine_pm_selftests(struct intel_gt *gt) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_engine_pm), + }; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 1ee4c923044f..a0098fc35921 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -25,13 +25,13 @@ #include <linux/kthread.h> #include "gem/i915_gem_context.h" +#include "gt/intel_gt.h" #include "intel_engine_pm.h" #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" -#include "selftests/igt_wedge_me.h" #include "selftests/igt_atomic.h" #include "selftests/mock_drm.h" @@ -42,7 +42,7 @@ #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ struct hang { - struct drm_i915_private *i915; + struct intel_gt *gt; struct drm_i915_gem_object *hws; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; @@ -50,27 +50,27 @@ struct hang { u32 *batch; }; -static int hang_init(struct hang *h, struct drm_i915_private *i915) +static int hang_init(struct hang *h, struct intel_gt *gt) { void *vaddr; int err; memset(h, 0, sizeof(*h)); - h->i915 = i915; + h->gt = gt; - h->ctx = kernel_context(i915); + h->ctx = kernel_context(gt->i915); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx)); - h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + h->hws = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(h->hws)) { err = PTR_ERR(h->hws); goto err_ctx; } - h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + h->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(h->obj)) { err = PTR_ERR(h->obj); goto err_hws; @@ -85,7 +85,7 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915) h->seqno = memset(vaddr, 0xff, PAGE_SIZE); vaddr = i915_gem_object_pin_map(h->obj, - i915_coherent_map_type(i915)); + i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_unpin_hws; @@ -118,7 +118,10 @@ static int move_to_active(struct i915_vma *vma, int err; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, flags); + err = i915_request_await_object(rq, vma->obj, + flags & EXEC_OBJECT_WRITE); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, flags); i915_vma_unlock(vma); return err; @@ -127,35 +130,31 @@ static int move_to_active(struct i915_vma *vma, static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = h->i915; - struct i915_address_space *vm = h->ctx->vm ?: &i915->ggtt.vm; + struct intel_gt *gt = h->gt; + struct i915_address_space *vm = h->ctx->vm ?: 
&engine->gt->ggtt->vm; + struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; unsigned int flags; + void *vaddr; u32 *batch; int err; - if (i915_gem_object_is_active(h->obj)) { - struct drm_i915_gem_object *obj; - void *vaddr; + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); - obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vaddr = i915_gem_object_pin_map(obj, - i915_coherent_map_type(h->i915)); - if (IS_ERR(vaddr)) { - i915_gem_object_put(obj); - return ERR_CAST(vaddr); - } + vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915)); + if (IS_ERR(vaddr)) { + i915_gem_object_put(obj); + return ERR_CAST(vaddr); + } - i915_gem_object_unpin_map(h->obj); - i915_gem_object_put(h->obj); + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); - h->obj = obj; - h->batch = vaddr; - } + h->obj = obj; + h->batch = vaddr; vma = i915_vma_instance(h->obj, vm, NULL); if (IS_ERR(vma)) @@ -188,7 +187,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) goto cancel_rq; batch = h->batch; - if (INTEL_GEN(i915) >= 8) { + if (INTEL_GEN(gt->i915) >= 8) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); @@ -202,7 +201,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); - } else if (INTEL_GEN(i915) >= 6) { + } else if (INTEL_GEN(gt->i915) >= 6) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); @@ -215,7 +214,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); - } else if (INTEL_GEN(i915) >= 4) { + } else if (INTEL_GEN(gt->i915) >= 4) { *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); @@ -242,7 +241,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = lower_32_bits(vma->node.start); } *batch++ = MI_BATCH_BUFFER_END; /* not reached */ - i915_gem_chipset_flush(h->i915); + intel_gt_chipset_flush(engine->gt); if (rq->engine->emit_init_breadcrumb) { err = rq->engine->emit_init_breadcrumb(rq); @@ -251,7 +250,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) } flags = 0; - if (INTEL_GEN(vm->i915) <= 5) + if (INTEL_GEN(gt->i915) <= 5) flags |= I915_DISPATCH_SECURE; err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); @@ -276,7 +275,7 @@ static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(h->i915); + intel_gt_chipset_flush(h->gt); i915_gem_object_unpin_map(h->obj); i915_gem_object_put(h->obj); @@ -286,7 +285,7 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - igt_flush_test(h->i915, I915_WAIT_LOCKED); + igt_flush_test(h->gt->i915, I915_WAIT_LOCKED); } static bool wait_until_running(struct hang *h, struct i915_request *rq) @@ -301,7 +300,7 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) static int igt_hang_sanitycheck(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_request 
*rq; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -310,13 +309,13 @@ static int igt_hang_sanitycheck(void *arg) /* Basic check that we can execute our hanging batch */ - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) goto unlock; - for_each_engine(engine, i915, id) { - struct igt_wedge_me w; + for_each_engine(engine, gt->i915, id) { + struct intel_wedge_me w; long timeout; if (!intel_engine_can_store_dword(engine)) @@ -333,15 +332,15 @@ static int igt_hang_sanitycheck(void *arg) i915_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); + intel_gt_chipset_flush(engine->gt); i915_request_add(rq); timeout = 0; - igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) + intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) timeout = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) timeout = -EIO; i915_request_put(rq); @@ -357,7 +356,7 @@ static int igt_hang_sanitycheck(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -368,37 +367,37 @@ static bool wait_for_idle(struct intel_engine_cs *engine) static int igt_reset_nop(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; struct i915_gem_context *ctx; unsigned int reset_count, count; enum intel_engine_id id; - intel_wakeref_t wakeref; struct drm_file *file; IGT_TIMEOUT(end_time); int err = 0; /* Check that we can reset during non-user portions of requests */ - file = mock_file(i915); + file = mock_file(gt->i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + ctx = live_context(gt->i915, file); + mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } i915_gem_context_clear_bannable(ctx); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - reset_count = i915_reset_count(&i915->gpu_error); + reset_count = i915_reset_count(global); count = 0; do { - mutex_lock(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) { + mutex_lock(>->i915->drm.struct_mutex); + + for_each_engine(engine, gt->i915, id) { int i; for (i = 0; i < 16; i++) { @@ -413,82 +412,78 @@ static int igt_reset_nop(void *arg) i915_request_add(rq); } } - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_lock(i915); - i915_reset(i915, ALL_ENGINES, NULL); - igt_global_reset_unlock(i915); - if (i915_reset_failed(i915)) { + igt_global_reset_lock(gt); + intel_gt_reset(gt, ALL_ENGINES, NULL); + igt_global_reset_unlock(gt); + + mutex_unlock(>->i915->drm.struct_mutex); + if (intel_gt_is_wedged(gt)) { err = -EIO; break; } - if (i915_reset_count(&i915->gpu_error) != - reset_count + ++count) { + if (i915_reset_count(global) != reset_count + ++count) { pr_err("Full GPU reset not recorded!\n"); err = -EINVAL; break; } - err = igt_flush_test(i915, 0); + err = igt_flush_test(gt->i915, 0); if (err) break; } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + 
mutex_lock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + mutex_unlock(>->i915->drm.struct_mutex); out: - mock_file_free(i915, file); - if (i915_reset_failed(i915)) + mock_file_free(gt->i915, file); + if (intel_gt_is_wedged(gt)) err = -EIO; return err; } static int igt_reset_nop_engine(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; struct drm_file *file; int err = 0; /* Check that we can engine-reset during non-user portions */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; - file = mock_file(i915); + file = mock_file(gt->i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + ctx = live_context(gt->i915, file); + mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } i915_gem_context_clear_bannable(ctx); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { unsigned int reset_count, reset_engine_count; unsigned int count; IGT_TIMEOUT(end_time); - reset_count = i915_reset_count(&i915->gpu_error); - reset_engine_count = i915_reset_engine_count(&i915->gpu_error, - engine); + reset_count = i915_reset_count(global); + reset_engine_count = i915_reset_engine_count(global, engine); count = 0; - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { int i; @@ -499,7 +494,7 @@ static int igt_reset_nop_engine(void *arg) break; } - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); for (i = 0; i < 16; i++) { struct i915_request *rq; @@ -511,21 +506,20 @@ static int igt_reset_nop_engine(void *arg) i915_request_add(rq); } - mutex_unlock(&i915->drm.struct_mutex); - - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); + mutex_unlock(>->i915->drm.struct_mutex); if (err) { pr_err("i915_reset_engine failed\n"); break; } - if (i915_reset_count(&i915->gpu_error) != reset_count) { + if (i915_reset_count(global) != reset_count) { pr_err("Full GPU reset recorded! 
(engine reset expected)\n"); err = -EINVAL; break; } - if (i915_reset_engine_count(&i915->gpu_error, engine) != + if (i915_reset_engine_count(global, engine) != reset_engine_count + ++count) { pr_err("%s engine reset not recorded!\n", engine->name); @@ -533,31 +527,31 @@ static int igt_reset_nop_engine(void *arg) break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); if (err) break; - err = igt_flush_test(i915, 0); + err = igt_flush_test(gt->i915, 0); if (err) break; } - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + mutex_unlock(>->i915->drm.struct_mutex); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); out: - mock_file_free(i915, file); - if (i915_reset_failed(i915)) + mock_file_free(gt->i915, file); + if (intel_gt_is_wedged(gt)) err = -EIO; return err; } -static int __igt_reset_engine(struct drm_i915_private *i915, bool active) +static int __igt_reset_engine(struct intel_gt *gt, bool active) { + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -565,18 +559,18 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) /* Check that we can issue an engine reset on an idle engine (no-op) */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; if (active) { - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); + mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { unsigned int reset_count, reset_engine_count; IGT_TIMEOUT(end_time); @@ -590,30 +584,29 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - reset_count = i915_reset_count(&i915->gpu_error); - reset_engine_count = i915_reset_engine_count(&i915->gpu_error, - engine); + reset_count = i915_reset_count(global); + reset_engine_count = i915_reset_engine_count(global, engine); intel_engine_pm_get(engine); - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { if (active) { struct i915_request *rq; - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); @@ -628,19 +621,19 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) i915_request_put(rq); } - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); if (err) { pr_err("i915_reset_engine failed\n"); break; } - if (i915_reset_count(&i915->gpu_error) != reset_count) { + if (i915_reset_count(global) != 
reset_count) { pr_err("Full GPU reset recorded! (engine reset expected)\n"); err = -EINVAL; break; } - if (i915_reset_engine_count(&i915->gpu_error, engine) != + if (i915_reset_engine_count(global, engine) != ++reset_engine_count) { pr_err("%s engine reset not recorded!\n", engine->name); @@ -648,24 +641,24 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); intel_engine_pm_put(engine); if (err) break; - err = igt_flush_test(i915, 0); + err = igt_flush_test(gt->i915, 0); if (err) break; } - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) err = -EIO; if (active) { - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); hang_fini(&h); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); } return err; @@ -707,7 +700,7 @@ static int active_request_put(struct i915_request *rq) rq->fence.seqno); GEM_TRACE_DUMP(); - i915_gem_set_wedged(rq->i915); + intel_gt_set_wedged(rq->engine->gt); err = -EIO; } @@ -784,10 +777,11 @@ err_file: return err; } -static int __igt_reset_engines(struct drm_i915_private *i915, +static int __igt_reset_engines(struct intel_gt *gt, const char *test_name, unsigned int flags) { + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine, *other; enum intel_engine_id id, tmp; struct hang h; @@ -797,13 +791,13 @@ static int __igt_reset_engines(struct drm_i915_private *i915, * with any other engine. */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); + mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; @@ -811,9 +805,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915, h.ctx->sched.priority = 1024; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct active_engine threads[I915_NUM_ENGINES] = {}; - unsigned long global = i915_reset_count(&i915->gpu_error); + unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; IGT_TIMEOUT(end_time); @@ -829,12 +823,11 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } memset(threads, 0, sizeof(threads)); - for_each_engine(other, i915, tmp) { + for_each_engine(other, gt->i915, tmp) { struct task_struct *tsk; threads[tmp].resets = - i915_reset_engine_count(&i915->gpu_error, - other); + i915_reset_engine_count(global, other); if (!(flags & TEST_OTHERS)) continue; @@ -857,25 +850,25 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } intel_engine_pm_get(engine); - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer 
p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); @@ -888,7 +881,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } } - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); if (err) { pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", engine->name, test_name, err); @@ -900,7 +893,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, if (rq) { if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("i915_reset_engine(%s:%s):" " failed to complete request after reset\n", @@ -910,7 +903,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, i915_request_put(rq); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; break; } @@ -920,7 +913,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("i915_reset_engine(%s:%s):" " failed to idle after reset\n", @@ -932,12 +925,12 @@ static int __igt_reset_engines(struct drm_i915_private *i915, break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); intel_engine_pm_put(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); - reported = i915_reset_engine_count(&i915->gpu_error, engine); + reported = i915_reset_engine_count(global, engine); reported -= threads[engine->id].resets; if (reported != count) { pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", @@ -947,7 +940,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } unwind: - for_each_engine(other, i915, tmp) { + for_each_engine(other, gt->i915, tmp) { int ret; if (!threads[tmp].task) @@ -962,22 +955,21 @@ unwind: } put_task_struct(threads[tmp].task); - if (other != engine && + if (other->uabi_class != engine->uabi_class && threads[tmp].resets != - i915_reset_engine_count(&i915->gpu_error, other)) { + i915_reset_engine_count(global, other)) { pr_err("Innocent engine %s was reset (count=%ld)\n", other->name, - i915_reset_engine_count(&i915->gpu_error, - other) - + i915_reset_engine_count(global, other) - threads[tmp].resets); if (!err) err = -EINVAL; } } - if (global != i915_reset_count(&i915->gpu_error)) { + if (device != i915_reset_count(global)) { pr_err("Global reset (count=%ld)!\n", - i915_reset_count(&i915->gpu_error) - global); + i915_reset_count(global) - device); if (!err) err = -EINVAL; } @@ -985,20 +977,20 @@ unwind: if (err) break; - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + mutex_unlock(>->i915->drm.struct_mutex); if (err) break; } - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) err = -EIO; if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); hang_fini(&h); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); } return err; @@ -1024,13 +1016,13 @@ static int igt_reset_engines(void *arg) }, { } }; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; typeof(*phases) *p; int 
err; for (p = phases; p->name; p++) { if (p->flags & TEST_PRIORITY) { - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) continue; } @@ -1042,38 +1034,39 @@ static int igt_reset_engines(void *arg) return 0; } -static u32 fake_hangcheck(struct drm_i915_private *i915, - intel_engine_mask_t mask) +static u32 fake_hangcheck(struct intel_gt *gt, intel_engine_mask_t mask) { - u32 count = i915_reset_count(&i915->gpu_error); + u32 count = i915_reset_count(>->i915->gpu_error); - i915_reset(i915, mask, NULL); + intel_gt_reset(gt, mask, NULL); return count; } static int igt_reset_wait(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; + struct intel_engine_cs *engine = gt->i915->engine[RCS0]; struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; int err; - if (!intel_engine_can_store_dword(i915->engine[RCS0])) + if (!engine || !intel_engine_can_store_dword(engine)) return 0; /* Check that we detect a stuck waiter and issue a reset */ - igt_global_reset_lock(i915); + igt_global_reset_lock(gt); - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) goto unlock; - rq = hang_create_request(&h, i915->engine[RCS0]); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto fini; @@ -1083,19 +1076,19 @@ static int igt_reset_wait(void *arg) i915_request_add(rq); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto out_rq; } - reset_count = fake_hangcheck(i915, ALL_ENGINES); + reset_count = fake_hangcheck(gt, ALL_ENGINES); timeout = i915_request_wait(rq, 0, 10); if (timeout < 0) { @@ -1105,7 +1098,7 @@ static int igt_reset_wait(void *arg) goto out_rq; } - if (i915_reset_count(&i915->gpu_error) == reset_count) { + if (i915_reset_count(global) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; goto out_rq; @@ -1116,10 +1109,10 @@ out_rq: fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + mutex_unlock(>->i915->drm.struct_mutex); + igt_global_reset_unlock(gt); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; return err; @@ -1164,7 +1157,14 @@ static int evict_fence(void *data) goto out_unlock; } + err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE); + if (err) { + pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err); + goto out_unlock; + } + err = i915_vma_pin_fence(arg->vma); + i915_vma_unpin(arg->vma); if (err) { pr_err("Unable to pin Y-tiled fence; err:%d\n", err); goto out_unlock; @@ -1178,11 +1178,12 @@ out_unlock: return err; } -static int __igt_reset_evict_vma(struct drm_i915_private *i915, +static int __igt_reset_evict_vma(struct intel_gt *gt, struct i915_address_space *vm, int (*fn)(void *), unsigned int flags) { + struct intel_engine_cs *engine = gt->i915->engine[RCS0]; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; @@ -1190,17 +1191,17 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, struct hang h; int err; 
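The evict_fence() change in the hunk above reflects that claiming a fence register wants the vma already pinned into the mappable GGTT, so the selftest now wraps i915_vma_pin_fence() in a temporary PIN_GLOBAL | PIN_MAPPABLE pin. A minimal sketch of that ordering, reusing the calls exactly as they appear in the patch; the wrapper name and the trimmed error handling are for illustration only:

static int sketch_pin_fence(struct i915_vma *vma)
{
	int err;

	/* the fence needs the vma bound in the mappable part of the GGTT */
	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
	if (err)
		return err;

	err = i915_vma_pin_fence(vma);

	/* drop the temporary vma pin once the fence attempt has been made */
	i915_vma_unpin(vma);

	return err;
}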
- if (!intel_engine_can_store_dword(i915->engine[RCS0])) + if (!engine || !intel_engine_can_store_dword(engine)) return 0; /* Check that we can recover an unbind stuck on a hanging request */ - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) goto unlock; - obj = i915_gem_object_create_internal(i915, SZ_1M); + obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto fini; @@ -1220,7 +1221,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, goto out_obj; } - rq = hang_create_request(&h, i915->engine[RCS0]); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_obj; @@ -1246,7 +1247,10 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, } i915_vma_lock(arg.vma); - err = i915_vma_move_to_active(arg.vma, rq, flags); + err = i915_request_await_object(rq, arg.vma->obj, + flags & EXEC_OBJECT_WRITE); + if (err == 0) + err = i915_vma_move_to_active(arg.vma, rq, flags); i915_vma_unlock(arg.vma); if (flags & EXEC_OBJECT_NEEDS_FENCE) @@ -1258,16 +1262,16 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, if (err) goto out_rq; - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); goto out_reset; } @@ -1284,31 +1288,31 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, wait_for_completion(&arg.completion); if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("igt/evict_vma kthread did not wait\n"); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); goto out_reset; } out_reset: - igt_global_reset_lock(i915); - fake_hangcheck(rq->i915, rq->engine->mask); - igt_global_reset_unlock(i915); + igt_global_reset_lock(gt); + fake_hangcheck(gt, rq->engine->mask); + igt_global_reset_unlock(gt); if (tsk) { - struct igt_wedge_me w; + struct intel_wedge_me w; /* The reset, even indirectly, should take less than 10ms. 
*/ - igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) + intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) err = kthread_stop(tsk); put_task_struct(tsk); } - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); out_rq: i915_request_put(rq); out_obj: @@ -1316,9 +1320,9 @@ out_obj: fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; return err; @@ -1326,26 +1330,26 @@ unlock: static int igt_reset_evict_ggtt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; - return __igt_reset_evict_vma(i915, &i915->ggtt.vm, + return __igt_reset_evict_vma(gt, >->ggtt->vm, evict_vma, EXEC_OBJECT_WRITE); } static int igt_reset_evict_ppgtt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx; struct drm_file *file; int err; - file = mock_file(i915); + file = mock_file(gt->i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + ctx = live_context(gt->i915, file); + mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; @@ -1353,29 +1357,29 @@ static int igt_reset_evict_ppgtt(void *arg) err = 0; if (ctx->vm) /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(i915, ctx->vm, + err = __igt_reset_evict_vma(gt, ctx->vm, evict_vma, EXEC_OBJECT_WRITE); out: - mock_file_free(i915, file); + mock_file_free(gt->i915, file); return err; } static int igt_reset_evict_fence(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; - return __igt_reset_evict_vma(i915, &i915->ggtt.vm, + return __igt_reset_evict_vma(gt, >->ggtt->vm, evict_fence, EXEC_OBJECT_NEEDS_FENCE); } -static int wait_for_others(struct drm_i915_private *i915, +static int wait_for_others(struct intel_gt *gt, struct intel_engine_cs *exclude) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { if (engine == exclude) continue; @@ -1388,7 +1392,8 @@ static int wait_for_others(struct drm_i915_private *i915, static int igt_reset_queue(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -1396,14 +1401,14 @@ static int igt_reset_queue(void *arg) /* Check that we replay pending requests following a hang */ - igt_global_reset_lock(i915); + igt_global_reset_lock(gt); - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) goto unlock; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; @@ -1444,7 +1449,7 @@ static int igt_reset_queue(void *arg) * (hangcheck), or we focus on resetting just one * engine and so avoid repeatedly resetting innocents. 
*/ - err = wait_for_others(i915, engine); + err = wait_for_others(gt, engine); if (err) { pr_err("%s(%s): Failed to idle other inactive engines after device reset\n", __func__, engine->name); @@ -1452,12 +1457,12 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); goto fini; } if (!wait_until_running(&h, prev)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, @@ -1468,13 +1473,13 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto fini; } - reset_count = fake_hangcheck(i915, BIT(id)); + reset_count = fake_hangcheck(gt, BIT(id)); if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1494,7 +1499,7 @@ static int igt_reset_queue(void *arg) goto fini; } - if (i915_reset_count(&i915->gpu_error) == reset_count) { + if (i915_reset_count(global) == reset_count) { pr_err("No GPU reset recorded!\n"); i915_request_put(rq); i915_request_put(prev); @@ -1509,11 +1514,11 @@ static int igt_reset_queue(void *arg) pr_info("%s: Completed %d resets\n", engine->name, count); *h.batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); + intel_gt_chipset_flush(engine->gt); i915_request_put(prev); - err = igt_flush_test(i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); if (err) break; } @@ -1521,10 +1526,10 @@ static int igt_reset_queue(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + mutex_unlock(>->i915->drm.struct_mutex); + igt_global_reset_unlock(gt); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; return err; @@ -1532,8 +1537,9 @@ unlock: static int igt_handle_error(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; + struct intel_engine_cs *engine = gt->i915->engine[RCS0]; struct hang h; struct i915_request *rq; struct i915_gpu_state *error; @@ -1541,15 +1547,15 @@ static int igt_handle_error(void *arg) /* Check that we can issue a global GPU and engine reset */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; if (!engine || !intel_engine_can_store_dword(engine)) return 0; - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); - err = hang_init(&h, i915); + err = hang_init(&h, gt); if (err) goto err_unlock; @@ -1563,28 +1569,28 @@ static int igt_handle_error(void *arg) i915_request_add(rq); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_request; } - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); /* Temporarily disable error capture */ - error = xchg(&i915->gpu_error.first_error, (void *)-1); + error = xchg(&global->first_error, (void *)-1); - i915_handle_error(i915, engine->mask, 0, NULL); + intel_gt_handle_error(gt, 
engine->mask, 0, NULL); - xchg(&i915->gpu_error.first_error, error); + xchg(&global->first_error, error); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); if (rq->fence.error != -EIO) { pr_err("Guilty request not identified!\n"); @@ -1597,7 +1603,7 @@ err_request: err_fini: hang_fini(&h); err_unlock: - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -1614,7 +1620,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, tasklet_disable_nosync(t); p->critical_section_begin(); - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); p->critical_section_end(); tasklet_enable(t); @@ -1629,7 +1635,6 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, static int igt_atomic_reset_engine(struct intel_engine_cs *engine, const struct igt_atomic_section *p) { - struct drm_i915_private *i915 = engine->i915; struct i915_request *rq; struct hang h; int err; @@ -1638,7 +1643,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, if (err) return err; - err = hang_init(&h, i915); + err = hang_init(&h, engine->gt); if (err) return err; @@ -1657,16 +1662,16 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, rq->fence.seqno, hws_seqno(&h, rq)); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(engine->gt); err = -EIO; } if (err == 0) { - struct igt_wedge_me w; + struct intel_wedge_me w; - igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/) + intel_wedge_on_timeout(&w, engine->gt, HZ / 20 /* 50ms */) i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(engine->gt)) err = -EIO; } @@ -1678,30 +1683,30 @@ out: static int igt_reset_engines_atomic(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; int err = 0; /* Check that the engines resets are usable from atomic context */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); + igt_global_reset_lock(gt); + mutex_lock(>->i915->drm.struct_mutex); /* Flush any requests before we get started and check basics */ - if (!igt_force_reset(i915)) + if (!igt_force_reset(gt)) goto unlock; for (p = igt_atomic_phases; p->name; p++) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { err = igt_atomic_reset_engine(engine, p); if (err) goto out; @@ -1710,11 +1715,11 @@ static int igt_reset_engines_atomic(void *arg) out: /* As we poke around the guts, do a full reset before continuing. 
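/*
 * igt_handle_error() above parks error capture by atomically swapping
 * gpu_error.first_error with a (void *)-1 sentinel and restoring it once the
 * injected hang has been handled, so the deliberate hang leaves no spurious
 * error state behind. Standalone C11 sketch of that exchange-a-sentinel
 * pattern, assuming the capture side only publishes the first error it sees;
 * CAPTURE_DISABLED and capture_error() are illustrative names.
 */
#include <stdatomic.h>
#include <stdio.h>

#define CAPTURE_DISABLED ((void *)-1)

static _Atomic(void *) first_error;

static void capture_error(void *report)
{
	void *expected = NULL;

	/* only the first error is kept; the sentinel blocks capture entirely */
	if (atomic_compare_exchange_strong(&first_error, &expected, report))
		printf("captured %p\n", report);
	else
		printf("capture skipped (slot already holds %p)\n", expected);
}

int main(void)
{
	int dummy;
	void *saved;

	saved = atomic_exchange(&first_error, CAPTURE_DISABLED);
	capture_error(&dummy);			/* discarded while parked */

	atomic_exchange(&first_error, saved);
	capture_error(&dummy);			/* recorded as the first error */
	return 0;
}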
*/ - igt_force_reset(i915); + igt_force_reset(gt); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + mutex_unlock(>->i915->drm.struct_mutex); + igt_global_reset_unlock(gt); return err; } @@ -1736,28 +1741,29 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_evict_fence), SUBTEST(igt_handle_error), }; + struct intel_gt *gt = &i915->gt; intel_wakeref_t wakeref; bool saved_hangcheck; int err; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(gt->i915)) return 0; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + wakeref = intel_runtime_pm_get(>->i915->runtime_pm); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); - drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */ + drain_delayed_work(>->hangcheck.work); /* flush param */ - err = i915_subtests(tests, i915); + err = intel_gt_live_subtests(tests, gt); - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + igt_flush_test(gt->i915, I915_WAIT_LOCKED); + mutex_unlock(>->i915->drm.struct_mutex); i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 401e8b539297..d791158988d6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -22,9 +22,9 @@ static int live_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; + struct i915_gem_engines_iter it; struct i915_gem_context *ctx; - enum intel_engine_id id; + struct intel_context *ce; struct igt_spinner spin; intel_wakeref_t wakeref; int err = -ENOMEM; @@ -35,17 +35,17 @@ static int live_sanitycheck(void *arg) mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin, i915)) + if (igt_spinner_init(&spin, &i915->gt)) goto err_unlock; ctx = kernel_context(i915); if (!ctx) goto err_spin; - for_each_engine(engine, i915, id) { + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct i915_request *rq; - rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_ctx; @@ -55,7 +55,7 @@ static int live_sanitycheck(void *arg) if (!igt_wait_for_spinner(&spin, rq)) { GEM_TRACE("spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx; } @@ -69,16 +69,236 @@ static int live_sanitycheck(void *arg) err = 0; err_ctx: + i915_gem_context_unlock_engines(ctx); kernel_context_close(ctx); err_spin: igt_spinner_fini(&spin); err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } +static int +emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 10); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + 
*cs++ = i915_ggtt_offset(vma) + 4 * idx; + *cs++ = 0; + + if (idx > 0) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); + *cs++ = 0; + *cs++ = 1; + } else { + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + intel_ring_advance(rq, cs); + return 0; +} + +static struct i915_request * +semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) +{ + struct i915_gem_context *ctx; + struct i915_request *rq; + int err; + + ctx = kernel_context(engine->i915); + if (!ctx) + return ERR_PTR(-ENOMEM); + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) + goto out_ctx; + + err = emit_semaphore_chain(rq, vma, idx); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_ctx: + kernel_context_close(ctx); + return rq; +} + +static int +release_queue(struct intel_engine_cs *engine, + struct i915_vma *vma, + int idx) +{ + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + struct i915_request *rq; + u32 *cs; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); + *cs++ = 0; + *cs++ = 1; + + intel_ring_advance(rq, cs); + i915_request_add(rq); + + engine->schedule(rq, &attr); + + return 0; +} + +static int +slice_semaphore_queue(struct intel_engine_cs *outer, + struct i915_vma *vma, + int count) +{ + struct intel_engine_cs *engine; + struct i915_request *head; + enum intel_engine_id id; + int err, i, n = 0; + + head = semaphore_queue(outer, vma, n++); + if (IS_ERR(head)) + return PTR_ERR(head); + + i915_request_get(head); + for_each_engine(engine, outer->i915, id) { + for (i = 0; i < count; i++) { + struct i915_request *rq; + + rq = semaphore_queue(engine, vma, n++); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + } + } + + err = release_queue(outer, vma, n); + if (err) + goto out; + + if (i915_request_wait(head, + I915_WAIT_LOCKED, + 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { + pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", + count, n); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(outer->gt); + err = -EIO; + } + +out: + i915_request_put(head); + return err; +} + +static int live_timeslice_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + struct i915_vma *vma; + void *vaddr; + int err = 0; + int count; + + /* + * If a request takes too long, we would like to give other users + * a fair go on the GPU. In particular, users may create batches + * that wait upon external input, where that input may even be + * supplied by another GPU job. To avoid blocking forever, we + * need to preempt the current task and replace it with another + * ready task. 
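/*
 * Host-side simulation of the dependency structure emit_semaphore_chain() and
 * release_queue() build above: request idx spins on dword idx of the scratch
 * page (MI_SEMAPHORE_SAD_NEQ_SDD against 0) and, once released, writes dword
 * idx - 1; a final high-priority store releases the tail. The chain can only
 * unwind if every waiter is given execution time, which is what
 * live_timeslice_preempt() checks. One round-robin pass below stands in for
 * one timeslice per request; the link count is illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define LINKS 8	/* links 0..LINKS-1; link 0 is the head request we wait upon */

int main(void)
{
	uint32_t slot[LINKS] = { 0 };	/* the scratch page dwords */
	int done[LINKS] = { 0 };
	int turns = 0;

	slot[LINKS - 1] = 1;	/* release_queue(): poke the last link's semaphore */

	while (!done[0]) {
		int i;

		/* one "timeslice" per link per turn */
		for (i = 0; i < LINKS; i++) {
			if (done[i] || !slot[i])
				continue;	/* MI_SEMAPHORE_WAIT still polling */
			if (i > 0)
				slot[i - 1] = 1;	/* signal the previous link */
			done[i] = 1;
		}
		turns++;
	}

	/* prints 8 turns: each link needs its own slice before the head can run */
	printf("chain of %d links unwound after %d round-robin turns\n",
	       LINKS, turns);
	return 0;
}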
+ */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_unlock; + } + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + for_each_prime_number_from(count, 1, 16) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + if (!intel_engine_has_preemption(engine)) + continue; + + memset(vaddr, 0, PAGE_SIZE); + + err = slice_semaphore_queue(engine, vma, count); + if (err) + goto err_pin; + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_pin; + } + } + } + +err_pin: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); +err_unlock: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + static int live_busywait_preempt(void *arg) { struct drm_i915_private *i915 = arg; @@ -138,6 +358,9 @@ static int live_busywait_preempt(void *arg) struct igt_live_test t; u32 *cs; + if (!intel_engine_has_preemption(engine)) + continue; + if (!intel_engine_can_store_dword(engine)) continue; @@ -229,7 +452,7 @@ static int live_busywait_preempt(void *arg) intel_engine_dump(engine, &p, "%s\n", engine->name); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_vma; } @@ -253,13 +476,29 @@ err_ctx_lo: err_ctx_hi: kernel_context_close(ctx_hi); err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } +static struct i915_request * +spinner_create_request(struct igt_spinner *spin, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 arb) +{ + struct intel_context *ce; + struct i915_request *rq; + + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = igt_spinner_create_request(spin, ce, arb); + intel_context_put(ce); + return rq; +} + static int live_preempt(void *arg) { struct drm_i915_private *i915 = arg; @@ -279,10 +518,10 @@ static int live_preempt(void *arg) mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin_hi, i915)) + if (igt_spinner_init(&spin_hi, &i915->gt)) goto err_unlock; - if (igt_spinner_init(&spin_lo, i915)) + if (igt_spinner_init(&spin_lo, &i915->gt)) goto err_spin_hi; ctx_hi = kernel_context(i915); @@ -309,8 +548,8 @@ static int live_preempt(void *arg) goto err_ctx_lo; } - rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_ctx_lo; @@ -320,13 +559,13 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } - rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { 
igt_spinner_end(&spin_lo); err = PTR_ERR(rq); @@ -337,7 +576,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } @@ -361,7 +600,6 @@ err_spin_lo: err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -384,10 +622,10 @@ static int live_late_preempt(void *arg) mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin_hi, i915)) + if (igt_spinner_init(&spin_hi, &i915->gt)) goto err_unlock; - if (igt_spinner_init(&spin_lo, i915)) + if (igt_spinner_init(&spin_lo, &i915->gt)) goto err_spin_hi; ctx_hi = kernel_context(i915); @@ -398,6 +636,9 @@ static int live_late_preempt(void *arg) if (!ctx_lo) goto err_ctx_hi; + /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */ + ctx_lo->sched.priority = I915_USER_PRIORITY(1); + for_each_engine(engine, i915, id) { struct igt_live_test t; struct i915_request *rq; @@ -410,8 +651,8 @@ static int live_late_preempt(void *arg) goto err_ctx_lo; } - rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_ctx_lo; @@ -423,8 +664,8 @@ static int live_late_preempt(void *arg) goto err_wedged; } - rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, - MI_NOOP); + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_NOOP); if (IS_ERR(rq)) { igt_spinner_end(&spin_lo); err = PTR_ERR(rq); @@ -465,7 +706,6 @@ err_spin_lo: err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -473,7 +713,7 @@ err_unlock: err_wedged: igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } @@ -490,7 +730,7 @@ static int preempt_client_init(struct drm_i915_private *i915, if (!c->ctx) return -ENOMEM; - if (igt_spinner_init(&c->spin, i915)) + if (igt_spinner_init(&c->spin, &i915->gt)) goto err_ctx; return 0; @@ -506,6 +746,114 @@ static void preempt_client_fini(struct preempt_client *c) kernel_context_close(c->ctx); } +static int live_nopreempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct preempt_client a, b; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Verify that we can disable preemption for an individual request + * that may be being observed and not want to be interrupted. 
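/*
 * Toy model of the scheduling decision live_nopreempt() (below) exercises: a
 * low-priority spinner flagged I915_REQUEST_NOPREEMPT must keep the engine
 * even when a much higher-priority request arrives, so the usual priority
 * comparison is vetoed by the flag. struct request, need_preempt() and the
 * priority numbers here are illustrative, not the execlists scheduler.
 */
#include <stdbool.h>
#include <stdio.h>

#define REQ_NOPREEMPT	(1u << 0)

struct request {
	int prio;
	unsigned int flags;
};

static bool need_preempt(const struct request *active, const struct request *next)
{
	if (active->flags & REQ_NOPREEMPT)
		return false;		/* unpreemptable, regardless of priority */
	return next->prio > active->prio;
}

int main(void)
{
	struct request a = { .prio = 0, .flags = REQ_NOPREEMPT };	/* rq_a */
	struct request b = { .prio = 1000, .flags = 0 };		/* rq_b, high prio */

	printf("b preempts a: %s\n", need_preempt(&a, &b) ? "yes" : "no");	/* no */

	a.flags = 0;
	printf("after clearing the flag: %s\n",
	       need_preempt(&a, &b) ? "yes" : "no");				/* yes */
	return 0;
}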
+ */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + if (preempt_client_init(i915, &a)) + goto err_unlock; + if (preempt_client_init(i915, &b)) + goto err_client_a; + b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + + for_each_engine(engine, i915, id) { + struct i915_request *rq_a, *rq_b; + + if (!intel_engine_has_preemption(engine)) + continue; + + engine->execlists.preempt_hang.count = 0; + + rq_a = spinner_create_request(&a.spin, + a.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + goto err_client_b; + } + + /* Low priority client, but unpreemptable! */ + rq_a->flags |= I915_REQUEST_NOPREEMPT; + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + goto err_wedged; + } + + rq_b = spinner_create_request(&b.spin, + b.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + goto err_client_b; + } + + i915_request_add(rq_b); + + /* B is much more important than A! (But A is unpreemptable.) */ + GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); + + /* Wait long enough for preemption and timeslicing */ + if (igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client started too early!\n"); + goto err_wedged; + } + + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + goto err_wedged; + } + + igt_spinner_end(&b.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption recorded x%d; should have been suppressed!\n", + engine->execlists.preempt_hang.count); + err = -EINVAL; + goto err_wedged; + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); +err_unlock: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + intel_gt_set_wedged(&i915->gt); + err = -EIO; + goto err_client_b; +} + static int live_suppress_self_preempt(void *arg) { struct drm_i915_private *i915 = arg; @@ -531,6 +879,9 @@ static int live_suppress_self_preempt(void *arg) if (USES_GUC_SUBMISSION(i915)) return 0; /* presume black blox */ + if (intel_vgpu_active(i915)) + return 0; /* GVT forces single port & request submission */ + mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); @@ -548,9 +899,9 @@ static int live_suppress_self_preempt(void *arg) engine->execlists.preempt_hang.count = 0; - rq_a = igt_spinner_create_request(&a.spin, - a.ctx, engine, - MI_NOOP); + rq_a = spinner_create_request(&a.spin, + a.ctx, engine, + MI_NOOP); if (IS_ERR(rq_a)) { err = PTR_ERR(rq_a); goto err_client_b; @@ -562,10 +913,12 @@ static int live_suppress_self_preempt(void *arg) goto err_wedged; } + /* Keep postponing the timer to avoid premature slicing */ + mod_timer(&engine->execlists.timer, jiffies + HZ); for (depth = 0; depth < 8; depth++) { - rq_b = igt_spinner_create_request(&b.spin, - b.ctx, engine, - MI_NOOP); + rq_b = spinner_create_request(&b.spin, + b.ctx, engine, + MI_NOOP); if (IS_ERR(rq_b)) { err = PTR_ERR(rq_b); goto err_client_b; @@ -587,7 +940,8 @@ static int live_suppress_self_preempt(void *arg) igt_spinner_end(&a.spin); if (engine->execlists.preempt_hang.count) { - pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", + 
pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", + engine->name, engine->execlists.preempt_hang.count, depth); err = -EINVAL; @@ -604,8 +958,6 @@ err_client_b: err_client_a: preempt_client_fini(&a); err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -613,7 +965,7 @@ err_unlock: err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_client_b; } @@ -646,6 +998,10 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) i915_sw_fence_init(&rq->submit, dummy_notify); set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + spin_lock_init(&rq->lock); + rq->fence.lock = &rq->lock; + INIT_LIST_HEAD(&rq->fence.cb_list); + return rq; } @@ -714,9 +1070,9 @@ static int live_suppress_wait_preempt(void *arg) goto err_client_3; for (i = 0; i < ARRAY_SIZE(client); i++) { - rq[i] = igt_spinner_create_request(&client[i].spin, - client[i].ctx, engine, - MI_NOOP); + rq[i] = spinner_create_request(&client[i].spin, + client[i].ctx, engine, + MI_NOOP); if (IS_ERR(rq[i])) { err = PTR_ERR(rq[i]); goto err_wedged; @@ -773,8 +1129,6 @@ err_client_1: err_client_0: preempt_client_fini(&client[0]); err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -782,7 +1136,7 @@ err_unlock: err_wedged: for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_client_3; } @@ -825,9 +1179,9 @@ static int live_chain_preempt(void *arg) if (!intel_engine_has_preemption(engine)) continue; - rq = igt_spinner_create_request(&lo.spin, - lo.ctx, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) goto err_wedged; i915_request_add(rq); @@ -851,18 +1205,18 @@ static int live_chain_preempt(void *arg) } for_each_prime_number_from(count, 1, ring_size) { - rq = igt_spinner_create_request(&hi.spin, - hi.ctx, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&hi.spin, + hi.ctx, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) goto err_wedged; i915_request_add(rq); if (!igt_wait_for_spinner(&hi.spin, rq)) goto err_wedged; - rq = igt_spinner_create_request(&lo.spin, - lo.ctx, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) goto err_wedged; i915_request_add(rq); @@ -921,8 +1275,6 @@ err_client_lo: err_client_hi: preempt_client_fini(&hi); err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -930,7 +1282,7 @@ err_unlock: err_wedged: igt_spinner_end(&hi.spin); igt_spinner_end(&lo.spin); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_client_lo; } @@ -954,10 +1306,10 @@ static int live_preempt_hang(void *arg) mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin_hi, i915)) + if (igt_spinner_init(&spin_hi, &i915->gt)) goto err_unlock; - if (igt_spinner_init(&spin_lo, i915)) + if (igt_spinner_init(&spin_lo, &i915->gt)) goto err_spin_hi; ctx_hi = kernel_context(i915); @@ -978,8 +1330,8 @@ static int live_preempt_hang(void *arg) if 
(!intel_engine_has_preemption(engine)) continue; - rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_ctx_lo; @@ -989,13 +1341,13 @@ static int live_preempt_hang(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } - rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, - MI_ARB_CHECK); + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); if (IS_ERR(rq)) { igt_spinner_end(&spin_lo); err = PTR_ERR(rq); @@ -1011,21 +1363,21 @@ static int live_preempt_hang(void *arg) HZ / 10)) { pr_err("Preemption did not occur within timeout!"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - i915_reset_engine(engine, NULL); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); + intel_engine_reset(engine, NULL); + clear_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); engine->execlists.preempt_hang.inject_hang = false; if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } @@ -1048,7 +1400,6 @@ err_spin_lo: err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -1108,11 +1459,13 @@ static int smoke_submit(struct preempt_smoke *smoke, if (vma) { i915_vma_lock(vma); - err = rq->engine->emit_bb_start(rq, - vma->node.start, - PAGE_SIZE, 0); + err = i915_request_await_object(rq, vma->obj, false); if (!err) err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); i915_vma_unlock(vma); } @@ -1406,7 +1759,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, request[nc]->fence.context, request[nc]->fence.seqno); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); break; } } @@ -1444,6 +1797,7 @@ static int live_virtual_engine(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_engine_cs *engine; + struct intel_gt *gt = &i915->gt; enum intel_engine_id id; unsigned int class, inst; int err = -ENODEV; @@ -1467,10 +1821,10 @@ static int live_virtual_engine(void *arg) nsibling = 0; for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!i915->engine_class[class][inst]) + if (!gt->engine_class[class][inst]) continue; - siblings[nsibling++] = i915->engine_class[class][inst]; + siblings[nsibling++] = gt->engine_class[class][inst]; } if (nsibling < 2) continue; @@ -1553,7 +1907,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, request[n]->fence.context, request[n]->fence.seqno); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto out; } @@ -1591,6 +1945,7 @@ static int live_virtual_mask(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + struct intel_gt *gt = &i915->gt; unsigned int class, inst; int err = 0; @@ -1604,10 +1959,10 @@ 
static int live_virtual_mask(void *arg) nsibling = 0; for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!i915->engine_class[class][inst]) + if (!gt->engine_class[class][inst]) break; - siblings[nsibling++] = i915->engine_class[class][inst]; + siblings[nsibling++] = gt->engine_class[class][inst]; } if (nsibling < 2) continue; @@ -1768,6 +2123,7 @@ static int live_virtual_bond(void *arg) }; struct drm_i915_private *i915 = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + struct intel_gt *gt = &i915->gt; unsigned int class, inst; int err = 0; @@ -1782,11 +2138,11 @@ static int live_virtual_bond(void *arg) nsibling = 0; for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!i915->engine_class[class][inst]) + if (!gt->engine_class[class][inst]) break; GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); - siblings[nsibling++] = i915->engine_class[class][inst]; + siblings[nsibling++] = gt->engine_class[class][inst]; } if (nsibling < 2) continue; @@ -1812,9 +2168,11 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_sanitycheck), + SUBTEST(live_timeslice_preempt), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), + SUBTEST(live_nopreempt), SUBTEST(live_suppress_self_preempt), SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), @@ -1828,8 +2186,8 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) if (!HAS_EXECLISTS(i915)) return 0; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 89da9e7cc1ba..00a4f60cdfd5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -9,26 +9,29 @@ static int igt_global_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; unsigned int reset_count; + intel_wakeref_t wakeref; int err = 0; /* Check that we can issue a global GPU reset */ - igt_global_reset_lock(i915); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(>->i915->runtime_pm); - reset_count = i915_reset_count(&i915->gpu_error); + reset_count = i915_reset_count(>->i915->gpu_error); - i915_reset(i915, ALL_ENGINES, NULL); + intel_gt_reset(gt, ALL_ENGINES, NULL); - if (i915_reset_count(&i915->gpu_error) == reset_count) { + if (i915_reset_count(>->i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; } - igt_global_reset_unlock(i915); + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + igt_global_reset_unlock(gt); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) err = -EIO; return err; @@ -36,61 +39,123 @@ static int igt_global_reset(void *arg) static int igt_wedged_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; intel_wakeref_t wakeref; /* Check that we can recover a wedged device with a GPU reset */ - igt_global_reset_lock(i915); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(>->i915->runtime_pm); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); - GEM_BUG_ON(!i915_reset_failed(i915)); - i915_reset(i915, ALL_ENGINES, NULL); + GEM_BUG_ON(!intel_gt_is_wedged(gt)); + intel_gt_reset(gt, ALL_ENGINES, NULL); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(i915); 
+ intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + igt_global_reset_unlock(gt); - return i915_reset_failed(i915) ? -EIO : 0; + return intel_gt_is_wedged(gt) ? -EIO : 0; } static int igt_atomic_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; int err = 0; /* Check that the resets are usable from atomic context */ - igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); + intel_gt_pm_get(gt); + igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ - if (!igt_force_reset(i915)) + if (!igt_force_reset(gt)) goto unlock; for (p = igt_atomic_phases; p->name; p++) { - GEM_TRACE("intel_gpu_reset under %s\n", p->name); + intel_engine_mask_t awake; + + GEM_TRACE("__intel_gt_reset under %s\n", p->name); + awake = reset_prepare(gt); p->critical_section_begin(); - reset_prepare(i915); - err = intel_gpu_reset(i915, ALL_ENGINES); - reset_finish(i915); + + err = __intel_gt_reset(gt, ALL_ENGINES); + p->critical_section_end(); + reset_finish(gt, awake); if (err) { - pr_err("intel_gpu_reset failed under %s\n", p->name); + pr_err("__intel_gt_reset failed under %s\n", p->name); break; } } /* As we poke around the guts, do a full reset before continuing. */ - igt_force_reset(i915); + igt_force_reset(gt); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + igt_global_reset_unlock(gt); + intel_gt_pm_put(gt); + + return err; +} + +static int igt_atomic_engine_reset(void *arg) +{ + struct intel_gt *gt = arg; + const typeof(*igt_atomic_phases) *p; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that the resets are usable from atomic context */ + + if (!intel_has_reset_engine(gt->i915)) + return 0; + + if (USES_GUC_SUBMISSION(gt->i915)) + return 0; + + intel_gt_pm_get(gt); + igt_global_reset_lock(gt); + + /* Flush any requests before we get started and check basics */ + if (!igt_force_reset(gt)) + goto out_unlock; + + for_each_engine(engine, gt->i915, id) { + tasklet_disable_nosync(&engine->execlists.tasklet); + intel_engine_pm_get(engine); + + for (p = igt_atomic_phases; p->name; p++) { + GEM_TRACE("intel_engine_reset(%s) under %s\n", + engine->name, p->name); + + p->critical_section_begin(); + err = intel_engine_reset(engine, NULL); + p->critical_section_end(); + + if (err) { + pr_err("intel_engine_reset(%s) failed under %s\n", + engine->name, p->name); + break; + } + } + + intel_engine_pm_put(engine); + tasklet_enable(&engine->execlists.tasklet); + if (err) + break; + } + + /* As we poke around the guts, do a full reset before continuing. 
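/*
 * igt_atomic_reset() and igt_atomic_engine_reset() above iterate
 * igt_atomic_phases, bracketing each reset with a different way of entering
 * atomic context via p->critical_section_begin()/end(). The table itself is
 * not part of this diff; the reconstruction below is an assumption that the
 * phases are the usual preempt/softirq/hardirq trio, with only the struct and
 * member names taken from the code above.
 */
#include <linux/bottom_half.h>
#include <linux/irqflags.h>
#include <linux/preempt.h>

struct igt_atomic_section {
	const char *name;
	void (*critical_section_begin)(void);
	void (*critical_section_end)(void);
};

static void __preempt_begin(void) { preempt_disable(); }
static void __preempt_end(void) { preempt_enable(); }
static void __softirq_begin(void) { local_bh_disable(); }
static void __softirq_end(void) { local_bh_enable(); }
static void __hardirq_begin(void) { local_irq_disable(); }
static void __hardirq_end(void) { local_irq_enable(); }

static const struct igt_atomic_section igt_atomic_phases[] = {
	{ "preempt", __preempt_begin, __preempt_end },
	{ "softirq", __softirq_begin, __softirq_end },
	{ "hardirq", __hardirq_begin, __hardirq_end },
	{ }	/* the for (p = igt_atomic_phases; p->name; p++) loops rely on this */
};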
*/ + igt_force_reset(gt); + +out_unlock: + igt_global_reset_unlock(gt); + intel_gt_pm_put(gt); return err; } @@ -101,18 +166,15 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_global_reset), /* attempt to recover GPU first */ SUBTEST(igt_wedged_reset), SUBTEST(igt_atomic_reset), + SUBTEST(igt_atomic_engine_reset), }; - intel_wakeref_t wakeref; - int err = 0; + struct intel_gt *gt = &i915->gt; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(gt->i915)) return 0; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = i915_subtests(tests, i915); - - return err; + return intel_gt_live_subtests(tests, gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c new file mode 100644 index 000000000000..321481403165 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -0,0 +1,846 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#include <linux/prime_numbers.h> + +#include "gem/i915_gem_pm.h" +#include "intel_gt.h" + +#include "../selftests/i915_random.h" +#include "../i915_selftest.h" + +#include "../selftests/igt_flush_test.h" +#include "../selftests/mock_gem_device.h" +#include "selftests/mock_timeline.h" + +static struct page *hwsp_page(struct intel_timeline *tl) +{ + struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static unsigned long hwsp_cacheline(struct intel_timeline *tl) +{ + unsigned long address = (unsigned long)page_address(hwsp_page(tl)); + + return (address + tl->hwsp_offset) / CACHELINE_BYTES; +} + +#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) + +struct mock_hwsp_freelist { + struct drm_i915_private *i915; + struct radix_tree_root cachelines; + struct intel_timeline **history; + unsigned long count, max; + struct rnd_state prng; +}; + +enum { + SHUFFLE = BIT(0), +}; + +static void __mock_hwsp_record(struct mock_hwsp_freelist *state, + unsigned int idx, + struct intel_timeline *tl) +{ + tl = xchg(&state->history[idx], tl); + if (tl) { + radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); + intel_timeline_put(tl); + } +} + +static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, + unsigned int count, + unsigned int flags) +{ + struct intel_timeline *tl; + unsigned int idx; + + while (count--) { + unsigned long cacheline; + int err; + + tl = intel_timeline_create(&state->i915->gt, NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + cacheline = hwsp_cacheline(tl); + err = radix_tree_insert(&state->cachelines, cacheline, tl); + if (err) { + if (err == -EEXIST) { + pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", + cacheline); + } + intel_timeline_put(tl); + return err; + } + + idx = state->count++ % state->max; + __mock_hwsp_record(state, idx, tl); + } + + if (flags & SHUFFLE) + i915_prandom_shuffle(state->history, + sizeof(*state->history), + min(state->count, state->max), + &state->prng); + + count = i915_prandom_u32_max_state(min(state->count, state->max), + &state->prng); + while (count--) { + idx = --state->count % state->max; + __mock_hwsp_record(state, idx, NULL); + } + + return 0; +} + +static int mock_hwsp_freelist(void *arg) +{ + struct mock_hwsp_freelist state; + const struct { + const char *name; + unsigned int flags; + } phases[] = { + { 
"linear", 0 }, + { "shuffled", SHUFFLE }, + { }, + }, *p; + unsigned int na; + int err = 0; + + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); + state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); + + state.i915 = mock_gem_device(); + if (!state.i915) + return -ENOMEM; + + /* + * Create a bunch of timelines and check that their HWSP do not overlap. + * Free some, and try again. + */ + + state.max = PAGE_SIZE / sizeof(*state.history); + state.count = 0; + state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); + if (!state.history) { + err = -ENOMEM; + goto err_put; + } + + mutex_lock(&state.i915->drm.struct_mutex); + for (p = phases; p->name; p++) { + pr_debug("%s(%s)\n", __func__, p->name); + for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { + err = __mock_hwsp_timeline(&state, na, p->flags); + if (err) + goto out; + } + } + +out: + for (na = 0; na < state.max; na++) + __mock_hwsp_record(&state, na, NULL); + mutex_unlock(&state.i915->drm.struct_mutex); + kfree(state.history); +err_put: + drm_dev_put(&state.i915->drm); + return err; +} + +struct __igt_sync { + const char *name; + u32 seqno; + bool expected; + bool set; +}; + +static int __igt_sync(struct intel_timeline *tl, + u64 ctx, + const struct __igt_sync *p, + const char *name) +{ + int ret; + + if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", + name, p->name, ctx, p->seqno, yesno(p->expected)); + return -EINVAL; + } + + if (p->set) { + ret = __intel_timeline_sync_set(tl, ctx, p->seqno); + if (ret) + return ret; + } + + return 0; +} + +static int igt_sync(void *arg) +{ + const struct __igt_sync pass[] = { + { "unset", 0, false, false }, + { "new", 0, false, true }, + { "0a", 0, true, true }, + { "1a", 1, false, true }, + { "1b", 1, true, true }, + { "0b", 0, true, false }, + { "2a", 2, false, true }, + { "4", 4, false, true }, + { "INT_MAX", INT_MAX, false, true }, + { "INT_MAX-1", INT_MAX-1, true, false }, + { "INT_MAX+1", (u32)INT_MAX+1, false, true }, + { "INT_MAX", INT_MAX, true, false }, + { "UINT_MAX", UINT_MAX, false, true }, + { "wrap", 0, false, true }, + { "unwrap", UINT_MAX, true, false }, + {}, + }, *p; + struct intel_timeline tl; + int order, offset; + int ret = -ENODEV; + + mock_timeline_init(&tl, 0); + for (p = pass; p->name; p++) { + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + ret = __igt_sync(&tl, ctx, p, "1"); + if (ret) + goto out; + } + } + } + mock_timeline_fini(&tl); + + mock_timeline_init(&tl, 0); + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + for (p = pass; p->name; p++) { + ret = __igt_sync(&tl, ctx, p, "2"); + if (ret) + goto out; + } + } + } + +out: + mock_timeline_fini(&tl); + return ret; +} + +static unsigned int random_engine(struct rnd_state *rnd) +{ + return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); +} + +static int bench_sync(void *arg) +{ + struct rnd_state prng; + struct intel_timeline tl; + unsigned long end_time, count; + u64 prng32_1M; + ktime_t kt; + int order, last_order; + + mock_timeline_init(&tl, 0); + + /* Lookups from cache are very fast and so the random number generation + * and the loop itself becomes a significant factor in the per-iteration + * timings. 
We try to compensate the results by measuring the overhead + * of the prng and subtract it from the reported results. + */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 x; + + /* Make sure the compiler doesn't optimise away the prng call */ + WRITE_ONCE(x, prandom_u32_state(&prng)); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_debug("%s: %lu random evaluations, %lluns/prng\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); + + /* Benchmark (only) setting random context ids */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u64 id = i915_prandom_u64_state(&prng); + + __intel_timeline_sync_set(&tl, id, 0); + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + prandom_seed_state(&prng, i915_selftest.random_seed); + end_time = count; + kt = ktime_get(); + while (end_time--) { + u64 id = i915_prandom_u64_state(&prng); + + if (!__intel_timeline_sync_is_later(&tl, id, 0)) { + mock_timeline_fini(&tl); + pr_err("Lookup of %llu failed\n", id); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_fini(&tl); + cond_resched(); + + mock_timeline_init(&tl, 0); + + /* Benchmark setting the first N (in order) contexts */ + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + __intel_timeline_sync_set(&tl, count++, 0); + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + end_time = count; + kt = ktime_get(); + while (end_time--) { + if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) { + pr_err("Lookup of %lu failed\n", end_time); + mock_timeline_fini(&tl); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_fini(&tl); + cond_resched(); + + mock_timeline_init(&tl, 0); + + /* Benchmark searching for a random context id and maybe changing it */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 id = random_engine(&prng); + u32 seqno = prandom_u32_state(&prng); + + if (!__intel_timeline_sync_is_later(&tl, id, seqno)) + __intel_timeline_sync_set(&tl, id, seqno); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_fini(&tl); + cond_resched(); + + /* Benchmark searching for a known context id and changing the seqno */ + for (last_order = 1, order = 1; order 
< 32; + ({ int tmp = last_order; last_order = order; order += tmp; })) { + unsigned int mask = BIT(order) - 1; + + mock_timeline_init(&tl, 0); + + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + /* Without assuming too many details of the underlying + * implementation, try to identify its phase-changes + * (if any)! + */ + u64 id = (u64)(count & mask) << order; + + __intel_timeline_sync_is_later(&tl, id, 0); + __intel_timeline_sync_set(&tl, id, 0); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", + __func__, count, order, + (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_fini(&tl); + cond_resched(); + } + + return 0; +} + +int intel_timeline_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(mock_hwsp_freelist), + SUBTEST(igt_sync), + SUBTEST(bench_sync), + }; + + return i915_subtests(tests, NULL); +} + +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (INTEL_GEN(rq->i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = 0; + *cs++ = value; + } else if (INTEL_GEN(rq->i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = addr; + *cs++ = value; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = addr; + *cs++ = value; + *cs++ = MI_NOOP; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_request * +tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) +{ + struct i915_request *rq; + int err; + + lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */ + + err = intel_timeline_pin(tl); + if (err) { + rq = ERR_PTR(err); + goto out; + } + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + goto out_unpin; + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_unpin: + intel_timeline_unpin(tl); +out: + if (IS_ERR(rq)) + pr_err("Failed to write to timeline!\n"); + return rq; +} + +static struct intel_timeline * +checked_intel_timeline_create(struct drm_i915_private *i915) +{ + struct intel_timeline *tl; + + tl = intel_timeline_create(&i915->gt, NULL); + if (IS_ERR(tl)) + return tl; + + if (*tl->hwsp_seqno != tl->seqno) { + pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", + *tl->hwsp_seqno, tl->seqno); + intel_timeline_put(tl); + return ERR_PTR(-EINVAL); + } + + return tl; +} + +static int live_hwsp_engine(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct intel_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots. 
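/*
 * Worked example of the HWSP slot arithmetic behind hwsp_cacheline() and
 * CACHELINES_PER_PAGE defined earlier in this file: with 4 KiB pages and
 * 64-byte cachelines each backing page carries 64 independent breadcrumb
 * slots, so the 4096 timelines live_hwsp_engine() creates per engine need at
 * least 64 HWSP pages. The page address and slot index below are made up for
 * illustration.
 */
#include <stdio.h>

#define PAGE_SIZE	4096u
#define CACHELINE_BYTES	64u
#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

int main(void)
{
	unsigned long long page_address = 0xffff888012340000ull;	/* page aligned */
	unsigned int hwsp_offset = 5 * CACHELINE_BYTES;			/* sixth slot */
	unsigned long long cacheline = (page_address + hwsp_offset) / CACHELINE_BYTES;

	printf("%u breadcrumb slots per page\n", CACHELINES_PER_PAGE);
	printf("global cacheline id %llu, slot %llu within its page\n",
	       cacheline, cacheline % CACHELINES_PER_PAGE);
	printf("4096 timelines need at least %u pages\n",
	       4096 / CACHELINES_PER_PAGE);
	return 0;
}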
+ */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + for (n = 0; n < NUM_TIMELINES; n++) { + struct intel_timeline *tl; + struct i915_request *rq; + + tl = checked_intel_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + intel_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct intel_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + intel_timeline_put(tl); + } + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_alternate(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct intel_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots with adjacent + * engines. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + for (n = 0; n < NUM_TIMELINES; n++) { + for_each_engine(engine, i915, id) { + struct intel_timeline *tl; + struct i915_request *rq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + tl = checked_intel_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + intel_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct intel_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + intel_timeline_put(tl); + } + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_wrap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct intel_timeline *tl; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = 0; + + /* + * Across a seqno wrap, we need to keep the old cacheline alive for + * foreign GPU references. 
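/*
 * Worked example of the unsigned wrap that live_hwsp_wrap() (whose body
 * follows) provokes by presetting tl->seqno = -4u: a couple of breadcrumb
 * allocations later the 32-bit counter passes zero, the later seqno compares
 * lower than the earlier one, and the timeline must keep the old cacheline
 * alive while switching to a fresh one. The per-allocation increment of 2 is
 * an illustrative guess, not the i915 value.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t seqno = -4u;		/* 0xfffffffc, as preset by the test */
	uint32_t first, second;

	first = seqno += 2;		/* 0xfffffffe */
	second = seqno += 2;		/* 0x00000000: wrapped through zero */

	printf("seqno[0] = 0x%08x\n", first);
	printf("seqno[1] = 0x%08x\n", second);
	printf("seqno[1] >= seqno[0]? %s (the wrap makes this false)\n",
	       second >= first ? "yes" : "no");
	return 0;
}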
+ */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + tl = intel_timeline_create(&i915->gt, NULL); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out_rpm; + } + if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) + goto out_free; + + err = intel_timeline_pin(tl); + if (err) + goto out_free; + + for_each_engine(engine, i915, id) { + const u32 *hwsp_seqno[2]; + struct i915_request *rq; + u32 seqno[2]; + + if (!intel_engine_can_store_dword(engine)) + continue; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + tl->seqno = -4u; + + mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); + err = intel_timeline_get_seqno(tl, rq, &seqno[0]); + mutex_unlock(&tl->mutex); + if (err) { + i915_request_add(rq); + goto out; + } + pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", + seqno[0], tl->hwsp_offset); + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); + if (err) { + i915_request_add(rq); + goto out; + } + hwsp_seqno[0] = tl->hwsp_seqno; + + mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); + err = intel_timeline_get_seqno(tl, rq, &seqno[1]); + mutex_unlock(&tl->mutex); + if (err) { + i915_request_add(rq); + goto out; + } + pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", + seqno[1], tl->hwsp_offset); + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); + if (err) { + i915_request_add(rq); + goto out; + } + hwsp_seqno[1] = tl->hwsp_seqno; + + /* With wrap should come a new hwsp */ + GEM_BUG_ON(seqno[1] >= seqno[0]); + GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]); + + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + err = -EIO; + goto out; + } + + if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) { + pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", + *hwsp_seqno[0], *hwsp_seqno[1], + seqno[0], seqno[1]); + err = -EINVAL; + goto out; + } + + i915_retire_requests(i915); /* recycle HWSP */ + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + intel_timeline_unpin(tl); +out_free: + intel_timeline_put(tl); +out_rpm: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int live_hwsp_recycle(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count; + int err = 0; + + /* + * Check seqno writes into one timeline at a time. We expect to + * recycle the breadcrumb slot between iterations and neither + * want to confuse ourselves or the GPU. 
+ */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) + continue; + + do { + struct intel_timeline *tl; + struct i915_request *rq; + + tl = checked_intel_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + intel_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + intel_timeline_put(tl); + err = -EIO; + goto out; + } + + if (*tl->hwsp_seqno != count) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + count, *tl->hwsp_seqno); + err = -EINVAL; + } + + intel_timeline_put(tl); + count++; + + if (err) + goto out; + } while (!__igt_timeout(end_time, NULL)); + } + +out: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +int intel_timeline_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_hwsp_recycle), + SUBTEST(live_hwsp_engine), + SUBTEST(live_hwsp_alternate), + SUBTEST(live_hwsp_wrap), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 9eaf030affd0..d06d68ac2a3b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -5,13 +5,14 @@ */ #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" #include "i915_selftest.h" #include "intel_reset.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" -#include "selftests/igt_wedge_me.h" #include "selftests/mock_drm.h" #include "gem/selftests/igt_gem_utils.h" @@ -24,11 +25,9 @@ static const struct wo_register { { INTEL_GEMINILAKE, 0x731c } }; -#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 8) struct wa_lists { struct i915_wa_list gt_wa_list; struct { - char name[REF_NAME_MAX]; struct i915_wa_list wa_list; struct i915_wa_list ctx_wa_list; } engine[I915_NUM_ENGINES]; @@ -42,25 +41,20 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) memset(lists, 0, sizeof(*lists)); - wa_init_start(&lists->gt_wa_list, "GT_REF"); + wa_init_start(&lists->gt_wa_list, "GT_REF", "global"); gt_init_workarounds(i915, &lists->gt_wa_list); wa_init_finish(&lists->gt_wa_list); for_each_engine(engine, i915, id) { struct i915_wa_list *wal = &lists->engine[id].wa_list; - char *name = lists->engine[id].name; - snprintf(name, REF_NAME_MAX, "%s_REF", engine->name); - - wa_init_start(wal, name); + wa_init_start(wal, "REF", engine->name); engine_init_workarounds(engine, wal); wa_init_finish(wal); - snprintf(name, REF_NAME_MAX, "%s_CTX_REF", engine->name); - __intel_engine_init_ctx_wa(engine, &lists->engine[id].ctx_wa_list, - name); + "CTX_REF"); } } @@ -102,7 +96,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) i915_gem_object_flush_map(result); i915_gem_object_unpin_map(result); - vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(result, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -119,7 +113,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct 
intel_engine_cs *engine) } i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto err_req; @@ -184,7 +180,7 @@ static int check_whitelist(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *results; - struct igt_wedge_me wedge; + struct intel_wedge_me wedge; u32 *vaddr; int err; int i; @@ -195,10 +191,10 @@ static int check_whitelist(struct i915_gem_context *ctx, err = 0; i915_gem_object_lock(results); - igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */ + intel_wedge_on_timeout(&wedge, &ctx->i915->gt, HZ / 5) /* safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); i915_gem_object_unlock(results); - if (i915_terminally_wedged(ctx->i915)) + if (intel_gt_is_wedged(&ctx->i915->gt)) err = -EIO; if (err) goto out_put; @@ -231,13 +227,13 @@ out_put: static int do_device_reset(struct intel_engine_cs *engine) { - i915_reset(engine->i915, engine->mask, "live_workarounds"); + intel_gt_reset(engine->gt, engine->mask, "live_workarounds"); return 0; } static int do_engine_reset(struct intel_engine_cs *engine) { - return i915_reset_engine(engine, "live_workarounds"); + return intel_engine_reset(engine, "live_workarounds"); } static int @@ -245,6 +241,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, struct igt_spinner *spin) { struct i915_gem_context *ctx; + struct intel_context *ce; struct i915_request *rq; intel_wakeref_t wakeref; int err = 0; @@ -255,10 +252,14 @@ switch_to_scratch_context(struct intel_engine_cs *engine, GEM_BUG_ON(i915_gem_context_is_bannable(ctx)); + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); + rq = ERR_PTR(-ENODEV); with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref) - rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP); + rq = igt_spinner_create_request(spin, ce, MI_NOOP); + intel_context_put(ce); kernel_context_close(ctx); if (IS_ERR(rq)) { @@ -286,64 +287,67 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, const char *name) { struct drm_i915_private *i915 = engine->i915; - struct i915_gem_context *ctx; + struct i915_gem_context *ctx, *tmp; struct igt_spinner spin; intel_wakeref_t wakeref; int err; - pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n", - engine->whitelist.count, name); - - err = igt_spinner_init(&spin, i915); - if (err) - return err; + pr_info("Checking %d whitelisted registers on %s (RING_NONPRIV) [%s]\n", + engine->whitelist.count, engine->name, name); ctx = kernel_context(i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); + err = igt_spinner_init(&spin, engine->gt); + if (err) + goto out_ctx; + err = check_whitelist(ctx, engine); if (err) { pr_err("Invalid whitelist *before* %s reset!\n", name); - goto out; + goto out_spin; } err = switch_to_scratch_context(engine, &spin); if (err) - goto out; + goto out_spin; with_intel_runtime_pm(&i915->runtime_pm, wakeref) err = reset(engine); igt_spinner_end(&spin); - igt_spinner_fini(&spin); if (err) { pr_err("%s reset failed\n", name); - goto out; + goto out_spin; } err = check_whitelist(ctx, engine); if (err) { pr_err("Whitelist not preserved in context across %s reset!\n", name); - goto out; + goto out_spin; } + tmp = kernel_context(i915); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto out_spin; + } kernel_context_close(ctx); - - ctx = 
kernel_context(i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); + ctx = tmp; err = check_whitelist(ctx, engine); if (err) { pr_err("Invalid whitelist *after* %s reset in fresh context!\n", name); - goto out; + goto out_spin; } -out: +out_spin: + igt_spinner_fini(&spin); +out_ctx: kernel_context_close(ctx); return err; } @@ -393,6 +397,10 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg) enum intel_platform platform = INTEL_INFO(engine->i915)->platform; int i; + if ((reg & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == + RING_FORCE_TO_NONPRIV_ACCESS_WR) + return true; + for (i = 0; i < ARRAY_SIZE(wo_registers); i++) { if (wo_registers[i].platform == platform && wo_registers[i].reg == reg) @@ -404,7 +412,8 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg) static bool ro_register(u32 reg) { - if (reg & RING_FORCE_TO_NONPRIV_RD) + if ((reg & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == + RING_FORCE_TO_NONPRIV_ACCESS_RD) return true; return false; @@ -476,12 +485,12 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, u32 srm, lrm, rsvd; u32 expect; int idx; + bool ro_reg; if (wo_register(engine, reg)) continue; - if (ro_register(reg)) - continue; + ro_reg = ro_register(reg); srm = MI_STORE_REGISTER_MEM; lrm = MI_LOAD_REGISTER_MEM; @@ -542,7 +551,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, i915_gem_object_flush_map(batch->obj); i915_gem_object_unpin_map(batch->obj); - i915_gem_chipset_flush(ctx->i915); + intel_gt_chipset_flush(engine->gt); rq = igt_request_alloc(ctx, engine); if (IS_ERR(rq)) { @@ -570,7 +579,7 @@ err_request: if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("%s: Futzing %x timedout; cancelling test\n", engine->name, reg); - i915_gem_set_wedged(ctx->i915); + intel_gt_set_wedged(&ctx->i915->gt); err = -EIO; goto out_batch; } @@ -582,24 +591,35 @@ err_request: } GEM_BUG_ON(values[ARRAY_SIZE(values) - 1] != 0xffffffff); - rsvd = results[ARRAY_SIZE(values)]; /* detect write masking */ - if (!rsvd) { - pr_err("%s: Unable to write to whitelisted register %x\n", - engine->name, reg); - err = -EINVAL; - goto out_unpin; + if (!ro_reg) { + /* detect write masking */ + rsvd = results[ARRAY_SIZE(values)]; + if (!rsvd) { + pr_err("%s: Unable to write to whitelisted register %x\n", + engine->name, reg); + err = -EINVAL; + goto out_unpin; + } } expect = results[0]; idx = 1; for (v = 0; v < ARRAY_SIZE(values); v++) { - expect = reg_write(expect, values[v], rsvd); + if (ro_reg) + expect = results[0]; + else + expect = reg_write(expect, values[v], rsvd); + if (results[idx] != expect) err++; idx++; } for (v = 0; v < ARRAY_SIZE(values); v++) { - expect = reg_write(expect, ~values[v], rsvd); + if (ro_reg) + expect = results[0]; + else + expect = reg_write(expect, ~values[v], rsvd); + if (results[idx] != expect) err++; idx++; @@ -608,15 +628,22 @@ err_request: pr_err("%s: %d mismatch between values written to whitelisted register [%x], and values read back!\n", engine->name, err, reg); - pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n", - engine->name, reg, results[0], rsvd); + if (ro_reg) + pr_info("%s: Whitelisted read-only register: %x, original value %08x\n", + engine->name, reg, results[0]); + else + pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n", + engine->name, reg, results[0], rsvd); expect = results[0]; idx = 1; for (v = 0; v < ARRAY_SIZE(values); v++) { u32 w = values[v]; - expect = reg_write(expect, w, rsvd); + if (ro_reg) + expect = results[0]; + else + expect = reg_write(expect, 
w, rsvd); pr_info("Wrote %08x, read %08x, expect %08x\n", w, results[idx], expect); idx++; @@ -624,7 +651,10 @@ err_request: for (v = 0; v < ARRAY_SIZE(values); v++) { u32 w = ~values[v]; - expect = reg_write(expect, w, rsvd); + if (ro_reg) + expect = results[0]; + else + expect = reg_write(expect, w, rsvd); pr_info("Wrote %08x, read %08x, expect %08x\n", w, results[idx], expect); idx++; @@ -707,7 +737,7 @@ static int live_reset_whitelist(void *arg) if (!engine || engine->whitelist.count == 0) return 0; - igt_global_reset_lock(i915); + igt_global_reset_lock(&i915->gt); if (intel_has_reset_engine(i915)) { err = check_whitelist_across_reset(engine, @@ -726,7 +756,7 @@ static int live_reset_whitelist(void *arg) } out: - igt_global_reset_unlock(i915); + igt_global_reset_unlock(&i915->gt); return err; } @@ -756,8 +786,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, u64 offset = results->node.start + sizeof(u32) * i; u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); - /* Clear RD only and WR only flags */ - reg &= ~(RING_FORCE_TO_NONPRIV_RD | RING_FORCE_TO_NONPRIV_WR); + /* Clear access permission field */ + reg &= ~RING_FORCE_TO_NONPRIV_ACCESS_MASK; *cs++ = srm; *cs++ = reg; @@ -806,7 +836,7 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, *cs++ = MI_BATCH_BUFFER_END; i915_gem_object_flush_map(batch->obj); - i915_gem_chipset_flush(ctx->i915); + intel_gt_chipset_flush(engine->gt); rq = igt_request_alloc(ctx, engine); if (IS_ERR(rq)) { @@ -925,7 +955,13 @@ check_whitelisted_registers(struct intel_engine_cs *engine, err = 0; for (i = 0; i < engine->whitelist.count; i++) { - if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) + const struct i915_wa *wa = &engine->whitelist.list[i]; + + if (i915_mmio_reg_offset(wa->reg) & + RING_FORCE_TO_NONPRIV_ACCESS_RD) + continue; + + if (!fn(engine, a[i], b[i], wa->reg)) err = -EINVAL; } @@ -1055,7 +1091,7 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + for_each_gem_engine(ce, i915_gem_context_engines(ctx), it) { enum intel_engine_id id = ce->engine->id; ok &= engine_wa_list_verify(ce, @@ -1066,7 +1102,6 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, &lists->engine[id].ctx_wa_list, str) == 0; } - i915_gem_context_unlock_engines(ctx); return ok; } @@ -1087,9 +1122,11 @@ live_gpu_reset_workarounds(void *arg) if (IS_ERR(ctx)) return PTR_ERR(ctx); + i915_gem_context_lock_engines(ctx); + pr_info("Verifying after GPU reset...\n"); - igt_global_reset_lock(i915); + igt_global_reset_lock(&i915->gt); wakeref = intel_runtime_pm_get(&i915->runtime_pm); reference_lists_init(i915, &lists); @@ -1098,15 +1135,16 @@ live_gpu_reset_workarounds(void *arg) if (!ok) goto out; - i915_reset(i915, ALL_ENGINES, "live_workarounds"); + intel_gt_reset(&i915->gt, ALL_ENGINES, "live_workarounds"); ok = verify_wa_lists(ctx, &lists, "after reset"); out: + i915_gem_context_unlock_engines(ctx); kernel_context_close(ctx); reference_lists_fini(i915, &lists); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(i915); + igt_global_reset_unlock(&i915->gt); return ok ? 
0 : -ESRCH; } @@ -1115,10 +1153,10 @@ static int live_engine_reset_workarounds(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; + struct i915_gem_engines_iter it; struct i915_gem_context *ctx; + struct intel_context *ce; struct igt_spinner spin; - enum intel_engine_id id; struct i915_request *rq; intel_wakeref_t wakeref; struct wa_lists lists; @@ -1131,12 +1169,13 @@ live_engine_reset_workarounds(void *arg) if (IS_ERR(ctx)) return PTR_ERR(ctx); - igt_global_reset_lock(i915); + igt_global_reset_lock(&i915->gt); wakeref = intel_runtime_pm_get(&i915->runtime_pm); reference_lists_init(i915, &lists); - for_each_engine(engine, i915, id) { + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + struct intel_engine_cs *engine = ce->engine; bool ok; pr_info("Verifying after %s reset...\n", engine->name); @@ -1147,7 +1186,7 @@ live_engine_reset_workarounds(void *arg) goto err; } - i915_reset_engine(engine, "live_workarounds"); + intel_engine_reset(engine, "live_workarounds"); ok = verify_wa_lists(ctx, &lists, "after idle reset"); if (!ok) { @@ -1155,11 +1194,11 @@ live_engine_reset_workarounds(void *arg) goto err; } - ret = igt_spinner_init(&spin, i915); + ret = igt_spinner_init(&spin, engine->gt); if (ret) goto err; - rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); if (IS_ERR(rq)) { ret = PTR_ERR(rq); igt_spinner_fini(&spin); @@ -1175,7 +1214,7 @@ live_engine_reset_workarounds(void *arg) goto err; } - i915_reset_engine(engine, "live_workarounds"); + intel_engine_reset(engine, "live_workarounds"); igt_spinner_end(&spin); igt_spinner_fini(&spin); @@ -1186,11 +1225,11 @@ live_engine_reset_workarounds(void *arg) goto err; } } - err: + i915_gem_context_unlock_engines(ctx); reference_lists_fini(i915, &lists); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(i915); + igt_global_reset_unlock(&i915->gt); kernel_context_close(ctx); igt_flush_test(i915, I915_WAIT_LOCKED); @@ -1209,7 +1248,7 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) }; int err; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; mutex_lock(&i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c new file mode 100644 index 000000000000..598170efcaf6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#include "../intel_timeline.h" + +#include "mock_timeline.h" + +void mock_timeline_init(struct intel_timeline *timeline, u64 context) +{ + timeline->gt = NULL; + timeline->fence_context = context; + + mutex_init(&timeline->mutex); + + INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); + + INIT_LIST_HEAD(&timeline->link); +} + +void mock_timeline_fini(struct intel_timeline *timeline) +{ + i915_syncmap_free(&timeline->sync); +} diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h new file mode 100644 index 000000000000..689efc66c908 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#ifndef __MOCK_TIMELINE__ +#define __MOCK_TIMELINE__ + +struct intel_timeline; + 
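For context on the mock timeline helpers introduced above: they are meant for stand-alone (mock) selftests that exercise per-timeline state without a backing GT. A minimal usage sketch, assuming an on-stack struct intel_timeline is acceptable for mock use and that only the syncmap and request lists are exercised (the function name and body here are illustrative, not part of the patch):

static int mock_timeline_example(void)
{
	struct intel_timeline tl;

	/* No struct intel_gt backing; only lists, mutex and syncmap are live. */
	mock_timeline_init(&tl, 1);

	/* ... exercise tl.sync / tl.requests here ... */

	/* Releases the syncmap allocated by mock_timeline_init(). */
	mock_timeline_fini(&tl);
	return 0;
}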
+void mock_timeline_init(struct intel_timeline *timeline, u64 context); +void mock_timeline_fini(struct intel_timeline *timeline); + +#endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/Makefile b/drivers/gpu/drm/i915/gt/uc/Makefile new file mode 100644 index 000000000000..bec94d434cb6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/Makefile @@ -0,0 +1,5 @@ +# For building individual subdir files on the command line +subdir-ccflags-y += -I$(srctree)/$(src)/../.. + +# Extra header tests +header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c new file mode 100644 index 000000000000..249c747e9756 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -0,0 +1,619 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#include "gt/intel_gt.h" +#include "intel_guc.h" +#include "intel_guc_ads.h" +#include "intel_guc_submission.h" +#include "i915_drv.h" + +static void gen8_guc_raise_irq(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + intel_uncore_write(gt->uncore, GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); +} + +static void gen11_guc_raise_irq(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + intel_uncore_write(gt->uncore, GEN11_GUC_HOST_INTERRUPT, 0); +} + +static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) +{ + GEM_BUG_ON(!guc->send_regs.base); + GEM_BUG_ON(!guc->send_regs.count); + GEM_BUG_ON(i >= guc->send_regs.count); + + return _MMIO(guc->send_regs.base + 4 * i); +} + +void intel_guc_init_send_regs(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + enum forcewake_domains fw_domains = 0; + unsigned int i; + + if (INTEL_GEN(gt->i915) >= 11) { + guc->send_regs.base = + i915_mmio_reg_offset(GEN11_SOFT_SCRATCH(0)); + guc->send_regs.count = GEN11_SOFT_SCRATCH_COUNT; + } else { + guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); + guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN; + BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT); + } + + for (i = 0; i < guc->send_regs.count; i++) { + fw_domains |= intel_uncore_forcewake_for_reg(gt->uncore, + guc_send_reg(guc, i), + FW_REG_READ | FW_REG_WRITE); + } + guc->send_regs.fw_domains = fw_domains; +} + +void intel_guc_init_early(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + intel_guc_fw_init_early(guc); + intel_guc_ct_init_early(&guc->ct); + intel_guc_log_init_early(&guc->log); + intel_guc_submission_init_early(guc); + + mutex_init(&guc->send_mutex); + spin_lock_init(&guc->irq_lock); + guc->send = intel_guc_send_nop; + guc->handler = intel_guc_to_host_event_handler_nop; + if (INTEL_GEN(i915) >= 11) { + guc->notify = gen11_guc_raise_irq; + guc->interrupts.reset = gen11_reset_guc_interrupts; + guc->interrupts.enable = gen11_enable_guc_interrupts; + guc->interrupts.disable = gen11_disable_guc_interrupts; + } else { + guc->notify = gen8_guc_raise_irq; + guc->interrupts.reset = gen9_reset_guc_interrupts; + guc->interrupts.enable = gen9_enable_guc_interrupts; + guc->interrupts.disable = gen9_disable_guc_interrupts; + } +} + +static int guc_shared_data_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + guc->shared_data = vma; 
+ guc->shared_data_vaddr = vaddr; + + return 0; +} + +static void guc_shared_data_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->shared_data, I915_VMA_RELEASE_MAP); +} + +static u32 guc_ctl_debug_flags(struct intel_guc *guc) +{ + u32 level = intel_guc_log_get_level(&guc->log); + u32 flags = 0; + + if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) + flags |= GUC_LOG_DISABLED; + else + flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << + GUC_LOG_VERBOSITY_SHIFT; + + return flags; +} + +static u32 guc_ctl_feature_flags(struct intel_guc *guc) +{ + u32 flags = 0; + + if (!intel_guc_is_submission_supported(guc)) + flags |= GUC_CTL_DISABLE_SCHEDULER; + + return flags; +} + +static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc) +{ + u32 flags = 0; + + if (intel_guc_is_submission_supported(guc)) { + u32 ctxnum, base; + + base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool); + ctxnum = GUC_MAX_STAGE_DESCRIPTORS / 16; + + base >>= PAGE_SHIFT; + flags |= (base << GUC_CTL_BASE_ADDR_SHIFT) | + (ctxnum << GUC_CTL_CTXNUM_IN16_SHIFT); + } + return flags; +} + +static u32 guc_ctl_log_params_flags(struct intel_guc *guc) +{ + u32 offset = intel_guc_ggtt_offset(guc, guc->log.vma) >> PAGE_SHIFT; + u32 flags; + + #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) + #define UNIT SZ_1M + #define FLAG GUC_LOG_ALLOC_IN_MEGABYTE + #else + #define UNIT SZ_4K + #define FLAG 0 + #endif + + BUILD_BUG_ON(!CRASH_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!DPC_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(DPC_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!ISR_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(ISR_BUFFER_SIZE, UNIT)); + + BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) > + (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); + BUILD_BUG_ON((DPC_BUFFER_SIZE / UNIT - 1) > + (GUC_LOG_DPC_MASK >> GUC_LOG_DPC_SHIFT)); + BUILD_BUG_ON((ISR_BUFFER_SIZE / UNIT - 1) > + (GUC_LOG_ISR_MASK >> GUC_LOG_ISR_SHIFT)); + + flags = GUC_LOG_VALID | + GUC_LOG_NOTIFY_ON_HALF_FULL | + FLAG | + ((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) | + ((DPC_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DPC_SHIFT) | + ((ISR_BUFFER_SIZE / UNIT - 1) << GUC_LOG_ISR_SHIFT) | + (offset << GUC_LOG_BUF_ADDR_SHIFT); + + #undef UNIT + #undef FLAG + + return flags; +} + +static u32 guc_ctl_ads_flags(struct intel_guc *guc) +{ + u32 ads = intel_guc_ggtt_offset(guc, guc->ads_vma) >> PAGE_SHIFT; + u32 flags = ads << GUC_ADS_ADDR_SHIFT; + + return flags; +} + +/* + * Initialise the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. + */ +static void guc_init_params(struct intel_guc *guc) +{ + u32 *params = guc->params; + int i; + + BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); + + params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc); + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); + params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); + params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); + params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]); +} + +/* + * Initialise the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. 
+ */ +void intel_guc_write_params(struct intel_guc *guc) +{ + struct intel_uncore *uncore = guc_to_gt(guc)->uncore; + int i; + + /* + * All SOFT_SCRATCH registers are in FORCEWAKE_BLITTER domain and + * they are power context saved so it's ok to release forcewake + * when we are done here and take it again at xfer time. + */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_BLITTER); + + intel_uncore_write(uncore, SOFT_SCRATCH(0), 0); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + intel_uncore_write(uncore, SOFT_SCRATCH(1 + i), guc->params[i]); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_BLITTER); +} + +int intel_guc_init(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + int ret; + + ret = intel_uc_fw_init(&guc->fw); + if (ret) + goto err_fetch; + + ret = guc_shared_data_create(guc); + if (ret) + goto err_fw; + GEM_BUG_ON(!guc->shared_data); + + ret = intel_guc_log_create(&guc->log); + if (ret) + goto err_shared; + + ret = intel_guc_ads_create(guc); + if (ret) + goto err_log; + GEM_BUG_ON(!guc->ads_vma); + + ret = intel_guc_ct_init(&guc->ct); + if (ret) + goto err_ads; + + if (intel_guc_is_submission_supported(guc)) { + /* + * This is stuff we need to have available at fw load time + * if we are planning to enable submission later + */ + ret = intel_guc_submission_init(guc); + if (ret) + goto err_ct; + } + + /* now that everything is perma-pinned, initialize the parameters */ + guc_init_params(guc); + + /* We need to notify the guc whenever we change the GGTT */ + i915_ggtt_enable_guc(gt->ggtt); + + return 0; + +err_ct: + intel_guc_ct_fini(&guc->ct); +err_ads: + intel_guc_ads_destroy(guc); +err_log: + intel_guc_log_destroy(&guc->log); +err_shared: + guc_shared_data_destroy(guc); +err_fw: + intel_uc_fw_fini(&guc->fw); +err_fetch: + intel_uc_fw_cleanup_fetch(&guc->fw); + DRM_DEV_DEBUG_DRIVER(gt->i915->drm.dev, "failed with %d\n", ret); + return ret; +} + +void intel_guc_fini(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + if (!intel_uc_fw_is_available(&guc->fw)) + return; + + i915_ggtt_disable_guc(gt->ggtt); + + if (intel_guc_is_submission_supported(guc)) + intel_guc_submission_fini(guc); + + intel_guc_ct_fini(&guc->ct); + + intel_guc_ads_destroy(guc); + intel_guc_log_destroy(&guc->log); + guc_shared_data_destroy(guc); + intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); +} + +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) +{ + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; +} + +void intel_guc_to_host_event_handler_nop(struct intel_guc *guc) +{ + WARN(1, "Unexpected event: no suitable handler\n"); +} + +/* + * This function implements the MMIO based host to GuC interface. 
+ */ +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) +{ + struct intel_uncore *uncore = guc_to_gt(guc)->uncore; + u32 status; + int i; + int ret; + + GEM_BUG_ON(!len); + GEM_BUG_ON(len > guc->send_regs.count); + + /* We expect only action code */ + GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK); + + /* If CT is available, we expect to use MMIO only during init/fini */ + GEM_BUG_ON(*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && + *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); + + mutex_lock(&guc->send_mutex); + intel_uncore_forcewake_get(uncore, guc->send_regs.fw_domains); + + for (i = 0; i < len; i++) + intel_uncore_write(uncore, guc_send_reg(guc, i), action[i]); + + intel_uncore_posting_read(uncore, guc_send_reg(guc, i - 1)); + + intel_guc_notify(guc); + + /* + * No GuC command should ever take longer than 10ms. + * Fast commands should still complete in 10us. + */ + ret = __intel_wait_for_register_fw(uncore, + guc_send_reg(guc, 0), + INTEL_GUC_MSG_TYPE_MASK, + INTEL_GUC_MSG_TYPE_RESPONSE << + INTEL_GUC_MSG_TYPE_SHIFT, + 10, 10, &status); + /* If GuC explicitly returned an error, convert it to -EIO */ + if (!ret && !INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(status)) + ret = -EIO; + + if (ret) { + DRM_ERROR("MMIO: GuC action %#x failed with error %d %#x\n", + action[0], ret, status); + goto out; + } + + if (response_buf) { + int count = min(response_buf_size, guc->send_regs.count - 1); + + for (i = 0; i < count; i++) + response_buf[i] = intel_uncore_read(uncore, + guc_send_reg(guc, i + 1)); + } + + /* Use data from the GuC response as our return value */ + ret = INTEL_GUC_MSG_TO_DATA(status); + +out: + intel_uncore_forcewake_put(uncore, guc->send_regs.fw_domains); + mutex_unlock(&guc->send_mutex); + + return ret; +} + +int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, + const u32 *payload, u32 len) +{ + u32 msg; + + if (unlikely(!len)) + return -EPROTO; + + /* Make sure to handle only enabled messages */ + msg = payload[0] & guc->msg_enabled_mask; + + if (msg & (INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED)) + intel_guc_log_handle_flush_event(&guc->log); + + return 0; +} + +int intel_guc_sample_forcewake(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; + u32 action[2]; + + action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; + /* WaRsDisableCoarsePowerGating:skl,cnl */ + if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) + action[1] = 0; + else + /* bit 0 and 1 are for Render and Media domain separately */ + action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode + * @guc: intel_guc structure + * @rsa_offset: rsa offset w.r.t ggtt base of huc vma + * + * Triggers a HuC firmware authentication request to the GuC via intel_guc_send + * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by + * intel_huc_auth(). 
+ * + * Return: non-zero code on error + */ +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) +{ + u32 action[] = { + INTEL_GUC_ACTION_AUTHENTICATE_HUC, + rsa_offset + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_suspend() - notify GuC entering suspend state + * @guc: the guc + */ +int intel_guc_suspend(struct intel_guc *guc) +{ + struct intel_uncore *uncore = guc_to_gt(guc)->uncore; + int ret; + u32 status; + u32 action[] = { + INTEL_GUC_ACTION_ENTER_S_STATE, + GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ + }; + + /* + * The ENTER_S_STATE action queues the save/restore operation in GuC FW + * and then returns, so waiting on the H2G is not enough to guarantee + * GuC is done. When all the processing is done, GuC writes + * INTEL_GUC_SLEEP_STATE_SUCCESS to scratch register 14, so we can poll + * on that. Note that GuC does not ensure that the value in the register + * is different from INTEL_GUC_SLEEP_STATE_SUCCESS while the action is + * in progress so we need to take care of that ourselves as well. + */ + + intel_uncore_write(uncore, SOFT_SCRATCH(14), + INTEL_GUC_SLEEP_STATE_INVALID_MASK); + + ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); + if (ret) + return ret; + + ret = __intel_wait_for_register(uncore, SOFT_SCRATCH(14), + INTEL_GUC_SLEEP_STATE_INVALID_MASK, + 0, 0, 10, &status); + if (ret) + return ret; + + if (status != INTEL_GUC_SLEEP_STATE_SUCCESS) { + DRM_ERROR("GuC failed to change sleep state. " + "action=0x%x, err=%u\n", + action[0], status); + return -EIO; + } + + return 0; +} + +/** + * intel_guc_reset_engine() - ask GuC to reset an engine + * @guc: intel_guc structure + * @engine: engine to be reset + */ +int intel_guc_reset_engine(struct intel_guc *guc, + struct intel_engine_cs *engine) +{ + u32 data[7]; + + GEM_BUG_ON(!guc->execbuf_client); + + data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET; + data[1] = engine->guc_id; + data[2] = 0; + data[3] = 0; + data[4] = 0; + data[5] = guc->execbuf_client->stage_id; + data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_resume() - notify GuC resuming from suspend state + * @guc: the guc + */ +int intel_guc_resume(struct intel_guc *guc) +{ + u32 action[] = { + INTEL_GUC_ACTION_EXIT_S_STATE, + GUC_POWER_D0, + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * DOC: GuC Address Space + * + * The layout of GuC address space is shown below: + * + * :: + * + * +===========> +====================+ <== FFFF_FFFF + * ^ | Reserved | + * | +====================+ <== GUC_GGTT_TOP + * | | | + * | | DRAM | + * GuC | | + * Address +===> +====================+ <== GuC ggtt_pin_bias + * Space ^ | | + * | | | | + * | GuC | GuC | + * | WOPCM | WOPCM | + * | Size | | + * | | | | + * v v | | + * +=======+===> +====================+ <== 0000_0000 + * + * The lower part of GuC Address Space [0, ggtt_pin_bias) is mapped to GuC WOPCM + * while upper part of GuC Address Space [ggtt_pin_bias, GUC_GGTT_TOP) is mapped + * to DRAM. The value of the GuC ggtt_pin_bias is the GuC WOPCM size. + */ + +/** + * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage + * @guc: the guc + * @size: size of area to allocate (both virtual space and memory) + * + * This is a wrapper to create an object for use with the GuC. In order to + * use it inside the GuC, an object needs to be pinned lifetime, so we allocate + * both some backing storage and a range inside the Global GTT. 
We must pin + * it in the GGTT somewhere other than than [0, GUC ggtt_pin_bias) because that + * range is reserved inside GuC. + * + * Return: A i915_vma if successful, otherwise an ERR_PTR. + */ +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u64 flags; + int ret; + + obj = i915_gem_object_create_shmem(gt->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) + goto err; + + flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + ret = i915_vma_pin(vma, 0, 0, flags); + if (ret) { + vma = ERR_PTR(ret); + goto err; + } + + return i915_vma_make_unshrinkable(vma); + +err: + i915_gem_object_put(obj); + return vma; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h new file mode 100644 index 000000000000..2b2f046d3cc3 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_H_ +#define _INTEL_GUC_H_ + +#include "intel_uncore.h" +#include "intel_guc_fw.h" +#include "intel_guc_fwif.h" +#include "intel_guc_ct.h" +#include "intel_guc_log.h" +#include "intel_guc_reg.h" +#include "intel_uc_fw.h" +#include "i915_utils.h" +#include "i915_vma.h" + +struct __guc_ads_blob; + +/* + * Top level structure of GuC. It handles firmware loading and manages client + * pool and doorbells. intel_guc owns a intel_guc_client to replace the legacy + * ExecList submission. + */ +struct intel_guc { + struct intel_uc_fw fw; + struct intel_guc_log log; + struct intel_guc_ct ct; + + /* intel_guc_recv interrupt related state */ + spinlock_t irq_lock; + unsigned int msg_enabled_mask; + + struct { + bool enabled; + void (*reset)(struct intel_guc *guc); + void (*enable)(struct intel_guc *guc); + void (*disable)(struct intel_guc *guc); + } interrupts; + + bool submission_supported; + + struct i915_vma *ads_vma; + struct __guc_ads_blob *ads_blob; + + struct i915_vma *stage_desc_pool; + void *stage_desc_pool_vaddr; + struct ida stage_ids; + struct i915_vma *shared_data; + void *shared_data_vaddr; + + struct intel_guc_client *execbuf_client; + + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); + /* Cyclic counter mod pagesize */ + u32 db_cacheline; + + /* Control params for fw initialization */ + u32 params[GUC_CTL_MAX_DWORDS]; + + /* GuC's FW specific registers used in MMIO send */ + struct { + u32 base; + unsigned int count; + enum forcewake_domains fw_domains; + } send_regs; + + /* Store msg (e.g. 
log flush) that we see while CTBs are disabled */ + u32 mmio_msg; + + /* To serialize the intel_guc_send actions */ + struct mutex send_mutex; + + /* GuC's FW specific send function */ + int (*send)(struct intel_guc *guc, const u32 *data, u32 len, + u32 *response_buf, u32 response_buf_size); + + /* GuC's FW specific event handler function */ + void (*handler)(struct intel_guc *guc); + + /* GuC's FW specific notify function */ + void (*notify)(struct intel_guc *guc); +}; + +static +inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +{ + return guc->send(guc, action, len, NULL, 0); +} + +static inline int +intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) +{ + return guc->send(guc, action, len, response_buf, response_buf_size); +} + +static inline void intel_guc_notify(struct intel_guc *guc) +{ + guc->notify(guc); +} + +static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) +{ + guc->handler(guc); +} + +/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ +#define GUC_GGTT_TOP 0xFEE00000 + +/** + * intel_guc_ggtt_offset() - Get and validate the GGTT offset of @vma + * @guc: intel_guc structure. + * @vma: i915 graphics virtual memory area. + * + * GuC does not allow any gfx GGTT address that falls into range + * [0, ggtt.pin_bias), which is reserved for Boot ROM, SRAM and WOPCM. + * Currently, in order to exclude [0, ggtt.pin_bias) address space from + * GGTT, all gfx objects used by GuC are allocated with intel_guc_allocate_vma() + * and pinned with PIN_OFFSET_BIAS along with the value of ggtt.pin_bias. + * + * Return: GGTT offset of the @vma. + */ +static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc, + struct i915_vma *vma) +{ + u32 offset = i915_ggtt_offset(vma); + + GEM_BUG_ON(offset < i915_ggtt_pin_bias(vma)); + GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); + + return offset; +} + +void intel_guc_init_early(struct intel_guc *guc); +void intel_guc_init_send_regs(struct intel_guc *guc); +void intel_guc_write_params(struct intel_guc *guc); +int intel_guc_init(struct intel_guc *guc); +void intel_guc_fini(struct intel_guc *guc); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size); +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size); +void intel_guc_to_host_event_handler(struct intel_guc *guc); +void intel_guc_to_host_event_handler_nop(struct intel_guc *guc); +int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, + const u32 *payload, u32 len); +int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); +int intel_guc_suspend(struct intel_guc *guc); +int intel_guc_resume(struct intel_guc *guc); +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); + +static inline bool intel_guc_is_supported(struct intel_guc *guc) +{ + return intel_uc_fw_is_supported(&guc->fw); +} + +static inline bool intel_guc_is_enabled(struct intel_guc *guc) +{ + return intel_uc_fw_is_enabled(&guc->fw); +} + +static inline bool intel_guc_is_running(struct intel_guc *guc) +{ + return intel_uc_fw_is_running(&guc->fw); +} + +static inline int intel_guc_sanitize(struct intel_guc *guc) +{ + intel_uc_fw_sanitize(&guc->fw); + guc->mmio_msg = 0; + + return 0; +} + +static inline bool intel_guc_is_submission_supported(struct intel_guc *guc) +{ + 
return guc->submission_supported; +} + +static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask) +{ + spin_lock_irq(&guc->irq_lock); + guc->msg_enabled_mask |= mask; + spin_unlock_irq(&guc->irq_lock); +} + +static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) +{ + spin_lock_irq(&guc->irq_lock); + guc->msg_enabled_mask &= ~mask; + spin_unlock_irq(&guc->irq_lock); +} + +int intel_guc_reset_engine(struct intel_guc *guc, + struct intel_engine_cs *engine); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c new file mode 100644 index 000000000000..ca6674b8e00c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#include "gt/intel_gt.h" +#include "intel_guc_ads.h" +#include "intel_uc.h" +#include "i915_drv.h" + +/* + * The Additional Data Struct (ADS) has pointers for different buffers used by + * the GuC. One single gem object contains the ADS struct itself (guc_ads), the + * scheduling policies (guc_policies), a structure describing a collection of + * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save + * its internal state for sleep. + */ + +static void guc_policy_init(struct guc_policy *policy) +{ + policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; + policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US; + policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US; + policy->policy_flags = 0; +} + +static void guc_policies_init(struct guc_policies *policies) +{ + struct guc_policy *policy; + u32 p, i; + + policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US; + policies->max_num_work_items = POLICY_MAX_NUM_WI; + + for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) { + policy = &policies->policy[p][i]; + + guc_policy_init(policy); + } + } + + policies->is_valid = 1; +} + +static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num) +{ + memset(pool, 0, num * sizeof(*pool)); +} + +/* + * The first 80 dwords of the register state context, containing the + * execlists and ppgtt registers. + */ +#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) + +/* The ads obj includes the struct itself and buffers passed to GuC */ +struct __guc_ads_blob { + struct guc_ads ads; + struct guc_policies policies; + struct guc_mmio_reg_state reg_state; + struct guc_gt_system_info system_info; + struct guc_clients_info clients_info; + struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE]; + u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE]; +} __packed; + +static void __guc_ads_init(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; + struct __guc_ads_blob *blob = guc->ads_blob; + const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; + u32 base; + u8 engine_class; + + /* GuC scheduling policies */ + guc_policies_init(&blob->policies); + + /* + * GuC expects a per-engine-class context image and size + * (minus hwsp and ring context). The context image will be + * used to reinitialize engines after a reset. It must exist + * and be pinned in the GGTT, so that the address won't change after + * we have told GuC where to find it. The context size will be used + * to validate that the LRC base + size fall within allowed GGTT. 
+ */ + for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { + if (engine_class == OTHER_CLASS) + continue; + /* + * TODO: Set context pointer to default state to allow + * GuC to re-init guilty contexts after internal reset. + */ + blob->ads.golden_context_lrca[engine_class] = 0; + blob->ads.eng_state_size[engine_class] = + intel_engine_context_size(dev_priv, engine_class) - + skipped_size; + } + + /* System info */ + blob->system_info.slice_enabled = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask); + blob->system_info.rcs_enabled = 1; + blob->system_info.bcs_enabled = 1; + + blob->system_info.vdbox_enable_mask = VDBOX_MASK(dev_priv); + blob->system_info.vebox_enable_mask = VEBOX_MASK(dev_priv); + blob->system_info.vdbox_sfc_support_mask = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; + + base = intel_guc_ggtt_offset(guc, guc->ads_vma); + + /* Clients info */ + guc_ct_pool_entries_init(blob->ct_pool, ARRAY_SIZE(blob->ct_pool)); + + blob->clients_info.clients_num = 1; + blob->clients_info.ct_pool_addr = base + ptr_offset(blob, ct_pool); + blob->clients_info.ct_pool_count = ARRAY_SIZE(blob->ct_pool); + + /* ADS */ + blob->ads.scheduler_policies = base + ptr_offset(blob, policies); + blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer); + blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); + blob->ads.gt_system_info = base + ptr_offset(blob, system_info); + blob->ads.clients_info = base + ptr_offset(blob, clients_info); + + i915_gem_object_flush_map(guc->ads_vma->obj); +} + +/** + * intel_guc_ads_create() - allocates and initializes GuC ADS. + * @guc: intel_guc struct + * + * GuC needs memory block (Additional Data Struct), where it will store + * some data. Allocate and initialize such memory block for GuC use. + */ +int intel_guc_ads_create(struct intel_guc *guc) +{ + const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob)); + struct i915_vma *vma; + void *blob; + int ret; + + GEM_BUG_ON(guc->ads_vma); + + vma = intel_guc_allocate_vma(guc, size); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(blob)) { + ret = PTR_ERR(blob); + goto err_vma; + } + + guc->ads_vma = vma; + guc->ads_blob = blob; + + __guc_ads_init(guc); + + return 0; + +err_vma: + i915_vma_unpin_and_release(&guc->ads_vma, 0); + return ret; +} + +void intel_guc_ads_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); +} + +/** + * intel_guc_ads_reset() - prepares GuC Additional Data Struct for reuse + * @guc: intel_guc struct + * + * GuC stores some data in ADS, which might be stale after a reset. + * Reinitialize whole ADS in case any part of it was corrupted during + * previous GuC run. 
+ */ +void intel_guc_ads_reset(struct intel_guc *guc) +{ + if (!guc->ads_vma) + return; + __guc_ads_init(guc); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h new file mode 100644 index 000000000000..b00d3ae1113a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_ADS_H_ +#define _INTEL_GUC_ADS_H_ + +struct intel_guc; + +int intel_guc_ads_create(struct intel_guc *guc); +void intel_guc_ads_destroy(struct intel_guc *guc); +void intel_guc_ads_reset(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c new file mode 100644 index 000000000000..b49115517510 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -0,0 +1,897 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2016-2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_guc_ct.h" + +#ifdef CONFIG_DRM_I915_DEBUG_GUC +#define CT_DEBUG_DRIVER(...) DRM_DEBUG_DRIVER(__VA_ARGS__) +#else +#define CT_DEBUG_DRIVER(...) do { } while (0) +#endif + +struct ct_request { + struct list_head link; + u32 fence; + u32 status; + u32 response_len; + u32 *response_buf; +}; + +struct ct_incoming_request { + struct list_head link; + u32 msg[]; +}; + +enum { CTB_SEND = 0, CTB_RECV = 1 }; + +enum { CTB_OWNER_HOST = 0 }; + +static void ct_incoming_request_worker_func(struct work_struct *w); + +/** + * intel_guc_ct_init_early - Initialize CT state without requiring device access + * @ct: pointer to CT struct + */ +void intel_guc_ct_init_early(struct intel_guc_ct *ct) +{ + /* we're using static channel owners */ + ct->host_channel.owner = CTB_OWNER_HOST; + + spin_lock_init(&ct->lock); + INIT_LIST_HEAD(&ct->pending_requests); + INIT_LIST_HEAD(&ct->incoming_requests); + INIT_WORK(&ct->worker, ct_incoming_request_worker_func); +} + +static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) +{ + return container_of(ct, struct intel_guc, ct); +} + +static inline const char *guc_ct_buffer_type_to_str(u32 type) +{ + switch (type) { + case INTEL_GUC_CT_BUFFER_TYPE_SEND: + return "SEND"; + case INTEL_GUC_CT_BUFFER_TYPE_RECV: + return "RECV"; + default: + return "<invalid>"; + } +} + +static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, + u32 cmds_addr, u32 size, u32 owner) +{ + CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", + desc, cmds_addr, size, owner); + memset(desc, 0, sizeof(*desc)); + desc->addr = cmds_addr; + desc->size = size; + desc->owner = owner; +} + +static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) +{ + CT_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n", + desc, desc->head, desc->tail); + desc->head = 0; + desc->tail = 0; + desc->is_in_error = 0; +} + +static int guc_action_register_ct_buffer(struct intel_guc *guc, + u32 desc_addr, + u32 type) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER, + desc_addr, + sizeof(struct guc_ct_buffer_desc), + type + }; + int err; + + /* Can't use generic send(), CT registration must go over MMIO */ + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + if (err) + DRM_ERROR("CT: register %s buffer failed; err=%d\n", + guc_ct_buffer_type_to_str(type), err); + return err; +} + +static int guc_action_deregister_ct_buffer(struct intel_guc *guc, + u32 owner, + u32 type) +{ + u32 action[] = { + 
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, + owner, + type + }; + int err; + + /* Can't use generic send(), CT deregistration must go over MMIO */ + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + if (err) + DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n", + guc_ct_buffer_type_to_str(type), owner, err); + return err; +} + +static int ctch_init(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + struct i915_vma *vma; + void *blob; + int err; + int i; + + GEM_BUG_ON(ctch->vma); + + /* We allocate 1 page to hold both descriptors and both buffers. + * ___________..................... + * |desc (SEND)| : + * |___________| PAGE/4 + * :___________....................: + * |desc (RECV)| : + * |___________| PAGE/4 + * :_______________________________: + * |cmds (SEND) | + * | PAGE/4 + * |_______________________________| + * |cmds (RECV) | + * | PAGE/4 + * |_______________________________| + * + * Each message can use a maximum of 32 dwords and we don't expect to + * have more than 1 in flight at any time, so we have enough space. + * Some logic further ahead will rely on the fact that there is only 1 + * page and that it is always mapped, so if the size is changed the + * other code will need updating as well. + */ + + /* allocate vma */ + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_out; + } + ctch->vma = vma; + + /* map first page */ + blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(blob)) { + err = PTR_ERR(blob); + goto err_vma; + } + CT_DEBUG_DRIVER("CT: vma base=%#x\n", + intel_guc_ggtt_offset(guc, ctch->vma)); + + /* store pointers to desc and cmds */ + for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { + GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); + ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i; + ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; + } + + return 0; + +err_vma: + i915_vma_unpin_and_release(&ctch->vma, 0); +err_out: + CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", + ctch->owner, err); + return err; +} + +static void ctch_fini(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + GEM_BUG_ON(ctch->enabled); + + i915_vma_unpin_and_release(&ctch->vma, I915_VMA_RELEASE_MAP); +} + +static int ctch_enable(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + u32 base; + int err; + int i; + + GEM_BUG_ON(!ctch->vma); + + GEM_BUG_ON(ctch->enabled); + + /* vma should be already allocated and map'ed */ + base = intel_guc_ggtt_offset(guc, ctch->vma); + + /* (re)initialize descriptors + * cmds buffers are in the second half of the blob page + */ + for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { + GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); + guc_ct_buffer_desc_init(ctch->ctbs[i].desc, + base + PAGE_SIZE/4 * i + PAGE_SIZE/2, + PAGE_SIZE/4, + ctch->owner); + } + + /* register buffers, starting wirh RECV buffer + * descriptors are in first half of the blob + */ + err = guc_action_register_ct_buffer(guc, + base + PAGE_SIZE/4 * CTB_RECV, + INTEL_GUC_CT_BUFFER_TYPE_RECV); + if (unlikely(err)) + goto err_out; + + err = guc_action_register_ct_buffer(guc, + base + PAGE_SIZE/4 * CTB_SEND, + INTEL_GUC_CT_BUFFER_TYPE_SEND); + if (unlikely(err)) + goto err_deregister; + + ctch->enabled = true; + + return 0; + +err_deregister: + guc_action_deregister_ct_buffer(guc, + ctch->owner, + INTEL_GUC_CT_BUFFER_TYPE_RECV); +err_out: + DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err); + return err; +} + +static 
void ctch_disable(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + GEM_BUG_ON(!ctch->enabled); + + ctch->enabled = false; + + guc_action_deregister_ct_buffer(guc, + ctch->owner, + INTEL_GUC_CT_BUFFER_TYPE_SEND); + guc_action_deregister_ct_buffer(guc, + ctch->owner, + INTEL_GUC_CT_BUFFER_TYPE_RECV); +} + +static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch) +{ + /* For now it's trivial */ + return ++ctch->next_fence; +} + +/** + * DOC: CTB Host to GuC request + * + * Format of the CTB Host to GuC request message is as follows:: + * + * +------------+---------+---------+---------+---------+ + * | msg[0] | [1] | [2] | ... | [n-1] | + * +------------+---------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+---------+ + * | | 0 | 1 | ... | n | + * +============+=========+=========+=========+=========+ + * | len >= 1 | FENCE | request specific data | + * +------+-----+---------+---------+---------+---------+ + * + * ^-----------------len-------------------^ + */ + +static int ctb_write(struct intel_guc_ct_buffer *ctb, + const u32 *action, + u32 len /* in dwords */, + u32 fence, + bool want_response) +{ + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 head = desc->head / 4; /* in dwords */ + u32 tail = desc->tail / 4; /* in dwords */ + u32 size = desc->size / 4; /* in dwords */ + u32 used; /* in dwords */ + u32 header; + u32 *cmds = ctb->cmds; + unsigned int i; + + GEM_BUG_ON(desc->size % 4); + GEM_BUG_ON(desc->head % 4); + GEM_BUG_ON(desc->tail % 4); + GEM_BUG_ON(tail >= size); + + /* + * tail == head condition indicates empty. GuC FW does not support + * using up the entire buffer to get tail == head meaning full. + */ + if (tail < head) + used = (size - head) + tail; + else + used = tail - head; + + /* make sure there is a space including extra dw for the fence */ + if (unlikely(used + len + 1 >= size)) + return -ENOSPC; + + /* + * Write the message. The format is the following: + * DW0: header (including action code) + * DW1: fence + * DW2+: action data + */ + header = (len << GUC_CT_MSG_LEN_SHIFT) | + (GUC_CT_MSG_WRITE_FENCE_TO_DESC) | + (want_response ? GUC_CT_MSG_SEND_STATUS : 0) | + (action[0] << GUC_CT_MSG_ACTION_SHIFT); + + CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n", + 4, &header, 4, &fence, + 4 * (len - 1), &action[1]); + + cmds[tail] = header; + tail = (tail + 1) % size; + + cmds[tail] = fence; + tail = (tail + 1) % size; + + for (i = 1; i < len; i++) { + cmds[tail] = action[i]; + tail = (tail + 1) % size; + } + + /* now update desc tail (back in bytes) */ + desc->tail = tail * 4; + GEM_BUG_ON(desc->tail > desc->size); + + return 0; +} + +/** + * wait_for_ctb_desc_update - Wait for the CT buffer descriptor update. + * @desc: buffer descriptor + * @fence: response fence + * @status: placeholder for status + * + * Guc will update CT buffer descriptor with new fence and status + * after processing the command identified by the fence. Wait for + * specified fence and then read from the descriptor status of the + * command. + * + * Return: + * * 0 response received (status is valid) + * * -ETIMEDOUT no response within hardcoded timeout + * * -EPROTO no response, CT buffer is in error + */ +static int wait_for_ctb_desc_update(struct guc_ct_buffer_desc *desc, + u32 fence, + u32 *status) +{ + int err; + + /* + * Fast commands should complete in less than 10us, so sample quickly + * up to that length of time, then switch to a slower sleep-wait loop. 
+ * No GuC command should ever take longer than 10ms. + */ +#define done (READ_ONCE(desc->fence) == fence) + err = wait_for_us(done, 10); + if (err) + err = wait_for(done, 10); +#undef done + + if (unlikely(err)) { + DRM_ERROR("CT: fence %u failed; reported fence=%u\n", + fence, desc->fence); + + if (WARN_ON(desc->is_in_error)) { + /* Something went wrong with the messaging, try to reset + * the buffer and hope for the best + */ + guc_ct_buffer_desc_reset(desc); + err = -EPROTO; + } + } + + *status = desc->status; + return err; +} + +/** + * wait_for_ct_request_update - Wait for CT request state update. + * @req: pointer to pending request + * @status: placeholder for status + * + * For each sent request, Guc shall send bac CT response message. + * Our message handler will update status of tracked request once + * response message with given fence is received. Wait here and + * check for valid response status value. + * + * Return: + * * 0 response received (status is valid) + * * -ETIMEDOUT no response within hardcoded timeout + */ +static int wait_for_ct_request_update(struct ct_request *req, u32 *status) +{ + int err; + + /* + * Fast commands should complete in less than 10us, so sample quickly + * up to that length of time, then switch to a slower sleep-wait loop. + * No GuC command should ever take longer than 10ms. + */ +#define done INTEL_GUC_MSG_IS_RESPONSE(READ_ONCE(req->status)) + err = wait_for_us(done, 10); + if (err) + err = wait_for(done, 10); +#undef done + + if (unlikely(err)) + DRM_ERROR("CT: fence %u err %d\n", req->fence, err); + + *status = req->status; + return err; +} + +static int ctch_send(struct intel_guc_ct *ct, + struct intel_guc_ct_channel *ctch, + const u32 *action, + u32 len, + u32 *response_buf, + u32 response_buf_size, + u32 *status) +{ + struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND]; + struct guc_ct_buffer_desc *desc = ctb->desc; + struct ct_request request; + unsigned long flags; + u32 fence; + int err; + + GEM_BUG_ON(!ctch->enabled); + GEM_BUG_ON(!len); + GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); + GEM_BUG_ON(!response_buf && response_buf_size); + + fence = ctch_get_next_fence(ctch); + request.fence = fence; + request.status = 0; + request.response_len = response_buf_size; + request.response_buf = response_buf; + + spin_lock_irqsave(&ct->lock, flags); + list_add_tail(&request.link, &ct->pending_requests); + spin_unlock_irqrestore(&ct->lock, flags); + + err = ctb_write(ctb, action, len, fence, !!response_buf); + if (unlikely(err)) + goto unlink; + + intel_guc_notify(ct_to_guc(ct)); + + if (response_buf) + err = wait_for_ct_request_update(&request, status); + else + err = wait_for_ctb_desc_update(desc, fence, status); + if (unlikely(err)) + goto unlink; + + if (!INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(*status)) { + err = -EIO; + goto unlink; + } + + if (response_buf) { + /* There shall be no data in the status */ + WARN_ON(INTEL_GUC_MSG_TO_DATA(request.status)); + /* Return actual response len */ + err = request.response_len; + } else { + /* There shall be no response payload */ + WARN_ON(request.response_len); + /* Return data decoded from the status dword */ + err = INTEL_GUC_MSG_TO_DATA(*status); + } + +unlink: + spin_lock_irqsave(&ct->lock, flags); + list_del(&request.link); + spin_unlock_irqrestore(&ct->lock, flags); + + return err; +} + +/* + * Command Transport (CT) buffer based GuC send function. 
+ */ +int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) +{ + struct intel_guc_ct *ct = &guc->ct; + struct intel_guc_ct_channel *ctch = &ct->host_channel; + u32 status = ~0; /* undefined */ + int ret; + + mutex_lock(&guc->send_mutex); + + ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size, + &status); + if (unlikely(ret < 0)) { + DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n", + action[0], ret, status); + } else if (unlikely(ret)) { + CT_DEBUG_DRIVER("CT: send action %#x returned %d (%#x)\n", + action[0], ret, ret); + } + + mutex_unlock(&guc->send_mutex); + return ret; +} + +static inline unsigned int ct_header_get_len(u32 header) +{ + return (header >> GUC_CT_MSG_LEN_SHIFT) & GUC_CT_MSG_LEN_MASK; +} + +static inline unsigned int ct_header_get_action(u32 header) +{ + return (header >> GUC_CT_MSG_ACTION_SHIFT) & GUC_CT_MSG_ACTION_MASK; +} + +static inline bool ct_header_is_response(u32 header) +{ + return !!(header & GUC_CT_MSG_IS_RESPONSE); +} + +static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data) +{ + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 head = desc->head / 4; /* in dwords */ + u32 tail = desc->tail / 4; /* in dwords */ + u32 size = desc->size / 4; /* in dwords */ + u32 *cmds = ctb->cmds; + s32 available; /* in dwords */ + unsigned int len; + unsigned int i; + + GEM_BUG_ON(desc->size % 4); + GEM_BUG_ON(desc->head % 4); + GEM_BUG_ON(desc->tail % 4); + GEM_BUG_ON(tail >= size); + GEM_BUG_ON(head >= size); + + /* tail == head condition indicates empty */ + available = tail - head; + if (unlikely(available == 0)) + return -ENODATA; + + /* beware of buffer wrap case */ + if (unlikely(available < 0)) + available += size; + CT_DEBUG_DRIVER("CT: available %d (%u:%u)\n", available, head, tail); + GEM_BUG_ON(available < 0); + + data[0] = cmds[head]; + head = (head + 1) % size; + + /* message len with header */ + len = ct_header_get_len(data[0]) + 1; + if (unlikely(len > (u32)available)) { + DRM_ERROR("CT: incomplete message %*ph %*ph %*ph\n", + 4, data, + 4 * (head + available - 1 > size ? + size - head : available - 1), &cmds[head], + 4 * (head + available - 1 > size ? + available - 1 - size + head : 0), &cmds[0]); + return -EPROTO; + } + + for (i = 1; i < len; i++) { + data[i] = cmds[head]; + head = (head + 1) % size; + } + CT_DEBUG_DRIVER("CT: received %*ph\n", 4 * len, data); + + desc->head = head * 4; + return 0; +} + +/** + * DOC: CTB GuC to Host response + * + * Format of the CTB GuC to Host response message is as follows:: + * + * +------------+---------+---------+---------+---------+---------+ + * | msg[0] | [1] | [2] | [3] | ... | [n-1] | + * +------------+---------+---------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+---------+---------+ + * | | 0 | 1 | 2 | ... 
| n | + * +============+=========+=========+=========+=========+=========+ + * | len >= 2 | FENCE | STATUS | response specific data | + * +------+-----+---------+---------+---------+---------+---------+ + * + * ^-----------------------len-----------------------^ + */ + +static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) +{ + u32 header = msg[0]; + u32 len = ct_header_get_len(header); + u32 msglen = len + 1; /* total message length including header */ + u32 fence; + u32 status; + u32 datalen; + struct ct_request *req; + bool found = false; + + GEM_BUG_ON(!ct_header_is_response(header)); + GEM_BUG_ON(!in_irq()); + + /* Response payload shall at least include fence and status */ + if (unlikely(len < 2)) { + DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg); + return -EPROTO; + } + + fence = msg[1]; + status = msg[2]; + datalen = len - 2; + + /* Format of the status follows RESPONSE message */ + if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) { + DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg); + return -EPROTO; + } + + CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status); + + spin_lock(&ct->lock); + list_for_each_entry(req, &ct->pending_requests, link) { + if (unlikely(fence != req->fence)) { + CT_DEBUG_DRIVER("CT: request %u awaits response\n", + req->fence); + continue; + } + if (unlikely(datalen > req->response_len)) { + DRM_ERROR("CT: response %u too long %*ph\n", + req->fence, 4 * msglen, msg); + datalen = 0; + } + if (datalen) + memcpy(req->response_buf, msg + 3, 4 * datalen); + req->response_len = datalen; + WRITE_ONCE(req->status, status); + found = true; + break; + } + spin_unlock(&ct->lock); + + if (!found) + DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg); + return 0; +} + +static void ct_process_request(struct intel_guc_ct *ct, + u32 action, u32 len, const u32 *payload) +{ + struct intel_guc *guc = ct_to_guc(ct); + int ret; + + CT_DEBUG_DRIVER("CT: request %x %*ph\n", action, 4 * len, payload); + + switch (action) { + case INTEL_GUC_ACTION_DEFAULT: + ret = intel_guc_to_host_process_recv_msg(guc, payload, len); + if (unlikely(ret)) + goto fail_unexpected; + break; + + default: +fail_unexpected: + DRM_ERROR("CT: unexpected request %x %*ph\n", + action, 4 * len, payload); + break; + } +} + +static bool ct_process_incoming_requests(struct intel_guc_ct *ct) +{ + unsigned long flags; + struct ct_incoming_request *request; + u32 header; + u32 *payload; + bool done; + + spin_lock_irqsave(&ct->lock, flags); + request = list_first_entry_or_null(&ct->incoming_requests, + struct ct_incoming_request, link); + if (request) + list_del(&request->link); + done = !!list_empty(&ct->incoming_requests); + spin_unlock_irqrestore(&ct->lock, flags); + + if (!request) + return true; + + header = request->msg[0]; + payload = &request->msg[1]; + ct_process_request(ct, + ct_header_get_action(header), + ct_header_get_len(header), + payload); + + kfree(request); + return done; +} + +static void ct_incoming_request_worker_func(struct work_struct *w) +{ + struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker); + bool done; + + done = ct_process_incoming_requests(ct); + if (!done) + queue_work(system_unbound_wq, &ct->worker); +} + +/** + * DOC: CTB GuC to Host request + * + * Format of the CTB GuC to Host request message is as follows:: + * + * +------------+---------+---------+---------+---------+---------+ + * | msg[0] | [1] | [2] | [3] | ... 
| [n-1] | + * +------------+---------+---------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+---------+---------+ + * | | 0 | 1 | 2 | ... | n | + * +============+=========+=========+=========+=========+=========+ + * | len | request specific data | + * +------+-----+---------+---------+---------+---------+---------+ + * + * ^-----------------------len-----------------------^ + */ + +static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) +{ + u32 header = msg[0]; + u32 len = ct_header_get_len(header); + u32 msglen = len + 1; /* total message length including header */ + struct ct_incoming_request *request; + unsigned long flags; + + GEM_BUG_ON(ct_header_is_response(header)); + + request = kmalloc(sizeof(*request) + 4 * msglen, GFP_ATOMIC); + if (unlikely(!request)) { + DRM_ERROR("CT: dropping request %*ph\n", 4 * msglen, msg); + return 0; /* XXX: -ENOMEM ? */ + } + memcpy(request->msg, msg, 4 * msglen); + + spin_lock_irqsave(&ct->lock, flags); + list_add_tail(&request->link, &ct->incoming_requests); + spin_unlock_irqrestore(&ct->lock, flags); + + queue_work(system_unbound_wq, &ct->worker); + return 0; +} + +static void ct_process_host_channel(struct intel_guc_ct *ct) +{ + struct intel_guc_ct_channel *ctch = &ct->host_channel; + struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV]; + u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */ + int err = 0; + + if (!ctch->enabled) + return; + + do { + err = ctb_read(ctb, msg); + if (err) + break; + + if (ct_header_is_response(msg[0])) + err = ct_handle_response(ct, msg); + else + err = ct_handle_request(ct, msg); + } while (!err); + + if (GEM_WARN_ON(err == -EPROTO)) { + DRM_ERROR("CT: corrupted message detected!\n"); + ctb->desc->is_in_error = 1; + } +} + +/* + * When we're communicating with the GuC over CT, GuC uses events + * to notify us about new messages being posted on the RECV buffer. + */ +void intel_guc_to_host_event_handler_ct(struct intel_guc *guc) +{ + struct intel_guc_ct *ct = &guc->ct; + + ct_process_host_channel(ct); +} + +/** + * intel_guc_ct_init - Init CT communication + * @ct: pointer to CT struct + * + * Allocate memory required for communication via + * the CT channel. + * + * Return: 0 on success, a negative errno code on failure. + */ +int intel_guc_ct_init(struct intel_guc_ct *ct) +{ + struct intel_guc *guc = ct_to_guc(ct); + struct intel_guc_ct_channel *ctch = &ct->host_channel; + int err; + + err = ctch_init(guc, ctch); + if (unlikely(err)) { + DRM_ERROR("CT: can't open channel %d; err=%d\n", + ctch->owner, err); + return err; + } + + GEM_BUG_ON(!ctch->vma); + return 0; +} + +/** + * intel_guc_ct_fini - Fini CT communication + * @ct: pointer to CT struct + * + * Deallocate memory required for communication via + * the CT channel. + */ +void intel_guc_ct_fini(struct intel_guc_ct *ct) +{ + struct intel_guc *guc = ct_to_guc(ct); + struct intel_guc_ct_channel *ctch = &ct->host_channel; + + ctch_fini(guc, ctch); +} + +/** + * intel_guc_ct_enable - Enable buffer based command transport. + * @ct: pointer to CT struct + * + * Return: 0 on success, a negative errno code on failure. + */ +int intel_guc_ct_enable(struct intel_guc_ct *ct) +{ + struct intel_guc *guc = ct_to_guc(ct); + struct intel_guc_ct_channel *ctch = &ct->host_channel; + + if (ctch->enabled) + return 0; + + return ctch_enable(guc, ctch); +} + +/** + * intel_guc_ct_disable - Disable buffer based command transport. 
+ * @ct: pointer to CT struct + */ +void intel_guc_ct_disable(struct intel_guc_ct *ct) +{ + struct intel_guc *guc = ct_to_guc(ct); + struct intel_guc_ct_channel *ctch = &ct->host_channel; + + if (!ctch->enabled) + return; + + ctch_disable(guc, ctch); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h new file mode 100644 index 000000000000..7c24d83f5c24 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2016-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_CT_H_ +#define _INTEL_GUC_CT_H_ + +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +#include "intel_guc_fwif.h" + +struct i915_vma; +struct intel_guc; + +/** + * DOC: Command Transport (CT). + * + * Buffer based command transport is a replacement for MMIO based mechanism. + * It can be used to perform both host-2-guc and guc-to-host communication. + */ + +/** Represents single command transport buffer. + * + * A single command transport buffer consists of two parts, the header + * record (command transport buffer descriptor) and the actual buffer which + * holds the commands. + * + * @desc: pointer to the buffer descriptor + * @cmds: pointer to the commands buffer + */ +struct intel_guc_ct_buffer { + struct guc_ct_buffer_desc *desc; + u32 *cmds; +}; + +/** Represents pair of command transport buffers. + * + * Buffers go in pairs to allow bi-directional communication. + * To simplify the code we place both of them in the same vma. + * Buffers from the same pair must share unique owner id. + * + * @vma: pointer to the vma with pair of CT buffers + * @ctbs: buffers for sending(0) and receiving(1) commands + * @owner: unique identifier + * @next_fence: fence to be used with next send command + */ +struct intel_guc_ct_channel { + struct i915_vma *vma; + struct intel_guc_ct_buffer ctbs[2]; + u32 owner; + u32 next_fence; + bool enabled; +}; + +/** Holds all command transport channels. 
+ * + * @host_channel: main channel used by the host + */ +struct intel_guc_ct { + struct intel_guc_ct_channel host_channel; + /* other channels are tbd */ + + /** @lock: protects pending requests list */ + spinlock_t lock; + + /** @pending_requests: list of requests waiting for response */ + struct list_head pending_requests; + + /** @incoming_requests: list of incoming requests */ + struct list_head incoming_requests; + + /** @worker: worker for handling incoming requests */ + struct work_struct worker; +}; + +void intel_guc_ct_init_early(struct intel_guc_ct *ct); +int intel_guc_ct_init(struct intel_guc_ct *ct); +void intel_guc_ct_fini(struct intel_guc_ct *ct); +int intel_guc_ct_enable(struct intel_guc_ct *ct); +void intel_guc_ct_disable(struct intel_guc_ct *ct); + +static inline void intel_guc_ct_stop(struct intel_guc_ct *ct) +{ + ct->host_channel.enabled = false; +} + +int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size); +void intel_guc_to_host_event_handler_ct(struct intel_guc *guc); + +#endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c new file mode 100644 index 000000000000..5528224448f6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014-2019 Intel Corporation + * + * Authors: + * Vinit Azad <vinit.azad@intel.com> + * Ben Widawsky <ben@bwidawsk.net> + * Dave Gordon <david.s.gordon@intel.com> + * Alex Dai <yu.dai@intel.com> + */ + +#include "gt/intel_gt.h" +#include "intel_guc_fw.h" +#include "i915_drv.h" + +/** + * intel_guc_fw_init_early() - initializes GuC firmware struct + * @guc: intel_guc struct + * + * On platforms with GuC selects firmware for uploading + */ +void intel_guc_fw_init_early(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, HAS_GT_UC(i915), + INTEL_INFO(i915)->platform, INTEL_REVID(i915)); +} + +static void guc_prepare_xfer(struct intel_uncore *uncore) +{ + u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES | + GUC_ENABLE_READ_CACHE_LOGIC | + GUC_ENABLE_MIA_CACHING | + GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | + GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | + GUC_ENABLE_MIA_CLOCK_GATING; + + /* Must program this register before loading the ucode with DMA */ + intel_uncore_write(uncore, GUC_SHIM_CONTROL, shim_flags); + + if (IS_GEN9_LP(uncore->i915)) + intel_uncore_write(uncore, GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); + else + intel_uncore_write(uncore, GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE); + + if (IS_GEN(uncore->i915, 9)) { + /* DOP Clock Gating Enable for GuC clocks */ + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + 0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE); + + /* allows for 5us (in 10ns units) before GT can go to RC6 */ + intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF); + } +} + +/* Copy RSA signature from the fw image to HW for verification */ +static void guc_xfer_rsa(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) +{ + u32 rsa[UOS_RSA_SCRATCH_COUNT]; + size_t copied; + int i; + + copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa)); + GEM_BUG_ON(copied < sizeof(rsa)); + + for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) + intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]); +} + +/* + * Read the GuC status register (GUC_STATUS) and store it in the + * specified location; then return a boolean indicating whether + * the value matches either of two 
values representing completion + * of the GuC boot process. + * + * This is used for polling the GuC status in a wait_for() + * loop below. + */ +static inline bool guc_ready(struct intel_uncore *uncore, u32 *status) +{ + u32 val = intel_uncore_read(uncore, GUC_STATUS); + u32 uk_val = val & GS_UKERNEL_MASK; + + *status = val; + return (uk_val == GS_UKERNEL_READY) || + ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE)); +} + +static int guc_wait_ucode(struct intel_uncore *uncore) +{ + u32 status; + int ret; + + /* + * Wait for the GuC to start up. + * NB: Docs recommend not using the interrupt for completion. + * Measurements indicate this should take no more than 20ms, so a + * timeout here indicates that the GuC has failed and is unusable. + * (Higher levels of the driver may decide to reset the GuC and + * attempt the ucode load again if this happens.) + */ + ret = wait_for(guc_ready(uncore, &status), 100); + DRM_DEBUG_DRIVER("GuC status %#x\n", status); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + DRM_ERROR("GuC firmware signature verification failed\n"); + ret = -ENOEXEC; + } + + if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) { + DRM_ERROR("GuC firmware exception. EIP: %#x\n", + intel_uncore_read(uncore, SOFT_SCRATCH(13))); + ret = -ENXIO; + } + + return ret; +} + +/** + * intel_guc_fw_upload() - load GuC uCode to device + * @guc: intel_guc structure + * + * Called from intel_uc_init_hw() during driver load, resume from sleep and + * after a GPU reset. + * + * The firmware image should have already been fetched into memory, so only + * check that fetch succeeded, and then transfer the image to the h/w. + * + * Return: non-zero code on error + */ +int intel_guc_fw_upload(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_uncore *uncore = gt->uncore; + int ret; + + guc_prepare_xfer(uncore); + + /* + * Note that GuC needs the CSS header plus uKernel code to be copied + * by the DMA engine in one operation, whereas the RSA signature is + * loaded via MMIO. + */ + guc_xfer_rsa(&guc->fw, uncore); + + /* + * Current uCode expects the code to be loaded at 8k; locations below + * this are used for the stack. 
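/*
 * Illustrative, self-contained sketch (not from the patch itself) of the
 * readiness test that guc_ready()/guc_wait_ucode() above perform on the raw
 * GUC_STATUS value.  The SKETCH_GS_* constants mirror the GS_* field
 * definitions added in intel_guc_reg.h later in this diff; the sketch_*
 * names themselves are made up for this example, and only standard C is
 * used so the decode logic can be checked in isolation.
 */
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_GS_MIA_CORE_STATE	(0x01u << 16)
#define SKETCH_GS_UKERNEL_MASK		(0xFFu << 8)
#define SKETCH_GS_UKERNEL_LAPIC_DONE	(0x30u << 8)
#define SKETCH_GS_UKERNEL_READY		(0xF0u << 8)
#define SKETCH_GS_BOOTROM_MASK		(0x7Fu << 1)
#define SKETCH_GS_BOOTROM_RSA_FAILED	(0x50u << 1)

/* Mirrors guc_ready(): done when the uKernel reports READY, or when the MIA
 * core is up and the uKernel has finished LAPIC setup. */
static bool sketch_guc_status_ready(uint32_t status)
{
	uint32_t uk = status & SKETCH_GS_UKERNEL_MASK;

	return uk == SKETCH_GS_UKERNEL_READY ||
	       ((status & SKETCH_GS_MIA_CORE_STATE) &&
		uk == SKETCH_GS_UKERNEL_LAPIC_DONE);
}

/* Mirrors the RSA-failure check in guc_wait_ucode(). */
static bool sketch_guc_status_rsa_failed(uint32_t status)
{
	return (status & SKETCH_GS_BOOTROM_MASK) == SKETCH_GS_BOOTROM_RSA_FAILED;
}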
+ */ + ret = intel_uc_fw_upload(&guc->fw, gt, 0x2000, UOS_MOVE); + if (ret) + goto out; + + ret = guc_wait_ucode(uncore); + if (ret) + goto out; + + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_RUNNING); + return 0; + +out: + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL); + return ret; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h new file mode 100644 index 000000000000..b5ab639d7259 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2017-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_FW_H_ +#define _INTEL_GUC_FW_H_ + +struct intel_guc; + +void intel_guc_fw_init_early(struct intel_guc *guc); +int intel_guc_fw_upload(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h new file mode 100644 index 000000000000..1d3cdd67ca2f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -0,0 +1,604 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_FWIF_H +#define _INTEL_GUC_FWIF_H + +#include <linux/bits.h> +#include <linux/compiler.h> +#include <linux/types.h> + +#define GUC_CLIENT_PRIORITY_KMD_HIGH 0 +#define GUC_CLIENT_PRIORITY_HIGH 1 +#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 +#define GUC_CLIENT_PRIORITY_NORMAL 3 +#define GUC_CLIENT_PRIORITY_NUM 4 + +#define GUC_MAX_STAGE_DESCRIPTORS 1024 +#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS + +#define GUC_RENDER_ENGINE 0 +#define GUC_VIDEO_ENGINE 1 +#define GUC_BLITTER_ENGINE 2 +#define GUC_VIDEOENHANCE_ENGINE 3 +#define GUC_VIDEO_ENGINE2 4 +#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1) + +#define GUC_MAX_ENGINE_CLASSES 5 +#define GUC_MAX_INSTANCES_PER_CLASS 16 + +#define GUC_DOORBELL_INVALID 256 + +#define GUC_DB_SIZE (PAGE_SIZE) +#define GUC_WQ_SIZE (PAGE_SIZE * 2) + +/* Work queue item header definitions */ +#define WQ_STATUS_ACTIVE 1 +#define WQ_STATUS_SUSPENDED 2 +#define WQ_STATUS_CMD_ERROR 3 +#define WQ_STATUS_ENGINE_ID_NOT_USED 4 +#define WQ_STATUS_SUSPENDED_FROM_RESET 5 +#define WQ_TYPE_SHIFT 0 +#define WQ_TYPE_BATCH_BUF (0x1 << WQ_TYPE_SHIFT) +#define WQ_TYPE_PSEUDO (0x2 << WQ_TYPE_SHIFT) +#define WQ_TYPE_INORDER (0x3 << WQ_TYPE_SHIFT) +#define WQ_TYPE_NOOP (0x4 << WQ_TYPE_SHIFT) +#define WQ_TARGET_SHIFT 10 +#define WQ_LEN_SHIFT 16 +#define WQ_NO_WCFLUSH_WAIT (1 << 27) +#define WQ_PRESENT_WORKLOAD (1 << 28) + +#define WQ_RING_TAIL_SHIFT 20 +#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */ +#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT) + +#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0) +#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1) +#define GUC_STAGE_DESC_ATTR_KERNEL BIT(2) +#define GUC_STAGE_DESC_ATTR_PREEMPT BIT(3) +#define GUC_STAGE_DESC_ATTR_RESET BIT(4) +#define GUC_STAGE_DESC_ATTR_WQLOCKED BIT(5) +#define GUC_STAGE_DESC_ATTR_PCH BIT(6) +#define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7) + +/* New GuC control data */ +#define GUC_CTL_CTXINFO 0 +#define GUC_CTL_CTXNUM_IN16_SHIFT 0 +#define GUC_CTL_BASE_ADDR_SHIFT 12 + +#define GUC_CTL_LOG_PARAMS 1 +#define GUC_LOG_VALID (1 << 0) +#define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1) +#define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) +#define GUC_LOG_CRASH_SHIFT 4 +#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) +#define GUC_LOG_DPC_SHIFT 6 +#define GUC_LOG_DPC_MASK (0x7 << GUC_LOG_DPC_SHIFT) +#define GUC_LOG_ISR_SHIFT 9 +#define GUC_LOG_ISR_MASK (0x7 << 
GUC_LOG_ISR_SHIFT) +#define GUC_LOG_BUF_ADDR_SHIFT 12 + +#define GUC_CTL_WA 2 +#define GUC_CTL_FEATURE 3 +#define GUC_CTL_DISABLE_SCHEDULER (1 << 14) + +#define GUC_CTL_DEBUG 4 +#define GUC_LOG_VERBOSITY_SHIFT 0 +#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT) +/* Verbosity range-check limits, without the shift */ +#define GUC_LOG_VERBOSITY_MIN 0 +#define GUC_LOG_VERBOSITY_MAX 3 +#define GUC_LOG_VERBOSITY_MASK 0x0000000f +#define GUC_LOG_DESTINATION_MASK (3 << 4) +#define GUC_LOG_DISABLED (1 << 6) +#define GUC_PROFILE_ENABLED (1 << 7) + +#define GUC_CTL_ADS 5 +#define GUC_ADS_ADDR_SHIFT 1 +#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) + +#define GUC_CTL_MAX_DWORDS (SOFT_SCRATCH_COUNT - 2) /* [1..14] */ + +/* Work item for submitting workloads into work queue of GuC. */ +struct guc_wq_item { + u32 header; + u32 context_desc; + u32 submit_element_info; + u32 fence_id; +} __packed; + +struct guc_process_desc { + u32 stage_id; + u64 db_base_addr; + u32 head; + u32 tail; + u32 error_offset; + u64 wq_base_addr; + u32 wq_size_bytes; + u32 wq_status; + u32 engine_presence; + u32 priority; + u32 reserved[30]; +} __packed; + +/* engine id and context id is packed into guc_execlist_context.context_id*/ +#define GUC_ELC_CTXID_OFFSET 0 +#define GUC_ELC_ENGINE_OFFSET 29 + +/* The execlist context including software and HW information */ +struct guc_execlist_context { + u32 context_desc; + u32 context_id; + u32 ring_status; + u32 ring_lrca; + u32 ring_begin; + u32 ring_end; + u32 ring_next_free_location; + u32 ring_current_tail_pointer_value; + u8 engine_state_submit_value; + u8 engine_state_wait_value; + u16 pagefault_count; + u16 engine_submit_queue_count; +} __packed; + +/* + * This structure describes a stage set arranged for a particular communication + * between uKernel (GuC) and Driver (KMD). Technically, this is known as a + * "GuC Context descriptor" in the specs, but we use the term "stage descriptor" + * to avoid confusion with all the other things already named "context" in the + * driver. A static pool of these descriptors are stored inside a GEM object + * (stage_desc_pool) which is held for the entire lifetime of our interaction + * with the GuC, being allocated before the GuC is loaded with its firmware. + */ +struct guc_stage_desc { + u32 sched_common_area; + u32 stage_id; + u32 pas_id; + u8 engines_used; + u64 db_trigger_cpu; + u32 db_trigger_uk; + u64 db_trigger_phy; + u16 db_id; + + struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM]; + + u8 attribute; + + u32 priority; + + u32 wq_sampled_tail_offset; + u32 wq_total_submit_enqueues; + + u32 process_desc; + u32 wq_addr; + u32 wq_size; + + u32 engine_presence; + + u8 engine_suspended; + + u8 reserved0[3]; + u64 reserved1[1]; + + u64 desc_private; +} __packed; + +/** + * DOC: CTB based communication + * + * The CTB (command transport buffer) communication between Host and GuC + * is based on u32 data stream written to the shared buffer. One buffer can + * be used to transmit data only in one direction (one-directional channel). + * + * Current status of the each buffer is stored in the buffer descriptor. + * Buffer descriptor holds tail and head fields that represents active data + * stream. 
The tail field is updated by the data producer (sender), and head + * field is updated by the data consumer (receiver):: + * + * +------------+ + * | DESCRIPTOR | +=================+============+========+ + * +============+ | | MESSAGE(s) | | + * | address |--------->+=================+============+========+ + * +------------+ + * | head | ^-----head--------^ + * +------------+ + * | tail | ^---------tail-----------------^ + * +------------+ + * | size | ^---------------size--------------------^ + * +------------+ + * + * Each message in data stream starts with the single u32 treated as a header, + * followed by optional set of u32 data that makes message specific payload:: + * + * +------------+---------+---------+---------+ + * | MESSAGE | + * +------------+---------+---------+---------+ + * | msg[0] | [1] | ... | [n-1] | + * +------------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+ + * | | 0 | ... | n | + * +======+=====+=========+=========+=========+ + * | 31:16| code| | | | + * +------+-----+ | | | + * | 15:5|flags| | | | + * +------+-----+ | | | + * | 4:0| len| | | | + * +------+-----+---------+---------+---------+ + * + * ^-------------len-------------^ + * + * The message header consists of: + * + * - **len**, indicates length of the message payload (in u32) + * - **code**, indicates message code + * - **flags**, holds various bits to control message handling + */ + +/* + * Describes single command transport buffer. + * Used by both guc-master and clients. + */ +struct guc_ct_buffer_desc { + u32 addr; /* gfx address */ + u64 host_private; /* host private data */ + u32 size; /* size in bytes */ + u32 head; /* offset updated by GuC*/ + u32 tail; /* offset updated by owner */ + u32 is_in_error; /* error indicator */ + u32 fence; /* fence updated by GuC */ + u32 status; /* status updated by GuC */ + u32 owner; /* id of the channel owner */ + u32 owner_sub_id; /* owner-defined field for extra tracking */ + u32 reserved[5]; +} __packed; + +/* Type of command transport buffer */ +#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u +#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u + +/* + * Definition of the command transport message header (DW0) + * + * bit[4..0] message len (in dwords) + * bit[7..5] reserved + * bit[8] response (G2H only) + * bit[8] write fence to desc (H2G only) + * bit[9] write status to H2G buff (H2G only) + * bit[10] send status back via G2H (H2G only) + * bit[15..11] reserved + * bit[31..16] action code + */ +#define GUC_CT_MSG_LEN_SHIFT 0 +#define GUC_CT_MSG_LEN_MASK 0x1F +#define GUC_CT_MSG_IS_RESPONSE (1 << 8) +#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8) +#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9) +#define GUC_CT_MSG_SEND_STATUS (1 << 10) +#define GUC_CT_MSG_ACTION_SHIFT 16 +#define GUC_CT_MSG_ACTION_MASK 0xFFFF + +#define GUC_FORCEWAKE_RENDER (1 << 0) +#define GUC_FORCEWAKE_MEDIA (1 << 1) + +#define GUC_POWER_UNSPECIFIED 0 +#define GUC_POWER_D0 1 +#define GUC_POWER_D1 2 +#define GUC_POWER_D2 3 +#define GUC_POWER_D3 4 + +/* Scheduling policy settings */ + +/* Reset engine upon preempt failure */ +#define POLICY_RESET_ENGINE (1<<0) +/* Preempt to idle on quantum expiry */ +#define POLICY_PREEMPT_TO_IDLE (1<<1) + +#define POLICY_MAX_NUM_WI 15 +#define POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 +#define POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 +#define POLICY_DEFAULT_PREEMPTION_TIME_US 500000 +#define POLICY_DEFAULT_FAULT_TIME_US 250000 + +struct guc_policy { + /* Time for one workload to execute. 
(in micro seconds) */ + u32 execution_quantum; + /* Time to wait for a preemption request to completed before issuing a + * reset. (in micro seconds). */ + u32 preemption_time; + /* How much time to allow to run after the first fault is observed. + * Then preempt afterwards. (in micro seconds) */ + u32 fault_time; + u32 policy_flags; + u32 reserved[8]; +} __packed; + +struct guc_policies { + struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINE_CLASSES]; + u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; + /* In micro seconds. How much time to allow before DPC processing is + * called back via interrupt (to prevent DPC queue drain starving). + * Typically 1000s of micro seconds (example only, not granularity). */ + u32 dpc_promote_time; + + /* Must be set to take these new values. */ + u32 is_valid; + + /* Max number of WIs to process per call. A large value may keep CS + * idle. */ + u32 max_num_work_items; + + u32 reserved[4]; +} __packed; + +/* GuC MMIO reg state struct */ + + +#define GUC_REGSET_MAX_REGISTERS 64 +#define GUC_S3_SAVE_SPACE_PAGES 10 + +struct guc_mmio_reg { + u32 offset; + u32 value; + u32 flags; +#define GUC_REGSET_MASKED (1 << 0) +} __packed; + +struct guc_mmio_regset { + struct guc_mmio_reg registers[GUC_REGSET_MAX_REGISTERS]; + u32 values_valid; + u32 number_of_registers; +} __packed; + +/* GuC register sets */ +struct guc_mmio_reg_state { + struct guc_mmio_regset engine_reg[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; + u32 reserved[98]; +} __packed; + +/* HW info */ +struct guc_gt_system_info { + u32 slice_enabled; + u32 rcs_enabled; + u32 reserved0; + u32 bcs_enabled; + u32 vdbox_enable_mask; + u32 vdbox_sfc_support_mask; + u32 vebox_enable_mask; + u32 reserved[9]; +} __packed; + +/* Clients info */ +struct guc_ct_pool_entry { + struct guc_ct_buffer_desc desc; + u32 reserved[7]; +} __packed; + +#define GUC_CT_POOL_SIZE 2 + +struct guc_clients_info { + u32 clients_num; + u32 reserved0[13]; + u32 ct_pool_addr; + u32 ct_pool_count; + u32 reserved[4]; +} __packed; + +/* GuC Additional Data Struct */ +struct guc_ads { + u32 reg_state_addr; + u32 reg_state_buffer; + u32 scheduler_policies; + u32 gt_system_info; + u32 clients_info; + u32 control_data; + u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; + u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; + u32 reserved[16]; +} __packed; + +/* GuC logging structures */ + +enum guc_log_buffer_type { + GUC_ISR_LOG_BUFFER, + GUC_DPC_LOG_BUFFER, + GUC_CRASH_DUMP_LOG_BUFFER, + GUC_MAX_LOG_BUFFER +}; + +/** + * struct guc_log_buffer_state - GuC log buffer state + * + * Below state structure is used for coordination of retrieval of GuC firmware + * logs. Separate state is maintained for each log buffer type. + * read_ptr points to the location where i915 read last in log buffer and + * is read only for GuC firmware. write_ptr is incremented by GuC with number + * of bytes written for each log entry and is read only for i915. + * When any type of log buffer becomes half full, GuC sends a flush interrupt. + * GuC firmware expects that while it is writing to 2nd half of the buffer, + * first half would get consumed by Host and then get a flush completed + * acknowledgment from Host, so that it does not end up doing any overwrite + * causing loss of logs. So when buffer gets half filled & i915 has requested + * for interrupt, GuC will set flush_to_file field, set the sampled_write_ptr + * to the value of write_ptr and raise the interrupt. 
+ * On receiving the interrupt i915 should read the buffer, clear flush_to_file + * field and also update read_ptr with the value of sample_write_ptr, before + * sending an acknowledgment to GuC. marker & version fields are for internal + * usage of GuC and opaque to i915. buffer_full_cnt field is incremented every + * time GuC detects the log buffer overflow. + */ +struct guc_log_buffer_state { + u32 marker[2]; + u32 read_ptr; + u32 write_ptr; + u32 size; + u32 sampled_write_ptr; + union { + struct { + u32 flush_to_file:1; + u32 buffer_full_cnt:4; + u32 reserved:27; + }; + u32 flags; + }; + u32 version; +} __packed; + +struct guc_ctx_report { + u32 report_return_status; + u32 reserved1[64]; + u32 affected_count; + u32 reserved2[2]; +} __packed; + +/* GuC Shared Context Data Struct */ +struct guc_shared_ctx_data { + u32 addr_of_last_preempted_data_low; + u32 addr_of_last_preempted_data_high; + u32 addr_of_last_preempted_data_high_tmp; + u32 padding; + u32 is_mapped_to_proxy; + u32 proxy_ctx_id; + u32 engine_reset_ctx_id; + u32 media_reset_count; + u32 reserved1[8]; + u32 uk_last_ctx_switch_reason; + u32 was_reset; + u32 lrca_gpu_addr; + u64 execlist_ctx; + u32 reserved2[66]; + struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM]; +} __packed; + +/** + * DOC: MMIO based communication + * + * The MMIO based communication between Host and GuC uses software scratch + * registers, where first register holds data treated as message header, + * and other registers are used to hold message payload. + * + * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8, + * but no H2G command takes more than 8 parameters and the GuC FW + * itself uses an 8-element array to store the H2G message. + * + * +-----------+---------+---------+---------+ + * | MMIO[0] | MMIO[1] | ... | MMIO[n] | + * +-----------+---------+---------+---------+ + * | header | optional payload | + * +======+====+=========+=========+=========+ + * | 31:28|type| | | | + * +------+----+ | | | + * | 27:16|data| | | | + * +------+----+ | | | + * | 15:0|code| | | | + * +------+----+---------+---------+---------+ + * + * The message header consists of: + * + * - **type**, indicates message type + * - **code**, indicates message code, is specific for **type** + * - **data**, indicates message data, optional, depends on **code** + * + * The following message **types** are supported: + * + * - **REQUEST**, indicates Host-to-GuC request, requested GuC action code + * must be priovided in **code** field. Optional action specific parameters + * can be provided in remaining payload registers or **data** field. + * + * - **RESPONSE**, indicates GuC-to-Host response from earlier GuC request, + * action response status will be provided in **code** field. Optional + * response data can be returned in remaining payload registers or **data** + * field. 
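/*
 * Illustrative, self-contained sketch (not from the patch itself) of how the
 * MMIO message header described above is packed and unpacked: type in bits
 * 31:28, data in 27:16, code in 15:0.  The SKETCH_MSG_* constants mirror the
 * INTEL_GUC_MSG_* definitions that follow in intel_guc_fwif.h; the sketch_*
 * names are invented for this example only.
 */
#include <stdint.h>

#define SKETCH_MSG_TYPE_SHIFT	28
#define SKETCH_MSG_TYPE_MASK	(0xFu << SKETCH_MSG_TYPE_SHIFT)
#define SKETCH_MSG_DATA_SHIFT	16
#define SKETCH_MSG_DATA_MASK	(0xFFFu << SKETCH_MSG_DATA_SHIFT)
#define SKETCH_MSG_CODE_SHIFT	0
#define SKETCH_MSG_CODE_MASK	(0xFFFFu << SKETCH_MSG_CODE_SHIFT)

#define SKETCH_MSG_TYPE_REQUEST		0x0u
#define SKETCH_MSG_TYPE_RESPONSE	0xFu

/* Build the header dword from its three fields. */
static uint32_t sketch_msg_header(uint32_t type, uint32_t data, uint32_t code)
{
	return ((type << SKETCH_MSG_TYPE_SHIFT) & SKETCH_MSG_TYPE_MASK) |
	       ((data << SKETCH_MSG_DATA_SHIFT) & SKETCH_MSG_DATA_MASK) |
	       ((code << SKETCH_MSG_CODE_SHIFT) & SKETCH_MSG_CODE_MASK);
}

/* Usage: a Host-to-GuC REQUEST carries the requested action code in the
 * code field, e.g. sketch_msg_header(SKETCH_MSG_TYPE_REQUEST, 0, action). */
static uint32_t sketch_msg_type(uint32_t header)
{
	return (header & SKETCH_MSG_TYPE_MASK) >> SKETCH_MSG_TYPE_SHIFT;
}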
+ */ + +#define GUC_MAX_MMIO_MSG_LEN 8 + +#define INTEL_GUC_MSG_TYPE_SHIFT 28 +#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) +#define INTEL_GUC_MSG_DATA_SHIFT 16 +#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) +#define INTEL_GUC_MSG_CODE_SHIFT 0 +#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) + +#define __INTEL_GUC_MSG_GET(T, m) \ + (((m) & INTEL_GUC_MSG_ ## T ## _MASK) >> INTEL_GUC_MSG_ ## T ## _SHIFT) +#define INTEL_GUC_MSG_TO_TYPE(m) __INTEL_GUC_MSG_GET(TYPE, m) +#define INTEL_GUC_MSG_TO_DATA(m) __INTEL_GUC_MSG_GET(DATA, m) +#define INTEL_GUC_MSG_TO_CODE(m) __INTEL_GUC_MSG_GET(CODE, m) + +enum intel_guc_msg_type { + INTEL_GUC_MSG_TYPE_REQUEST = 0x0, + INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, +}; + +#define __INTEL_GUC_MSG_TYPE_IS(T, m) \ + (INTEL_GUC_MSG_TO_TYPE(m) == INTEL_GUC_MSG_TYPE_ ## T) +#define INTEL_GUC_MSG_IS_REQUEST(m) __INTEL_GUC_MSG_TYPE_IS(REQUEST, m) +#define INTEL_GUC_MSG_IS_RESPONSE(m) __INTEL_GUC_MSG_TYPE_IS(RESPONSE, m) + +enum intel_guc_action { + INTEL_GUC_ACTION_DEFAULT = 0x0, + INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, + INTEL_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3, + INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, + INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, + INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, + INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, + INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, + INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, + INTEL_GUC_ACTION_SAMPLE_FORCEWAKE = 0x3005, + INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, + INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, + INTEL_GUC_ACTION_LIMIT +}; + +enum intel_guc_preempt_options { + INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, + INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, +}; + +enum intel_guc_report_status { + INTEL_GUC_REPORT_STATUS_UNKNOWN = 0x0, + INTEL_GUC_REPORT_STATUS_ACKED = 0x1, + INTEL_GUC_REPORT_STATUS_ERROR = 0x2, + INTEL_GUC_REPORT_STATUS_COMPLETE = 0x4, +}; + +enum intel_guc_sleep_state_status { + INTEL_GUC_SLEEP_STATE_SUCCESS = 0x1, + INTEL_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2, + INTEL_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3 +#define INTEL_GUC_SLEEP_STATE_INVALID_MASK 0x80000000 +}; + +#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0) +#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4 +#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) +#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8) + +enum intel_guc_response_status { + INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, +}; + +#define INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(m) \ + (typecheck(u32, (m)) && \ + ((m) & (INTEL_GUC_MSG_TYPE_MASK | INTEL_GUC_MSG_CODE_MASK)) == \ + ((INTEL_GUC_MSG_TYPE_RESPONSE << INTEL_GUC_MSG_TYPE_SHIFT) | \ + (INTEL_GUC_RESPONSE_STATUS_SUCCESS << INTEL_GUC_MSG_CODE_SHIFT))) + +/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ +enum intel_guc_recv_message { + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), + INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER = BIT(3) +}; + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c new file mode 100644 index 000000000000..36332064de9c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -0,0 +1,646 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#include 
<linux/debugfs.h> + +#include "gt/intel_gt.h" +#include "i915_drv.h" +#include "i915_memcpy.h" +#include "intel_guc_log.h" + +static void guc_log_capture_logs(struct intel_guc_log *log); + +/** + * DOC: GuC firmware log + * + * Firmware log is enabled by setting i915.guc_log_level to the positive level. + * Log data is printed out via reading debugfs i915_guc_log_dump. Reading from + * i915_guc_load_status will print out firmware loading status and scratch + * registers value. + */ + +static int guc_action_flush_log_complete(struct intel_guc *guc) +{ + u32 action[] = { + INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static int guc_action_flush_log(struct intel_guc *guc) +{ + u32 action[] = { + INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH, + 0 + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static int guc_action_control_log(struct intel_guc *guc, bool enable, + bool default_logging, u32 verbosity) +{ + u32 action[] = { + INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING, + (enable ? GUC_LOG_CONTROL_LOGGING_ENABLED : 0) | + (verbosity << GUC_LOG_CONTROL_VERBOSITY_SHIFT) | + (default_logging ? GUC_LOG_CONTROL_DEFAULT_LOGGING : 0) + }; + + GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX); + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) +{ + return container_of(log, struct intel_guc, log); +} + +static void guc_log_enable_flush_events(struct intel_guc_log *log) +{ + intel_guc_enable_msg(log_to_guc(log), + INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); +} + +static void guc_log_disable_flush_events(struct intel_guc_log *log) +{ + intel_guc_disable_msg(log_to_guc(log), + INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); +} + +/* + * Sub buffer switch callback. Called whenever relay has to switch to a new + * sub buffer, relay stays on the same sub buffer if 0 is returned. + */ +static int subbuf_start_callback(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding) +{ + /* + * Use no-overwrite mode by default, where relay will stop accepting + * new data if there are no empty sub buffers left. + * There is no strict synchronization enforced by relay between Consumer + * and Producer. In overwrite mode, there is a possibility of getting + * inconsistent/garbled data, the producer could be writing on to the + * same sub buffer from which Consumer is reading. This can't be avoided + * unless Consumer is fast enough and can always run in tandem with + * Producer. + */ + if (relay_buf_full(buf)) + return 0; + + return 1; +} + +/* + * file_create() callback. Creates relay file in debugfs. + */ +static struct dentry *create_buf_file_callback(const char *filename, + struct dentry *parent, + umode_t mode, + struct rchan_buf *buf, + int *is_global) +{ + struct dentry *buf_file; + + /* + * This to enable the use of a single buffer for the relay channel and + * correspondingly have a single file exposed to User, through which + * it can collect the logs in order without any post-processing. + * Need to set 'is_global' even if parent is NULL for early logging. + */ + *is_global = 1; + + if (!parent) + return NULL; + + buf_file = debugfs_create_file(filename, mode, + parent, buf, &relay_file_operations); + if (IS_ERR(buf_file)) + return NULL; + + return buf_file; +} + +/* + * file_remove() default callback. Removes relay file in debugfs. 
+ */ +static int remove_buf_file_callback(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + +/* relay channel callbacks */ +static struct rchan_callbacks relay_callbacks = { + .subbuf_start = subbuf_start_callback, + .create_buf_file = create_buf_file_callback, + .remove_buf_file = remove_buf_file_callback, +}; + +static void guc_move_to_next_buf(struct intel_guc_log *log) +{ + /* + * Make sure the updates made in the sub buffer are visible when + * Consumer sees the following update to offset inside the sub buffer. + */ + smp_wmb(); + + /* All data has been written, so now move the offset of sub buffer. */ + relay_reserve(log->relay.channel, log->vma->obj->base.size); + + /* Switch to the next sub buffer */ + relay_flush(log->relay.channel); +} + +static void *guc_get_write_buffer(struct intel_guc_log *log) +{ + /* + * Just get the base address of a new sub buffer and copy data into it + * ourselves. NULL will be returned in no-overwrite mode, if all sub + * buffers are full. Could have used the relay_write() to indirectly + * copy the data, but that would have been bit convoluted, as we need to + * write to only certain locations inside a sub buffer which cannot be + * done without using relay_reserve() along with relay_write(). So its + * better to use relay_reserve() alone. + */ + return relay_reserve(log->relay.channel, 0); +} + +static bool guc_check_log_buf_overflow(struct intel_guc_log *log, + enum guc_log_buffer_type type, + unsigned int full_cnt) +{ + unsigned int prev_full_cnt = log->stats[type].sampled_overflow; + bool overflow = false; + + if (full_cnt != prev_full_cnt) { + overflow = true; + + log->stats[type].overflow = full_cnt; + log->stats[type].sampled_overflow += full_cnt - prev_full_cnt; + + if (full_cnt < prev_full_cnt) { + /* buffer_full_cnt is a 4 bit counter */ + log->stats[type].sampled_overflow += 16; + } + + dev_notice_ratelimited(guc_to_gt(log_to_guc(log))->i915->drm.dev, + "GuC log buffer overflow\n"); + } + + return overflow; +} + +static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) +{ + switch (type) { + case GUC_ISR_LOG_BUFFER: + return ISR_BUFFER_SIZE; + case GUC_DPC_LOG_BUFFER: + return DPC_BUFFER_SIZE; + case GUC_CRASH_DUMP_LOG_BUFFER: + return CRASH_BUFFER_SIZE; + default: + MISSING_CASE(type); + } + + return 0; +} + +static void guc_read_update_log_buffer(struct intel_guc_log *log) +{ + unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; + struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; + struct guc_log_buffer_state log_buf_state_local; + enum guc_log_buffer_type type; + void *src_data, *dst_data; + bool new_overflow; + + mutex_lock(&log->relay.lock); + + if (WARN_ON(!intel_guc_log_relay_enabled(log))) + goto out_unlock; + + /* Get the pointer to shared GuC log buffer */ + log_buf_state = src_data = log->relay.buf_addr; + + /* Get the pointer to local buffer to store the logs */ + log_buf_snapshot_state = dst_data = guc_get_write_buffer(log); + + if (unlikely(!log_buf_snapshot_state)) { + /* + * Used rate limited to avoid deluge of messages, logs might be + * getting consumed by User at a slow rate. 
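/*
 * Illustrative, self-contained sketch (not from the patch itself) of the
 * wrap handling in guc_check_log_buf_overflow() above: buffer_full_cnt is
 * only a 4-bit field, so the number of new overflows since the previous
 * sample is the modulo-16 distance between the two readings.  The sketch_*
 * name is invented for this example; prev and cur are the sampled 4-bit
 * counter values (0..15).
 */
#include <stdint.h>

static unsigned int sketch_overflow_delta(uint32_t prev, uint32_t cur)
{
	/* Same result as "cur - prev, plus 16 if the 4-bit counter wrapped". */
	return (cur - prev) & 0xF;
}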
+ */ + DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); + log->relay.full_count++; + + goto out_unlock; + } + + /* Actual logs are present from the 2nd page */ + src_data += PAGE_SIZE; + dst_data += PAGE_SIZE; + + for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + /* + * Make a copy of the state structure, inside GuC log buffer + * (which is uncached mapped), on the stack to avoid reading + * from it multiple times. + */ + memcpy(&log_buf_state_local, log_buf_state, + sizeof(struct guc_log_buffer_state)); + buffer_size = guc_get_log_buffer_size(type); + read_offset = log_buf_state_local.read_ptr; + write_offset = log_buf_state_local.sampled_write_ptr; + full_cnt = log_buf_state_local.buffer_full_cnt; + + /* Bookkeeping stuff */ + log->stats[type].flush += log_buf_state_local.flush_to_file; + new_overflow = guc_check_log_buf_overflow(log, type, full_cnt); + + /* Update the state of shared log buffer */ + log_buf_state->read_ptr = write_offset; + log_buf_state->flush_to_file = 0; + log_buf_state++; + + /* First copy the state structure in snapshot buffer */ + memcpy(log_buf_snapshot_state, &log_buf_state_local, + sizeof(struct guc_log_buffer_state)); + + /* + * The write pointer could have been updated by GuC firmware, + * after sending the flush interrupt to Host, for consistency + * set write pointer value to same value of sampled_write_ptr + * in the snapshot buffer. + */ + log_buf_snapshot_state->write_ptr = write_offset; + log_buf_snapshot_state++; + + /* Now copy the actual logs. */ + if (unlikely(new_overflow)) { + /* copy the whole buffer in case of overflow */ + read_offset = 0; + write_offset = buffer_size; + } else if (unlikely((read_offset > buffer_size) || + (write_offset > buffer_size))) { + DRM_ERROR("invalid log buffer state\n"); + /* copy whole buffer as offsets are unreliable */ + read_offset = 0; + write_offset = buffer_size; + } + + /* Just copy the newly written data */ + if (read_offset > write_offset) { + i915_memcpy_from_wc(dst_data, src_data, write_offset); + bytes_to_copy = buffer_size - read_offset; + } else { + bytes_to_copy = write_offset - read_offset; + } + i915_memcpy_from_wc(dst_data + read_offset, + src_data + read_offset, bytes_to_copy); + + src_data += buffer_size; + dst_data += buffer_size; + } + + guc_move_to_next_buf(log); + +out_unlock: + mutex_unlock(&log->relay.lock); +} + +static void capture_logs_work(struct work_struct *work) +{ + struct intel_guc_log *log = + container_of(work, struct intel_guc_log, relay.flush_work); + + guc_log_capture_logs(log); +} + +static int guc_log_map(struct intel_guc_log *log) +{ + void *vaddr; + + lockdep_assert_held(&log->relay.lock); + + if (!log->vma) + return -ENODEV; + + /* + * Create a WC (Uncached for read) vmalloc mapping of log + * buffer pages, so that we can directly get the data + * (up-to-date) from memory. 
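/*
 * Illustrative, self-contained sketch (not from the patch itself) of the
 * copy ranges used by guc_read_update_log_buffer() above.  The data between
 * read_ptr and sampled_write_ptr is new; when the write pointer has wrapped
 * (read > write) the new data spans the end and the start of the buffer, so
 * two ranges are copied.  The sketch_* names are invented for this example.
 */
#include <stdint.h>

struct sketch_copy_range {
	uint32_t offset;
	uint32_t len;
};

/* Fills out[0..1]; a range with len == 0 means nothing to copy there. */
static void sketch_log_copy_ranges(uint32_t read, uint32_t write,
				   uint32_t size,
				   struct sketch_copy_range out[2])
{
	if (read > write) {
		/* wrapped: copy the tail of the buffer ... */
		out[0].offset = read;
		out[0].len = size - read;
		/* ... plus the beginning up to the write pointer */
		out[1].offset = 0;
		out[1].len = write;
	} else {
		out[0].offset = read;
		out[0].len = write - read;
		out[1].offset = 0;
		out[1].len = 0;
	}
}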
+ */ + vaddr = i915_gem_object_pin_map(log->vma->obj, I915_MAP_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + log->relay.buf_addr = vaddr; + + return 0; +} + +static void guc_log_unmap(struct intel_guc_log *log) +{ + lockdep_assert_held(&log->relay.lock); + + i915_gem_object_unpin_map(log->vma->obj); + log->relay.buf_addr = NULL; +} + +void intel_guc_log_init_early(struct intel_guc_log *log) +{ + mutex_init(&log->relay.lock); + INIT_WORK(&log->relay.flush_work, capture_logs_work); +} + +static int guc_log_relay_create(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; + struct rchan *guc_log_relay_chan; + size_t n_subbufs, subbuf_size; + int ret; + + lockdep_assert_held(&log->relay.lock); + GEM_BUG_ON(!log->vma); + + /* Keep the size of sub buffers same as shared log buffer */ + subbuf_size = log->vma->size; + + /* + * Store up to 8 snapshots, which is large enough to buffer sufficient + * boot time logs and provides enough leeway to User, in terms of + * latency, for consuming the logs from relay. Also doesn't take + * up too much memory. + */ + n_subbufs = 8; + + guc_log_relay_chan = relay_open("guc_log", + dev_priv->drm.primary->debugfs_root, + subbuf_size, n_subbufs, + &relay_callbacks, dev_priv); + if (!guc_log_relay_chan) { + DRM_ERROR("Couldn't create relay chan for GuC logging\n"); + + ret = -ENOMEM; + return ret; + } + + GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); + log->relay.channel = guc_log_relay_chan; + + return 0; +} + +static void guc_log_relay_destroy(struct intel_guc_log *log) +{ + lockdep_assert_held(&log->relay.lock); + + relay_close(log->relay.channel); + log->relay.channel = NULL; +} + +static void guc_log_capture_logs(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; + intel_wakeref_t wakeref; + + guc_read_update_log_buffer(log); + + /* + * Generally device is expected to be active only at this + * time, so get/put should be really quick. + */ + with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) + guc_action_flush_log_complete(guc); +} + +static u32 __get_default_log_level(struct intel_guc_log *log) +{ + /* A negative value means "use platform/config default" */ + if (i915_modparams.guc_log_level < 0) { + return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || + IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ? + GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_NON_VERBOSE; + } + + if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) { + DRM_WARN("Incompatible option detected: %s=%d, %s!\n", + "guc_log_level", i915_modparams.guc_log_level, + "verbosity too high"); + return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || + IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ? 
+ GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_DISABLED; + } + + GEM_BUG_ON(i915_modparams.guc_log_level < GUC_LOG_LEVEL_DISABLED); + GEM_BUG_ON(i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX); + return i915_modparams.guc_log_level; +} + +int intel_guc_log_create(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + struct i915_vma *vma; + u32 guc_log_size; + int ret; + + GEM_BUG_ON(log->vma); + + /* + * GuC Log buffer Layout + * + * +===============================+ 00B + * | Crash dump state header | + * +-------------------------------+ 32B + * | DPC state header | + * +-------------------------------+ 64B + * | ISR state header | + * +-------------------------------+ 96B + * | | + * +===============================+ PAGE_SIZE (4KB) + * | Crash Dump logs | + * +===============================+ + CRASH_SIZE + * | DPC logs | + * +===============================+ + DPC_SIZE + * | ISR logs | + * +===============================+ + ISR_SIZE + */ + guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DPC_BUFFER_SIZE + + ISR_BUFFER_SIZE; + + vma = intel_guc_allocate_vma(guc, guc_log_size); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } + + log->vma = vma; + + log->level = __get_default_log_level(log); + DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n", + log->level, enableddisabled(log->level), + yesno(GUC_LOG_LEVEL_IS_VERBOSE(log->level)), + GUC_LOG_LEVEL_TO_VERBOSITY(log->level)); + + return 0; + +err: + DRM_ERROR("Failed to allocate GuC log buffer. %d\n", ret); + return ret; +} + +void intel_guc_log_destroy(struct intel_guc_log *log) +{ + i915_vma_unpin_and_release(&log->vma, 0); +} + +int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) +{ + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; + intel_wakeref_t wakeref; + int ret = 0; + + BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0); + GEM_BUG_ON(!log->vma); + + /* + * GuC is recognizing log levels starting from 0 to max, we're using 0 + * as indication that logging should be disabled. + */ + if (level < GUC_LOG_LEVEL_DISABLED || level > GUC_LOG_LEVEL_MAX) + return -EINVAL; + + mutex_lock(&dev_priv->drm.struct_mutex); + + if (log->level == level) + goto out_unlock; + + with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) + ret = guc_action_control_log(guc, + GUC_LOG_LEVEL_IS_VERBOSE(level), + GUC_LOG_LEVEL_IS_ENABLED(level), + GUC_LOG_LEVEL_TO_VERBOSITY(level)); + if (ret) { + DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret); + goto out_unlock; + } + + log->level = level; + +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + return ret; +} + +bool intel_guc_log_relay_enabled(const struct intel_guc_log *log) +{ + return log->relay.buf_addr; +} + +int intel_guc_log_relay_open(struct intel_guc_log *log) +{ + int ret; + + if (!log->vma) + return -ENODEV; + + mutex_lock(&log->relay.lock); + + if (intel_guc_log_relay_enabled(log)) { + ret = -EEXIST; + goto out_unlock; + } + + /* + * We require SSE 4.1 for fast reads from the GuC log buffer and + * it should be present on the chipsets supporting GuC based + * submisssions. + */ + if (!i915_has_memcpy_from_wc()) { + ret = -ENXIO; + goto out_unlock; + } + + ret = guc_log_relay_create(log); + if (ret) + goto out_unlock; + + ret = guc_log_map(log); + if (ret) + goto out_relay; + + mutex_unlock(&log->relay.lock); + + guc_log_enable_flush_events(log); + + /* + * When GuC is logging without us relaying to userspace, we're ignoring + * the flush notification. 
This means that we need to unconditionally + * flush on relay enabling, since GuC only notifies us once. + */ + queue_work(system_highpri_wq, &log->relay.flush_work); + + return 0; + +out_relay: + guc_log_relay_destroy(log); +out_unlock: + mutex_unlock(&log->relay.lock); + + return ret; +} + +void intel_guc_log_relay_flush(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + intel_wakeref_t wakeref; + + /* + * Before initiating the forceful flush, wait for any pending/ongoing + * flush to complete otherwise forceful flush may not actually happen. + */ + flush_work(&log->relay.flush_work); + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + guc_action_flush_log(guc); + + /* GuC would have updated log buffer by now, so capture it */ + guc_log_capture_logs(log); +} + +void intel_guc_log_relay_close(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + guc_log_disable_flush_events(log); + intel_synchronize_irq(i915); + + flush_work(&log->relay.flush_work); + + mutex_lock(&log->relay.lock); + GEM_BUG_ON(!intel_guc_log_relay_enabled(log)); + guc_log_unmap(log); + guc_log_relay_destroy(log); + mutex_unlock(&log->relay.lock); +} + +void intel_guc_log_handle_flush_event(struct intel_guc_log *log) +{ + queue_work(system_highpri_wq, &log->relay.flush_work); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h new file mode 100644 index 000000000000..6f764879acb1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_LOG_H_ +#define _INTEL_GUC_LOG_H_ + +#include <linux/mutex.h> +#include <linux/relay.h> +#include <linux/workqueue.h> + +#include "intel_guc_fwif.h" +#include "i915_gem.h" + +struct intel_guc; + +#ifdef CONFIG_DRM_I915_DEBUG_GUC +#define CRASH_BUFFER_SIZE SZ_2M +#define DPC_BUFFER_SIZE SZ_8M +#define ISR_BUFFER_SIZE SZ_8M +#else +#define CRASH_BUFFER_SIZE SZ_8K +#define DPC_BUFFER_SIZE SZ_32K +#define ISR_BUFFER_SIZE SZ_32K +#endif + +/* + * While we're using plain log level in i915, GuC controls are much more... + * "elaborate"? We have a couple of bits for verbosity, separate bit for actual + * log enabling, and separate bit for default logging - which "conveniently" + * ignores the enable bit. + */ +#define GUC_LOG_LEVEL_DISABLED 0 +#define GUC_LOG_LEVEL_NON_VERBOSE 1 +#define GUC_LOG_LEVEL_IS_ENABLED(x) ((x) > GUC_LOG_LEVEL_DISABLED) +#define GUC_LOG_LEVEL_IS_VERBOSE(x) ((x) > GUC_LOG_LEVEL_NON_VERBOSE) +#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({ \ + typeof(x) _x = (x); \ + GUC_LOG_LEVEL_IS_VERBOSE(_x) ? 
_x - 2 : 0; \ +}) +#define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) +#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) + +struct intel_guc_log { + u32 level; + struct i915_vma *vma; + struct { + void *buf_addr; + struct work_struct flush_work; + struct rchan *channel; + struct mutex lock; + u32 full_count; + } relay; + /* logging related stats */ + struct { + u32 sampled_overflow; + u32 overflow; + u32 flush; + } stats[GUC_MAX_LOG_BUFFER]; +}; + +void intel_guc_log_init_early(struct intel_guc_log *log); +int intel_guc_log_create(struct intel_guc_log *log); +void intel_guc_log_destroy(struct intel_guc_log *log); + +int intel_guc_log_set_level(struct intel_guc_log *log, u32 level); +bool intel_guc_log_relay_enabled(const struct intel_guc_log *log); +int intel_guc_log_relay_open(struct intel_guc_log *log); +void intel_guc_log_relay_flush(struct intel_guc_log *log); +void intel_guc_log_relay_close(struct intel_guc_log *log); + +void intel_guc_log_handle_flush_event(struct intel_guc_log *log); + +static inline u32 intel_guc_log_get_level(struct intel_guc_log *log) +{ + return log->level; +} + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h new file mode 100644 index 000000000000..edf194d23c6b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_REG_H_ +#define _INTEL_GUC_REG_H_ + +#include <linux/compiler.h> +#include <linux/types.h> + +#include "i915_reg.h" + +/* Definitions of GuC H/W registers, bits, etc */ + +#define GUC_STATUS _MMIO(0xc000) +#define GS_RESET_SHIFT 0 +#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT) +#define GS_BOOTROM_SHIFT 1 +#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) +#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) +#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) +#define GS_UKERNEL_SHIFT 8 +#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) +#define GS_UKERNEL_LAPIC_DONE (0x30 << GS_UKERNEL_SHIFT) +#define GS_UKERNEL_DPC_ERROR (0x60 << GS_UKERNEL_SHIFT) +#define GS_UKERNEL_EXCEPTION (0x70 << GS_UKERNEL_SHIFT) +#define GS_UKERNEL_READY (0xF0 << GS_UKERNEL_SHIFT) +#define GS_MIA_SHIFT 16 +#define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) +#define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT) +#define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT) +#define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT) +#define GS_AUTH_STATUS_SHIFT 30 +#define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) + +#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) +#define SOFT_SCRATCH_COUNT 16 + +#define GEN11_SOFT_SCRATCH(n) _MMIO(0x190240 + (n) * 4) +#define GEN11_SOFT_SCRATCH_COUNT 4 + +#define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) +#define UOS_RSA_SCRATCH_COUNT 64 + +#define DMA_ADDR_0_LOW _MMIO(0xc300) +#define DMA_ADDR_0_HIGH _MMIO(0xc304) +#define DMA_ADDR_1_LOW _MMIO(0xc308) +#define DMA_ADDR_1_HIGH _MMIO(0xc30c) +#define DMA_ADDRESS_SPACE_WOPCM (7 << 16) +#define DMA_ADDRESS_SPACE_GTT (8 << 16) +#define DMA_COPY_SIZE _MMIO(0xc310) +#define DMA_CTRL _MMIO(0xc314) +#define HUC_UKERNEL (1<<9) +#define UOS_MOVE (1<<4) +#define START_DMA (1<<0) +#define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) +#define GUC_WOPCM_OFFSET_VALID (1<<0) +#define HUC_LOADING_AGENT_VCR (0<<1) +#define HUC_LOADING_AGENT_GUC (1<<1) +#define GUC_WOPCM_OFFSET_SHIFT 14 
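/*
 * Illustrative sketch (not from the patch itself) of the mapping encoded by
 * the GUC_LOG_LEVEL_* macros in intel_guc_log.h above: level 0 disables
 * logging, level 1 enables non-verbose logging, and levels 2..5 map to GuC
 * verbosity 0..3 (GUC_LOG_VERBOSITY_MAX).  Plain C with invented sketch_*
 * names, for checking the arithmetic only.
 */
#include <stdbool.h>
#include <stdint.h>

static bool sketch_log_level_is_verbose(uint32_t level)
{
	return level > 1; /* 1 == GUC_LOG_LEVEL_NON_VERBOSE */
}

static uint32_t sketch_log_level_to_verbosity(uint32_t level)
{
	return sketch_log_level_is_verbose(level) ? level - 2 : 0;
}

/* Examples: level 1 -> verbosity 0 (non-verbose), level 2 -> verbosity 0,
 * level 5 (GUC_LOG_LEVEL_MAX) -> verbosity 3 (GUC_LOG_VERBOSITY_MAX). */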
+#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT) +#define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) + +#define HUC_STATUS2 _MMIO(0xD3B0) +#define HUC_FW_VERIFIED (1<<7) + +#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xC1DC) +#define HUC_LOAD_SUCCESSFUL (1 << 0) + +#define GUC_WOPCM_SIZE _MMIO(0xc050) +#define GUC_WOPCM_SIZE_LOCKED (1<<0) +#define GUC_WOPCM_SIZE_SHIFT 12 +#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT) + +#define GEN8_GT_PM_CONFIG _MMIO(0x138140) +#define GEN9LP_GT_PM_CONFIG _MMIO(0x138140) +#define GEN9_GT_PM_CONFIG _MMIO(0x13816c) +#define GT_DOORBELL_ENABLE (1<<0) + +#define GEN8_GTCR _MMIO(0x4274) +#define GEN8_GTCR_INVALIDATE (1<<0) + +#define GUC_ARAT_C6DIS _MMIO(0xA178) + +#define GUC_SHIM_CONTROL _MMIO(0xc064) +#define GUC_DISABLE_SRAM_INIT_TO_ZEROES (1<<0) +#define GUC_ENABLE_READ_CACHE_LOGIC (1<<1) +#define GUC_ENABLE_MIA_CACHING (1<<2) +#define GUC_GEN10_MSGCH_ENABLE (1<<4) +#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA (1<<9) +#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10) +#define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) +#define GUC_GEN10_SHIM_WC_ENABLE (1<<21) + +#define GUC_SEND_INTERRUPT _MMIO(0xc4c8) +#define GUC_SEND_TRIGGER (1<<0) +#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) + +#define GUC_NUM_DOORBELLS 256 + +/* format of the HW-monitored doorbell cacheline */ +struct guc_doorbell_info { + u32 db_status; +#define GUC_DOORBELL_DISABLED 0 +#define GUC_DOORBELL_ENABLED 1 + + u32 cookie; + u32 reserved[14]; +} __packed; + +#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8) +#define GEN8_DRB_VALID (1<<0) +#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) + +#define DE_GUCRMR _MMIO(0x44054) + +#define GUC_BCS_RCS_IER _MMIO(0xC550) +#define GUC_VCS2_VCS1_IER _MMIO(0xC554) +#define GUC_WD_VECS_IER _MMIO(0xC558) +#define GUC_PM_P24C_IER _MMIO(0xC55C) + +/* GuC Interrupt Vector */ +#define GUC_INTR_GUC2HOST BIT(15) +#define GUC_INTR_EXEC_ERROR BIT(14) +#define GUC_INTR_DISPLAY_EVENT BIT(13) +#define GUC_INTR_SEM_SIG BIT(12) +#define GUC_INTR_IOMMU2GUC BIT(11) +#define GUC_INTR_DOORBELL_RANG BIT(10) +#define GUC_INTR_DMA_DONE BIT(9) +#define GUC_INTR_FATAL_ERROR BIT(8) +#define GUC_INTR_NOTIF_ERROR BIT(7) +#define GUC_INTR_SW_INT_6 BIT(6) +#define GUC_INTR_SW_INT_5 BIT(5) +#define GUC_INTR_SW_INT_4 BIT(4) +#define GUC_INTR_SW_INT_3 BIT(3) +#define GUC_INTR_SW_INT_2 BIT(2) +#define GUC_INTR_SW_INT_1 BIT(1) +#define GUC_INTR_SW_INT_0 BIT(0) + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c new file mode 100644 index 000000000000..f325d3dd564f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -0,0 +1,1184 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014 Intel Corporation + */ + +#include <linux/circ_buf.h> + +#include "gem/i915_gem_context.h" + +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_lrc_reg.h" +#include "intel_guc_submission.h" + +#include "i915_drv.h" +#include "i915_trace.h" + +enum { + GUC_PREEMPT_NONE = 0, + GUC_PREEMPT_INPROGRESS, + GUC_PREEMPT_FINISHED, +}; +#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 +#define GUC_PREEMPT_BREADCRUMB_BYTES \ + (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS) + +/** + * DOC: GuC-based command submission + * + * GuC client: + * A intel_guc_client refers to a submission path through GuC. Currently, there + * is only one client, which is charged with all submissions to the GuC. 
This
+ * struct is the owner of a doorbell, a process descriptor and a workqueue (all
+ * of them inside a single gem object that contains all required pages for these
+ * elements).
+ *
+ * GuC stage descriptor:
+ * During initialization, the driver allocates a static pool of 1024 such
+ * descriptors, and shares them with the GuC.
+ * Currently, there exists a 1:1 mapping between an intel_guc_client and a
+ * guc_stage_desc (via the client's stage_id), so effectively only one
+ * gets used. This stage descriptor lets the GuC know about the doorbell,
+ * workqueue and process descriptor. Theoretically, it also lets the GuC
+ * know about our HW contexts (context ID, etc...), but we actually
+ * employ a kind of submission where the GuC uses the LRCA sent via the work
+ * item instead (the single guc_stage_desc associated with the execbuf client
+ * contains information about the default kernel context only, but this is
+ * essentially unused). This is called a "proxy" submission.
+ *
+ * The Scratch registers:
+ * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
+ * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
+ * It then triggers an interrupt on the GuC via another register write (0xC4C8).
+ * Firmware writes a success/fail code back to the action register after
+ * processing the request. The kernel driver polls waiting for this update and
+ * then proceeds.
+ * See intel_guc_send()
+ *
+ * Doorbells:
+ * Doorbells are interrupts to the uKernel. A doorbell is a single cache line
+ * (QW) mapped into process space.
+ *
+ * Work Items:
+ * There are several types of work items that the host may place into a
+ * workqueue, each with its own requirements and limitations. Currently only
+ * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
+ * represents an in-order queue. The kernel driver packs the ring tail pointer
+ * and an ELSP context descriptor dword into a Work Item.
+ * See guc_add_request()
+ *
+ */
+
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+ return rb_entry(rb, struct i915_priolist, node);
+}
+
+static inline bool is_high_priority(struct intel_guc_client *client)
+{
+ return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
+ client->priority == GUC_CLIENT_PRIORITY_HIGH);
+}
+
+static int reserve_doorbell(struct intel_guc_client *client)
+{
+ unsigned long offset;
+ unsigned long end;
+ u16 id;
+
+ GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
+
+ /*
+ * The bitmap tracks which doorbell registers are currently in use.
+ * It is split into two halves; the first half is used for normal
+ * priority contexts, the second half for high-priority ones.
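+ * With GUC_NUM_DOORBELLS == 256, that gives ids 0-127 to normal priority
+ * clients and ids 128-255 to high-priority ones.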
+ */ + offset = 0; + end = GUC_NUM_DOORBELLS / 2; + if (is_high_priority(client)) { + offset = end; + end += offset; + } + + id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset); + if (id == end) + return -ENOSPC; + + __set_bit(id, client->guc->doorbell_bitmap); + client->doorbell_id = id; + DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n", + client->stage_id, yesno(is_high_priority(client)), + id); + return 0; +} + +static bool has_doorbell(struct intel_guc_client *client) +{ + if (client->doorbell_id == GUC_DOORBELL_INVALID) + return false; + + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); +} + +static void unreserve_doorbell(struct intel_guc_client *client) +{ + GEM_BUG_ON(!has_doorbell(client)); + + __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); + client->doorbell_id = GUC_DOORBELL_INVALID; +} + +/* + * Tell the GuC to allocate or deallocate a specific doorbell + */ + +static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id) +{ + u32 action[] = { + INTEL_GUC_ACTION_ALLOCATE_DOORBELL, + stage_id + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id) +{ + u32 action[] = { + INTEL_GUC_ACTION_DEALLOCATE_DOORBELL, + stage_id + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client) +{ + struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; + + return &base[client->stage_id]; +} + +/* + * Initialise, update, or clear doorbell data shared with the GuC + * + * These functions modify shared data and so need access to the mapped + * client object which contains the page being used for the doorbell + */ + +static void __update_doorbell_desc(struct intel_guc_client *client, u16 new_id) +{ + struct guc_stage_desc *desc; + + /* Update the GuC's idea of the doorbell ID */ + desc = __get_stage_desc(client); + desc->db_id = new_id; +} + +static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) +{ + return client->vaddr + client->doorbell_offset; +} + +static bool __doorbell_valid(struct intel_guc *guc, u16 db_id) +{ + struct intel_uncore *uncore = guc_to_gt(guc)->uncore; + + GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS); + return intel_uncore_read(uncore, GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID; +} + +static void __init_doorbell(struct intel_guc_client *client) +{ + struct guc_doorbell_info *doorbell; + + doorbell = __get_doorbell(client); + doorbell->db_status = GUC_DOORBELL_ENABLED; + doorbell->cookie = 0; +} + +static void __fini_doorbell(struct intel_guc_client *client) +{ + struct guc_doorbell_info *doorbell; + u16 db_id = client->doorbell_id; + + doorbell = __get_doorbell(client); + doorbell->db_status = GUC_DOORBELL_DISABLED; + + /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit + * to go to zero after updating db_status before we call the GuC to + * release the doorbell + */ + if (wait_for_us(!__doorbell_valid(client->guc, db_id), 10)) + WARN_ONCE(true, "Doorbell never became invalid after disable\n"); +} + +static int create_doorbell(struct intel_guc_client *client) +{ + int ret; + + if (WARN_ON(!has_doorbell(client))) + return -ENODEV; /* internal setup error, should never happen */ + + __update_doorbell_desc(client, client->doorbell_id); + __init_doorbell(client); + + ret = __guc_allocate_doorbell(client->guc, client->stage_id); + if (ret) { + __fini_doorbell(client); + 
__update_doorbell_desc(client, GUC_DOORBELL_INVALID); + DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n", + client->stage_id, ret); + return ret; + } + + return 0; +} + +static int destroy_doorbell(struct intel_guc_client *client) +{ + int ret; + + GEM_BUG_ON(!has_doorbell(client)); + + __fini_doorbell(client); + ret = __guc_deallocate_doorbell(client->guc, client->stage_id); + if (ret) + DRM_ERROR("Couldn't destroy client %u doorbell: %d\n", + client->stage_id, ret); + + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); + + return ret; +} + +static unsigned long __select_cacheline(struct intel_guc *guc) +{ + unsigned long offset; + + /* Doorbell uses a single cache line within a page */ + offset = offset_in_page(guc->db_cacheline); + + /* Moving to next cache line to reduce contention */ + guc->db_cacheline += cache_line_size(); + + DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n", + offset, guc->db_cacheline, cache_line_size()); + return offset; +} + +static inline struct guc_process_desc * +__get_process_desc(struct intel_guc_client *client) +{ + return client->vaddr + client->proc_desc_offset; +} + +/* + * Initialise the process descriptor shared with the GuC firmware. + */ +static void guc_proc_desc_init(struct intel_guc_client *client) +{ + struct guc_process_desc *desc; + + desc = memset(__get_process_desc(client), 0, sizeof(*desc)); + + /* + * XXX: pDoorbell and WQVBaseAddress are pointers in process address + * space for ring3 clients (set them as in mmap_ioctl) or kernel + * space for kernel clients (map on demand instead? May make debug + * easier to have it mapped). + */ + desc->wq_base_addr = 0; + desc->db_base_addr = 0; + + desc->stage_id = client->stage_id; + desc->wq_size_bytes = GUC_WQ_SIZE; + desc->wq_status = WQ_STATUS_ACTIVE; + desc->priority = client->priority; +} + +static void guc_proc_desc_fini(struct intel_guc_client *client) +{ + struct guc_process_desc *desc; + + desc = __get_process_desc(client); + memset(desc, 0, sizeof(*desc)); +} + +static int guc_stage_desc_pool_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, + PAGE_ALIGN(sizeof(struct guc_stage_desc) * + GUC_MAX_STAGE_DESCRIPTORS)); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + guc->stage_desc_pool = vma; + guc->stage_desc_pool_vaddr = vaddr; + ida_init(&guc->stage_ids); + + return 0; +} + +static void guc_stage_desc_pool_destroy(struct intel_guc *guc) +{ + ida_destroy(&guc->stage_ids); + i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); +} + +/* + * Initialise/clear the stage descriptor shared with the GuC firmware. + * + * This descriptor tells the GuC where (in GGTT space) to find the important + * data structures relating to this client (doorbell, process descriptor, + * write queue, etc). 
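+ * All of these live inside the client's single VMA, so the GGTT addresses
+ * programmed below are derived from intel_guc_ggtt_offset() plus the
+ * per-element offsets within that object.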
+ */ +static void guc_stage_desc_init(struct intel_guc_client *client) +{ + struct intel_guc *guc = client->guc; + struct guc_stage_desc *desc; + u32 gfx_addr; + + desc = __get_stage_desc(client); + memset(desc, 0, sizeof(*desc)); + + desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | + GUC_STAGE_DESC_ATTR_KERNEL; + if (is_high_priority(client)) + desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; + desc->stage_id = client->stage_id; + desc->priority = client->priority; + desc->db_id = client->doorbell_id; + + /* + * The doorbell, process descriptor, and workqueue are all parts + * of the client object, which the GuC will reference via the GGTT + */ + gfx_addr = intel_guc_ggtt_offset(guc, client->vma); + desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + + client->doorbell_offset; + desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); + desc->db_trigger_uk = gfx_addr + client->doorbell_offset; + desc->process_desc = gfx_addr + client->proc_desc_offset; + desc->wq_addr = gfx_addr + GUC_DB_SIZE; + desc->wq_size = GUC_WQ_SIZE; + + desc->desc_private = ptr_to_u64(client); +} + +static void guc_stage_desc_fini(struct intel_guc_client *client) +{ + struct guc_stage_desc *desc; + + desc = __get_stage_desc(client); + memset(desc, 0, sizeof(*desc)); +} + +/* Construct a Work Item and append it to the GuC's Work Queue */ +static void guc_wq_item_append(struct intel_guc_client *client, + u32 target_engine, u32 context_desc, + u32 ring_tail, u32 fence_id) +{ + /* wqi_len is in DWords, and does not include the one-word header */ + const size_t wqi_size = sizeof(struct guc_wq_item); + const u32 wqi_len = wqi_size / sizeof(u32) - 1; + struct guc_process_desc *desc = __get_process_desc(client); + struct guc_wq_item *wqi; + u32 wq_off; + + lockdep_assert_held(&client->wq_lock); + + /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we + * should not have the case where structure wqi is across page, neither + * wrapped to the beginning. This simplifies the implementation below. + * + * XXX: if not the case, we need save data to a temp wqi and copy it to + * workqueue buffer dw by dw. + */ + BUILD_BUG_ON(wqi_size != 16); + + /* We expect the WQ to be active if we're appending items to it */ + GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE); + + /* Free space is guaranteed. */ + wq_off = READ_ONCE(desc->tail); + GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head), + GUC_WQ_SIZE) < wqi_size); + GEM_BUG_ON(wq_off & (wqi_size - 1)); + + /* WQ starts from the page after doorbell / process_desc */ + wqi = client->vaddr + wq_off + GUC_DB_SIZE; + + if (I915_SELFTEST_ONLY(client->use_nop_wqi)) { + wqi->header = WQ_TYPE_NOOP | (wqi_len << WQ_LEN_SHIFT); + } else { + /* Now fill in the 4-word work queue item */ + wqi->header = WQ_TYPE_INORDER | + (wqi_len << WQ_LEN_SHIFT) | + (target_engine << WQ_TARGET_SHIFT) | + WQ_NO_WCFLUSH_WAIT; + wqi->context_desc = context_desc; + wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); + wqi->fence_id = fence_id; + } + + /* Make the update visible to GuC */ + WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); +} + +static void guc_ring_doorbell(struct intel_guc_client *client) +{ + struct guc_doorbell_info *db; + u32 cookie; + + lockdep_assert_held(&client->wq_lock); + + /* pointer of current doorbell cacheline */ + db = __get_doorbell(client); + + /* + * We're not expecting the doorbell cookie to change behind our back, + * we also need to treat 0 as a reserved value. 
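+ * The xchg() below therefore bumps the cookie by one, or by two when the
+ * increment would wrap around to the reserved value 0.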
+ */ + cookie = READ_ONCE(db->cookie); + WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie); + + /* XXX: doorbell was lost and need to acquire it again */ + GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); +} + +static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) +{ + struct intel_guc_client *client = guc->execbuf_client; + struct intel_engine_cs *engine = rq->engine; + u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); + u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); + + guc_wq_item_append(client, engine->guc_id, ctx_desc, + ring_tail, rq->fence.seqno); + guc_ring_doorbell(client); +} + +/* + * When we're doing submissions using regular execlists backend, writing to + * ELSP from CPU side is enough to make sure that writes to ringbuffer pages + * pinned in mappable aperture portion of GGTT are visible to command streamer. + * Writes done by GuC on our behalf are not guaranteeing such ordering, + * therefore, to ensure the flush, we're issuing a POSTING READ. + */ +static void flush_ggtt_writes(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = vma->vm->i915; + + if (i915_vma_is_map_and_fenceable(vma)) + intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS); +} + +static void guc_submit(struct intel_engine_cs *engine, + struct i915_request **out, + struct i915_request **end) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc_client *client = guc->execbuf_client; + + spin_lock(&client->wq_lock); + + do { + struct i915_request *rq = *out++; + + flush_ggtt_writes(rq->ring->vma); + guc_add_request(guc, rq); + } while (out != end); + + spin_unlock(&client->wq_lock); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->sched.attr.priority | __NO_PREEMPTION; +} + +static struct i915_request *schedule_in(struct i915_request *rq, int idx) +{ + trace_i915_request_in(rq, idx); + + /* + * Currently we are not tracking the rq->context being inflight + * (ce->inflight = rq->engine). It is only used by the execlists + * backend at the moment, a similar counting strategy would be + * required if we generalise the inflight tracking. + */ + + intel_gt_pm_get(rq->engine->gt); + return i915_request_get(rq); +} + +static void schedule_out(struct i915_request *rq) +{ + trace_i915_request_out(rq); + + intel_gt_pm_put(rq->engine->gt); + i915_request_put(rq); +} + +static void __guc_dequeue(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request **first = execlists->inflight; + struct i915_request ** const last_port = first + execlists->port_mask; + struct i915_request *last = first[0]; + struct i915_request **port; + bool submit = false; + struct rb_node *rb; + + lockdep_assert_held(&engine->active.lock); + + if (last) { + if (*++first) + return; + + last = NULL; + } + + /* + * We write directly into the execlists->inflight queue and don't use + * the execlists->pending queue, as we don't have a distinct switch + * event. 
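+ * Consecutive requests for the same context are coalesced into a single
+ * inflight slot, so guc_submit() emits one work item per slot rather than
+ * one per request.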
+ */ + port = first; + while ((rb = rb_first_cached(&execlists->queue))) { + struct i915_priolist *p = to_priolist(rb); + struct i915_request *rq, *rn; + int i; + + priolist_for_each_request_consume(rq, rn, p, i) { + if (last && rq->hw_context != last->hw_context) { + if (port == last_port) + goto done; + + *port = schedule_in(last, + port - execlists->inflight); + port++; + } + + list_del_init(&rq->sched.link); + __i915_request_submit(rq); + submit = true; + last = rq; + } + + rb_erase_cached(&p->node, &execlists->queue); + i915_priolist_free(p); + } +done: + execlists->queue_priority_hint = + rb ? to_priolist(rb)->priority : INT_MIN; + if (submit) { + *port = schedule_in(last, port - execlists->inflight); + *++port = NULL; + guc_submit(engine, first, port); + } + execlists->active = execlists->inflight; +} + +static void guc_submission_tasklet(unsigned long data) +{ + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request **port, *rq; + unsigned long flags; + + spin_lock_irqsave(&engine->active.lock, flags); + + for (port = execlists->inflight; (rq = *port); port++) { + if (!i915_request_completed(rq)) + break; + + schedule_out(rq); + } + if (port != execlists->inflight) { + int idx = port - execlists->inflight; + int rem = ARRAY_SIZE(execlists->inflight) - idx; + memmove(execlists->inflight, port, rem * sizeof(*port)); + } + + __guc_dequeue(engine); + + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +static void guc_reset_prepare(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + GEM_TRACE("%s\n", engine->name); + + /* + * Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its execlists->tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the execlists->tasklet until the reset is over + * prevents the race. + */ + __tasklet_disable_sync_once(&execlists->tasklet); +} + +static void +cancel_port_requests(struct intel_engine_execlists * const execlists) +{ + struct i915_request * const *port, *rq; + + /* Note we are only using the inflight and not the pending queue */ + + for (port = execlists->active; (rq = *port); port++) + schedule_out(rq); + execlists->active = + memset(execlists->inflight, 0, sizeof(execlists->inflight)); +} + +static void guc_reset(struct intel_engine_cs *engine, bool stalled) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq; + unsigned long flags; + + spin_lock_irqsave(&engine->active.lock, flags); + + cancel_port_requests(execlists); + + /* Push back any incomplete requests for replay after the reset. 
*/ + rq = execlists_unwind_incomplete_requests(execlists); + if (!rq) + goto out_unlock; + + if (!i915_request_started(rq)) + stalled = false; + + __i915_request_reset(rq, stalled); + intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled); + +out_unlock: + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +static void guc_cancel_requests(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq, *rn; + struct rb_node *rb; + unsigned long flags; + + GEM_TRACE("%s\n", engine->name); + + /* + * Before we call engine->cancel_requests(), we should have exclusive + * access to the submission state. This is arranged for us by the + * caller disabling the interrupt generation, the tasklet and other + * threads that may then access the same state, giving us a free hand + * to reset state. However, we still need to let lockdep be aware that + * we know this state may be accessed in hardirq context, so we + * disable the irq around this manipulation and we want to keep + * the spinlock focused on its duties and not accidentally conflate + * coverage to the submission's irq state. (Similarly, although we + * shouldn't need to disable irq around the manipulation of the + * submission's irq state, we also wish to remind ourselves that + * it is irq state.) + */ + spin_lock_irqsave(&engine->active.lock, flags); + + /* Cancel the requests on the HW and clear the ELSP tracker. */ + cancel_port_requests(execlists); + + /* Mark all executing requests as skipped. */ + list_for_each_entry(rq, &engine->active.requests, sched.link) { + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); + + i915_request_mark_complete(rq); + } + + /* Flush the queued requests to the timeline list (for retiring). */ + while ((rb = rb_first_cached(&execlists->queue))) { + struct i915_priolist *p = to_priolist(rb); + int i; + + priolist_for_each_request_consume(rq, rn, p, i) { + list_del_init(&rq->sched.link); + __i915_request_submit(rq); + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); + } + + rb_erase_cached(&p->node, &execlists->queue); + i915_priolist_free(p); + } + + /* Remaining _unready_ requests will be nop'ed when submitted */ + + execlists->queue_priority_hint = INT_MIN; + execlists->queue = RB_ROOT_CACHED; + + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +static void guc_reset_finish(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + if (__tasklet_enable(&execlists->tasklet)) + /* And kick in case we missed a new request submission. */ + tasklet_hi_schedule(&execlists->tasklet); + + GEM_TRACE("%s: depth->%d\n", engine->name, + atomic_read(&execlists->tasklet.count)); +} + +/* + * Everything below here is concerned with setup & teardown, and is + * therefore not part of the somewhat time-critical batch-submission + * path of guc_submit() above. 
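+ * The doorbell checks that follow compare our software bitmap against the
+ * valid bits the hardware reports in GEN8_DRBREGL.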
+ */ + +/* Check that a doorbell register is in the expected state */ +static bool doorbell_ok(struct intel_guc *guc, u16 db_id) +{ + bool valid; + + GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS); + + valid = __doorbell_valid(guc, db_id); + + if (test_bit(db_id, guc->doorbell_bitmap) == valid) + return true; + + DRM_DEBUG_DRIVER("Doorbell %u has unexpected state: valid=%s\n", + db_id, yesno(valid)); + + return false; +} + +static bool guc_verify_doorbells(struct intel_guc *guc) +{ + bool doorbells_ok = true; + u16 db_id; + + for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) + if (!doorbell_ok(guc, db_id)) + doorbells_ok = false; + + return doorbells_ok; +} + +/** + * guc_client_alloc() - Allocate an intel_guc_client + * @guc: the intel_guc structure + * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW + * The kernel client to replace ExecList submission is created with + * NORMAL priority. Priority of a client for scheduler can be HIGH, + * while a preemption context can use CRITICAL. + * + * Return: An intel_guc_client object if success, else NULL. + */ +static struct intel_guc_client * +guc_client_alloc(struct intel_guc *guc, u32 priority) +{ + struct intel_guc_client *client; + struct i915_vma *vma; + void *vaddr; + int ret; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return ERR_PTR(-ENOMEM); + + client->guc = guc; + client->priority = priority; + client->doorbell_id = GUC_DOORBELL_INVALID; + spin_lock_init(&client->wq_lock); + + ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, + GFP_KERNEL); + if (ret < 0) + goto err_client; + + client->stage_id = ret; + + /* The first page is doorbell/proc_desc. Two followed pages are wq. */ + vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_id; + } + + /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ + client->vma = vma; + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err_vma; + } + client->vaddr = vaddr; + + ret = reserve_doorbell(client); + if (ret) + goto err_vaddr; + + client->doorbell_offset = __select_cacheline(guc); + + /* + * Since the doorbell only requires a single cacheline, we can save + * space by putting the application process descriptor in the same + * page. Use the half of the page that doesn't include the doorbell. 
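+ * That is, a doorbell in the upper half of the page puts the process
+ * descriptor at offset 0, and vice versa.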
+ */ + if (client->doorbell_offset >= (GUC_DB_SIZE / 2)) + client->proc_desc_offset = 0; + else + client->proc_desc_offset = (GUC_DB_SIZE / 2); + + DRM_DEBUG_DRIVER("new priority %u client %p: stage_id %u\n", + priority, client, client->stage_id); + DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n", + client->doorbell_id, client->doorbell_offset); + + return client; + +err_vaddr: + i915_gem_object_unpin_map(client->vma->obj); +err_vma: + i915_vma_unpin_and_release(&client->vma, 0); +err_id: + ida_simple_remove(&guc->stage_ids, client->stage_id); +err_client: + kfree(client); + return ERR_PTR(ret); +} + +static void guc_client_free(struct intel_guc_client *client) +{ + unreserve_doorbell(client); + i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP); + ida_simple_remove(&client->guc->stage_ids, client->stage_id); + kfree(client); +} + +static inline bool ctx_save_restore_disabled(struct intel_context *ce) +{ + u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1]; + +#define SR_DISABLED \ + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \ + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) + + return (sr & SR_DISABLED) == SR_DISABLED; + +#undef SR_DISABLED +} + +static int guc_clients_create(struct intel_guc *guc) +{ + struct intel_guc_client *client; + + GEM_BUG_ON(guc->execbuf_client); + + client = guc_client_alloc(guc, GUC_CLIENT_PRIORITY_KMD_NORMAL); + if (IS_ERR(client)) { + DRM_ERROR("Failed to create GuC client for submission!\n"); + return PTR_ERR(client); + } + guc->execbuf_client = client; + + return 0; +} + +static void guc_clients_destroy(struct intel_guc *guc) +{ + struct intel_guc_client *client; + + client = fetch_and_zero(&guc->execbuf_client); + if (client) + guc_client_free(client); +} + +static int __guc_client_enable(struct intel_guc_client *client) +{ + int ret; + + guc_proc_desc_init(client); + guc_stage_desc_init(client); + + ret = create_doorbell(client); + if (ret) + goto fail; + + return 0; + +fail: + guc_stage_desc_fini(client); + guc_proc_desc_fini(client); + return ret; +} + +static void __guc_client_disable(struct intel_guc_client *client) +{ + /* + * By the time we're here, GuC may have already been reset. if that is + * the case, instead of trying (in vain) to communicate with it, let's + * just cleanup the doorbell HW and our internal state. + */ + if (intel_guc_is_running(client->guc)) + destroy_doorbell(client); + else + __fini_doorbell(client); + + guc_stage_desc_fini(client); + guc_proc_desc_fini(client); +} + +static int guc_clients_enable(struct intel_guc *guc) +{ + return __guc_client_enable(guc->execbuf_client); +} + +static void guc_clients_disable(struct intel_guc *guc) +{ + if (guc->execbuf_client) + __guc_client_disable(guc->execbuf_client); +} + +/* + * Set up the memory resources to be shared with the GuC (via the GGTT) + * at firmware loading time. + */ +int intel_guc_submission_init(struct intel_guc *guc) +{ + int ret; + + if (guc->stage_desc_pool) + return 0; + + ret = guc_stage_desc_pool_create(guc); + if (ret) + return ret; + /* + * Keep static analysers happy, let them know that we allocated the + * vma after testing that it didn't exist earlier. 
+ */ + GEM_BUG_ON(!guc->stage_desc_pool); + + WARN_ON(!guc_verify_doorbells(guc)); + ret = guc_clients_create(guc); + if (ret) + goto err_pool; + + return 0; + +err_pool: + guc_stage_desc_pool_destroy(guc); + return ret; +} + +void intel_guc_submission_fini(struct intel_guc *guc) +{ + guc_clients_destroy(guc); + WARN_ON(!guc_verify_doorbells(guc)); + + if (guc->stage_desc_pool) + guc_stage_desc_pool_destroy(guc); +} + +static void guc_interrupts_capture(struct intel_gt *gt) +{ + struct intel_rps *rps = >->i915->gt_pm.rps; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int irqs; + + /* tell all command streamers to forward interrupts (but not vblank) + * to GuC + */ + irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); + for_each_engine(engine, gt->i915, id) + ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); + + /* route USER_INTERRUPT to Host, all others are sent to GuC. */ + irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + /* These three registers have the same bit definitions */ + intel_uncore_write(uncore, GUC_BCS_RCS_IER, ~irqs); + intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, ~irqs); + intel_uncore_write(uncore, GUC_WD_VECS_IER, ~irqs); + + /* + * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all + * (unmasked) PM interrupts to the GuC. All other bits of this + * register *disable* generation of a specific interrupt. + * + * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when + * writing to the PM interrupt mask register, i.e. interrupts + * that must not be disabled. + * + * If the GuC is handling these interrupts, then we must not let + * the PM code disable ANY interrupt that the GuC is expecting. + * So for each ENABLED (0) bit in this register, we must SET the + * bit in pm_intrmsk_mbz so that it's left enabled for the GuC. + * GuC needs ARAT expired interrupt unmasked hence it is set in + * pm_intrmsk_mbz. + * + * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will + * result in the register bit being left SET! + */ + rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; +} + +static void guc_interrupts_release(struct intel_gt *gt) +{ + struct intel_rps *rps = >->i915->gt_pm.rps; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int irqs; + + /* + * tell all command streamers NOT to forward interrupts or vblank + * to GuC. + */ + irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); + irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); + for_each_engine(engine, gt->i915, id) + ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); + + /* route all GT interrupts to the host */ + intel_uncore_write(uncore, GUC_BCS_RCS_IER, 0); + intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, 0); + intel_uncore_write(uncore, GUC_WD_VECS_IER, 0); + + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; +} + +static void guc_set_default_submission(struct intel_engine_cs *engine) +{ + /* + * We inherit a bunch of functions from execlists that we'd like + * to keep using: + * + * engine->submit_request = execlists_submit_request; + * engine->cancel_requests = execlists_cancel_requests; + * engine->schedule = execlists_schedule; + * + * But we need to override the actual submission backend in order + * to talk to the GuC. 
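+ * Requests still flow through the execlists scheduling machinery, but the
+ * tasklet installed below hands them to the GuC as work items instead of
+ * writing the ELSP directly.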
+ */ + intel_execlists_set_default_submission(engine); + + engine->execlists.tasklet.func = guc_submission_tasklet; + + /* do not use execlists park/unpark */ + engine->park = engine->unpark = NULL; + + engine->reset.prepare = guc_reset_prepare; + engine->reset.reset = guc_reset; + engine->reset.finish = guc_reset_finish; + + engine->cancel_requests = guc_cancel_requests; + + engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; + engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; + + /* + * For the breadcrumb irq to work we need the interrupts to stay + * enabled. However, on all platforms on which we'll have support for + * GuC submission we don't allow disabling the interrupts at runtime, so + * we're always safe with the current flow. + */ + GEM_BUG_ON(engine->irq_enable || engine->irq_disable); +} + +int intel_guc_submission_enable(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + err = i915_inject_load_error(gt->i915, -ENXIO); + if (err) + return err; + + /* + * We're using GuC work items for submitting work through GuC. Since + * we're coalescing multiple requests from a single context into a + * single work item prior to assigning it to execlist_port, we can + * never have more work items than the total number of ports (for all + * engines). The GuC firmware is controlling the HEAD of work queue, + * and it is guaranteed that it will remove the work item from the + * queue before our request is completed. + */ + BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) * + sizeof(struct guc_wq_item) * + I915_NUM_ENGINES > GUC_WQ_SIZE); + + GEM_BUG_ON(!guc->execbuf_client); + + err = guc_clients_enable(guc); + if (err) + return err; + + /* Take over from manual control of ELSP (execlists) */ + guc_interrupts_capture(gt); + + for_each_engine(engine, gt->i915, id) { + engine->set_default_submission = guc_set_default_submission; + engine->set_default_submission(engine); + } + + return 0; +} + +void intel_guc_submission_disable(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + GEM_BUG_ON(gt->awake); /* GT should be parked first */ + + guc_interrupts_release(gt); + guc_clients_disable(guc); +} + +static bool __guc_submission_support(struct intel_guc *guc) +{ + /* XXX: GuC submission is unavailable for now */ + return false; + + if (!intel_guc_is_supported(guc)) + return false; + + return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; +} + +void intel_guc_submission_init_early(struct intel_guc *guc) +{ + guc->submission_supported = __guc_submission_support(guc); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_guc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h new file mode 100644 index 000000000000..54d716828352 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_GUC_SUBMISSION_H_ +#define _INTEL_GUC_SUBMISSION_H_ + +#include <linux/spinlock.h> + +#include "gt/intel_engine_types.h" + +#include "i915_gem.h" +#include "i915_selftest.h" + +struct drm_i915_private; + +/* + * This structure primarily describes the GEM object shared with the GuC. + * The specs sometimes refer to this object as a "GuC context", but we use + * the term "client" to avoid confusion with hardware contexts. 
This + * GEM object is held for the entire lifetime of our interaction with + * the GuC, being allocated before the GuC is loaded with its firmware. + * Because there's no way to update the address used by the GuC after + * initialisation, the shared object must stay pinned into the GGTT as + * long as the GuC is in use. We also keep the first page (only) mapped + * into kernel address space, as it includes shared data that must be + * updated on every request submission. + * + * The single GEM object described here is actually made up of several + * separate areas, as far as the GuC is concerned. The first page (kept + * kmap'd) includes the "process descriptor" which holds sequence data for + * the doorbell, and one cacheline which actually *is* the doorbell; a + * write to this will "ring the doorbell" (i.e. send an interrupt to the + * GuC). The subsequent pages of the client object constitute the work + * queue (a circular array of work items), again described in the process + * descriptor. Work queue pages are mapped momentarily as required. + */ +struct intel_guc_client { + struct i915_vma *vma; + void *vaddr; + struct intel_guc *guc; + + /* bitmap of (host) engine ids */ + u32 priority; + u32 stage_id; + u32 proc_desc_offset; + + u16 doorbell_id; + unsigned long doorbell_offset; + + /* Protects GuC client's WQ access */ + spinlock_t wq_lock; + + /* For testing purposes, use nop WQ items instead of real ones */ + I915_SELFTEST_DECLARE(bool use_nop_wqi); +}; + +void intel_guc_submission_init_early(struct intel_guc *guc); +int intel_guc_submission_init(struct intel_guc *guc); +int intel_guc_submission_enable(struct intel_guc *guc); +void intel_guc_submission_disable(struct intel_guc *guc); +void intel_guc_submission_fini(struct intel_guc *guc); +int intel_guc_preempt_work_create(struct intel_guc *guc); +void intel_guc_preempt_work_destroy(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c new file mode 100644 index 000000000000..d4625c97b4f9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2016-2019 Intel Corporation + */ + +#include <linux/types.h> + +#include "gt/intel_gt.h" +#include "intel_huc.h" +#include "i915_drv.h" + +void intel_huc_init_early(struct intel_huc *huc) +{ + struct drm_i915_private *i915 = huc_to_gt(huc)->i915; + + intel_huc_fw_init_early(huc); + + if (INTEL_GEN(i915) >= 11) { + huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO; + huc->status.mask = HUC_LOAD_SUCCESSFUL; + huc->status.value = HUC_LOAD_SUCCESSFUL; + } else { + huc->status.reg = HUC_STATUS2; + huc->status.mask = HUC_FW_VERIFIED; + huc->status.value = HUC_FW_VERIFIED; + } +} + +static int intel_huc_rsa_data_create(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + struct intel_guc *guc = >->uc.guc; + struct i915_vma *vma; + size_t copied; + void *vaddr; + int err; + + err = i915_inject_load_error(gt->i915, -ENXIO); + if (err) + return err; + + /* + * HuC firmware will sit above GUC_GGTT_TOP and will not map + * through GTT. Unfortunately, this means GuC cannot perform + * the HuC auth. as the rsa offset now falls within the GuC + * inaccessible range. We resort to perma-pinning an additional + * vma within the accessible range that only contains the rsa + * signature. The GuC can use this extra pinning to perform + * the authentication since its GGTT offset will be GuC + * accessible. 
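+ * Only the signature needs to fit in this extra allocation, hence the
+ * single-page VMA and the GEM_BUG_ON on rsa_size below.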
+ */ + GEM_BUG_ON(huc->fw.rsa_size > PAGE_SIZE); + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size); + GEM_BUG_ON(copied < huc->fw.rsa_size); + + i915_gem_object_unpin_map(vma->obj); + + huc->rsa_data = vma; + + return 0; +} + +static void intel_huc_rsa_data_destroy(struct intel_huc *huc) +{ + i915_vma_unpin_and_release(&huc->rsa_data, 0); +} + +int intel_huc_init(struct intel_huc *huc) +{ + struct drm_i915_private *i915 = huc_to_gt(huc)->i915; + int err; + + err = intel_uc_fw_init(&huc->fw); + if (err) + goto out; + + /* + * HuC firmware image is outside GuC accessible range. + * Copy the RSA signature out of the image into + * a perma-pinned region set aside for it + */ + err = intel_huc_rsa_data_create(huc); + if (err) + goto out_fini; + + return 0; + +out_fini: + intel_uc_fw_fini(&huc->fw); +out: + intel_uc_fw_cleanup_fetch(&huc->fw); + DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "failed with %d\n", err); + return err; +} + +void intel_huc_fini(struct intel_huc *huc) +{ + if (!intel_uc_fw_is_available(&huc->fw)) + return; + + intel_huc_rsa_data_destroy(huc); + intel_uc_fw_fini(&huc->fw); +} + +/** + * intel_huc_auth() - Authenticate HuC uCode + * @huc: intel_huc structure + * + * Called after HuC and GuC firmware loading during intel_uc_init_hw(). + * + * This function pins HuC firmware image object into GGTT. + * Then it invokes GuC action to authenticate passing the offset to RSA + * signature through intel_guc_auth_huc(). It then waits for 50ms for + * firmware verification ACK and unpins the object. + */ +int intel_huc_auth(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + struct intel_guc *guc = >->uc.guc; + int ret; + + GEM_BUG_ON(intel_huc_is_authenticated(huc)); + + if (!intel_uc_fw_is_loaded(&huc->fw)) + return -ENOEXEC; + + ret = i915_inject_load_error(gt->i915, -ENXIO); + if (ret) + goto fail; + + ret = intel_guc_auth_huc(guc, + intel_guc_ggtt_offset(guc, huc->rsa_data)); + if (ret) { + DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); + goto fail; + } + + /* Check authentication status, it should be done by now */ + ret = __intel_wait_for_register(gt->uncore, + huc->status.reg, + huc->status.mask, + huc->status.value, + 2, 50, NULL); + if (ret) { + DRM_ERROR("HuC: Firmware not verified %d\n", ret); + goto fail; + } + + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + return 0; + +fail: + i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret); + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL); + return ret; +} + +/** + * intel_huc_check_status() - check HuC status + * @huc: intel_huc structure + * + * This function reads status register to verify if HuC + * firmware was successfully loaded. + * + * Returns: 1 if HuC firmware is loaded and verified, + * 0 if HuC firmware is not loaded and -ENODEV if HuC + * is not present on this platform. 
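+ * The register and bits to check are Gen-specific and are selected in
+ * intel_huc_init_early().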
+ */ +int intel_huc_check_status(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + intel_wakeref_t wakeref; + u32 status = 0; + + if (!intel_huc_is_supported(huc)) + return -ENODEV; + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + status = intel_uncore_read(gt->uncore, huc->status.reg); + + return (status & huc->status.mask) == huc->status.value; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h new file mode 100644 index 000000000000..644c059fe01d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_HUC_H_ +#define _INTEL_HUC_H_ + +#include "i915_reg.h" +#include "intel_uc_fw.h" +#include "intel_huc_fw.h" + +struct intel_huc { + /* Generic uC firmware management */ + struct intel_uc_fw fw; + + /* HuC-specific additions */ + struct i915_vma *rsa_data; + + struct { + i915_reg_t reg; + u32 mask; + u32 value; + } status; +}; + +void intel_huc_init_early(struct intel_huc *huc); +int intel_huc_init(struct intel_huc *huc); +void intel_huc_fini(struct intel_huc *huc); +int intel_huc_auth(struct intel_huc *huc); +int intel_huc_check_status(struct intel_huc *huc); + +static inline int intel_huc_sanitize(struct intel_huc *huc) +{ + intel_uc_fw_sanitize(&huc->fw); + return 0; +} + +static inline bool intel_huc_is_supported(struct intel_huc *huc) +{ + return intel_uc_fw_is_supported(&huc->fw); +} + +static inline bool intel_huc_is_enabled(struct intel_huc *huc) +{ + return intel_uc_fw_is_enabled(&huc->fw); +} + +static inline bool intel_huc_is_authenticated(struct intel_huc *huc) +{ + return intel_uc_fw_is_running(&huc->fw); +} + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c new file mode 100644 index 000000000000..74602487ed67 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#include "gt/intel_gt.h" +#include "intel_huc_fw.h" +#include "i915_drv.h" + +/** + * DOC: HuC Firmware + * + * Motivation: + * GEN9 introduces a new dedicated firmware for usage in media HEVC (High + * Efficiency Video Coding) operations. Userspace can use the firmware + * capabilities by adding HuC specific commands to batch buffers. + * + * Implementation: + * The same firmware loader is used as the GuC. However, the actual + * loading to HW is deferred until GEM initialization is done. + * + * Note that HuC firmware loading must be done before GuC loading. + */ + +/** + * intel_huc_fw_init_early() - initializes HuC firmware struct + * @huc: intel_huc struct + * + * On platforms with HuC selects firmware for uploading + */ +void intel_huc_fw_init_early(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + struct intel_uc *uc = >->uc; + struct drm_i915_private *i915 = gt->i915; + + intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC, + intel_uc_uses_guc(uc), + INTEL_INFO(i915)->platform, INTEL_REVID(i915)); +} + +/** + * intel_huc_fw_upload() - load HuC uCode to device + * @huc: intel_huc structure + * + * Called from intel_uc_init_hw() during driver load, resume from sleep and + * after a GPU reset. Note that HuC must be loaded before GuC. + * + * The firmware image should have already been fetched into memory, so only + * check that fetch succeeded, and then transfer the image to the h/w. 
+ * + * Return: non-zero code on error + */ +int intel_huc_fw_upload(struct intel_huc *huc) +{ + /* HW doesn't look at destination address for HuC, so set it to 0 */ + return intel_uc_fw_upload(&huc->fw, huc_to_gt(huc), 0, HUC_UKERNEL); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h new file mode 100644 index 000000000000..b791269ce923 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_HUC_FW_H_ +#define _INTEL_HUC_FW_H_ + +struct intel_huc; + +void intel_huc_fw_init_early(struct intel_huc *huc); +int intel_huc_fw_upload(struct intel_huc *huc); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c new file mode 100644 index 000000000000..71ee7ab035cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -0,0 +1,627 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2016-2019 Intel Corporation + */ + +#include "gt/intel_gt.h" +#include "gt/intel_reset.h" +#include "intel_guc.h" +#include "intel_guc_ads.h" +#include "intel_guc_submission.h" +#include "intel_uc.h" + +#include "i915_drv.h" + +/* Reset GuC providing us with fresh state for both GuC and HuC. + */ +static int __intel_uc_reset_hw(struct intel_uc *uc) +{ + struct intel_gt *gt = uc_to_gt(uc); + int ret; + u32 guc_status; + + ret = i915_inject_load_error(gt->i915, -ENXIO); + if (ret) + return ret; + + ret = intel_reset_guc(gt); + if (ret) { + DRM_ERROR("Failed to reset GuC, ret = %d\n", ret); + return ret; + } + + guc_status = intel_uncore_read(gt->uncore, GUC_STATUS); + WARN(!(guc_status & GS_MIA_IN_RESET), + "GuC status: 0x%x, MIA core expected to be in reset\n", + guc_status); + + return ret; +} + +static void __confirm_options(struct intel_uc *uc) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + + DRM_DEV_DEBUG_DRIVER(i915->drm.dev, + "enable_guc=%d (guc:%s submission:%s huc:%s)\n", + i915_modparams.enable_guc, + yesno(intel_uc_uses_guc(uc)), + yesno(intel_uc_uses_guc_submission(uc)), + yesno(intel_uc_uses_huc(uc))); + + if (i915_modparams.enable_guc == -1) + return; + + if (i915_modparams.enable_guc == 0) { + GEM_BUG_ON(intel_uc_uses_guc(uc)); + GEM_BUG_ON(intel_uc_uses_guc_submission(uc)); + GEM_BUG_ON(intel_uc_uses_huc(uc)); + return; + } + + if (!intel_uc_supports_guc(uc)) + dev_info(i915->drm.dev, + "Incompatible option enable_guc=%d - %s\n", + i915_modparams.enable_guc, "GuC is not supported!"); + + if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC && + !intel_uc_supports_huc(uc)) + dev_info(i915->drm.dev, + "Incompatible option enable_guc=%d - %s\n", + i915_modparams.enable_guc, "HuC is not supported!"); + + if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION && + !intel_uc_supports_guc_submission(uc)) + dev_info(i915->drm.dev, + "Incompatible option enable_guc=%d - %s\n", + i915_modparams.enable_guc, "GuC submission is N/A"); + + if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION | + ENABLE_GUC_LOAD_HUC)) + dev_info(i915->drm.dev, + "Incompatible option enable_guc=%d - %s\n", + i915_modparams.enable_guc, "undocumented flag"); +} + +void intel_uc_init_early(struct intel_uc *uc) +{ + intel_guc_init_early(&uc->guc); + intel_huc_init_early(&uc->huc); + + __confirm_options(uc); +} + +void intel_uc_driver_late_release(struct intel_uc *uc) +{ +} + +/** + * intel_uc_init_mmio - setup uC MMIO access + * @uc: the intel_uc structure + * + * Setup minimal state necessary 
for MMIO accesses later in the + * initialization sequence. + */ +void intel_uc_init_mmio(struct intel_uc *uc) +{ + intel_guc_init_send_regs(&uc->guc); +} + +static void __uc_capture_load_err_log(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + if (guc->log.vma && !uc->load_err_log) + uc->load_err_log = i915_gem_object_get(guc->log.vma->obj); +} + +static void __uc_free_load_err_log(struct intel_uc *uc) +{ + struct drm_i915_gem_object *log = fetch_and_zero(&uc->load_err_log); + + if (log) + i915_gem_object_put(log); +} + +/* + * Events triggered while CT buffers are disabled are logged in the SCRATCH_15 + * register using the same bits used in the CT message payload. Since our + * communication channel with guc is turned off at this point, we can save the + * message and handle it after we turn it back on. + */ +static void guc_clear_mmio_msg(struct intel_guc *guc) +{ + intel_uncore_write(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15), 0); +} + +static void guc_get_mmio_msg(struct intel_guc *guc) +{ + u32 val; + + spin_lock_irq(&guc->irq_lock); + + val = intel_uncore_read(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15)); + guc->mmio_msg |= val & guc->msg_enabled_mask; + + /* + * clear all events, including the ones we're not currently servicing, + * to make sure we don't try to process a stale message if we enable + * handling of more events later. + */ + guc_clear_mmio_msg(guc); + + spin_unlock_irq(&guc->irq_lock); +} + +static void guc_handle_mmio_msg(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + /* we need communication to be enabled to reply to GuC */ + GEM_BUG_ON(guc->handler == intel_guc_to_host_event_handler_nop); + + if (!guc->mmio_msg) + return; + + spin_lock_irq(&i915->irq_lock); + intel_guc_to_host_process_recv_msg(guc, &guc->mmio_msg, 1); + spin_unlock_irq(&i915->irq_lock); + + guc->mmio_msg = 0; +} + +static void guc_reset_interrupts(struct intel_guc *guc) +{ + guc->interrupts.reset(guc); +} + +static void guc_enable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.enable(guc); +} + +static void guc_disable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.disable(guc); +} + +static inline bool guc_communication_enabled(struct intel_guc *guc) +{ + return guc->send != intel_guc_send_nop; +} + +static int guc_enable_communication(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int ret; + + GEM_BUG_ON(guc_communication_enabled(guc)); + + ret = i915_inject_load_error(i915, -ENXIO); + if (ret) + return ret; + + ret = intel_guc_ct_enable(&guc->ct); + if (ret) + return ret; + + guc->send = intel_guc_send_ct; + guc->handler = intel_guc_to_host_event_handler_ct; + + /* check for mmio messages received before/during the CT enable */ + guc_get_mmio_msg(guc); + guc_handle_mmio_msg(guc); + + guc_enable_interrupts(guc); + + /* check for CT messages received before we enabled interrupts */ + spin_lock_irq(&i915->irq_lock); + intel_guc_to_host_event_handler_ct(guc); + spin_unlock_irq(&i915->irq_lock); + + DRM_INFO("GuC communication enabled\n"); + + return 0; +} + +static void guc_stop_communication(struct intel_guc *guc) +{ + intel_guc_ct_stop(&guc->ct); + + guc->send = intel_guc_send_nop; + guc->handler = intel_guc_to_host_event_handler_nop; + + guc_clear_mmio_msg(guc); +} + +static void guc_disable_communication(struct intel_guc *guc) +{ + /* + * Events generated during or after CT disable are logged by guc in + * via mmio. 
Make sure the register is clear before disabling CT since + * all events we cared about have already been processed via CT. + */ + guc_clear_mmio_msg(guc); + + guc_disable_interrupts(guc); + + guc->send = intel_guc_send_nop; + guc->handler = intel_guc_to_host_event_handler_nop; + + intel_guc_ct_disable(&guc->ct); + + /* + * Check for messages received during/after the CT disable. We do not + * expect any messages to have arrived via CT between the interrupt + * disable and the CT disable because GuC should've been idle until we + * triggered the CT disable protocol. + */ + guc_get_mmio_msg(guc); + + DRM_INFO("GuC communication disabled\n"); +} + +void intel_uc_fetch_firmwares(struct intel_uc *uc) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + int err; + + if (!intel_uc_uses_guc(uc)) + return; + + err = intel_uc_fw_fetch(&uc->guc.fw, i915); + if (err) + return; + + if (intel_uc_uses_huc(uc)) + intel_uc_fw_fetch(&uc->huc.fw, i915); +} + +void intel_uc_cleanup_firmwares(struct intel_uc *uc) +{ + if (!intel_uc_uses_guc(uc)) + return; + + if (intel_uc_uses_huc(uc)) + intel_uc_fw_cleanup_fetch(&uc->huc.fw); + + intel_uc_fw_cleanup_fetch(&uc->guc.fw); +} + +void intel_uc_init(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + struct intel_huc *huc = &uc->huc; + int ret; + + if (!intel_uc_uses_guc(uc)) + return; + + /* XXX: GuC submission is unavailable for now */ + GEM_BUG_ON(intel_uc_supports_guc_submission(uc)); + + ret = intel_guc_init(guc); + if (ret) { + intel_uc_fw_cleanup_fetch(&huc->fw); + return; + } + + if (intel_uc_uses_huc(uc)) + intel_huc_init(huc); +} + +void intel_uc_fini(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + if (!intel_uc_uses_guc(uc)) + return; + + if (intel_uc_uses_huc(uc)) + intel_huc_fini(&uc->huc); + + intel_guc_fini(guc); + + __uc_free_load_err_log(uc); +} + +static int __uc_sanitize(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + struct intel_huc *huc = &uc->huc; + + GEM_BUG_ON(!intel_uc_supports_guc(uc)); + + intel_huc_sanitize(huc); + intel_guc_sanitize(guc); + + return __intel_uc_reset_hw(uc); +} + +void intel_uc_sanitize(struct intel_uc *uc) +{ + if (!intel_uc_supports_guc(uc)) + return; + + __uc_sanitize(uc); +} + +/* Initialize and verify the uC regs related to uC positioning in WOPCM */ +static int uc_init_wopcm(struct intel_uc *uc) +{ + struct intel_gt *gt = uc_to_gt(uc); + struct intel_uncore *uncore = gt->uncore; + u32 base = intel_wopcm_guc_base(>->i915->wopcm); + u32 size = intel_wopcm_guc_size(>->i915->wopcm); + u32 huc_agent = intel_uc_uses_huc(uc) ? 
HUC_LOADING_AGENT_GUC : 0; + u32 mask; + int err; + + if (unlikely(!base || !size)) { + i915_probe_error(gt->i915, "Unsuccessful WOPCM partitioning\n"); + return -E2BIG; + } + + GEM_BUG_ON(!intel_uc_supports_guc(uc)); + GEM_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK)); + GEM_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK); + GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); + GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); + + err = i915_inject_load_error(gt->i915, -ENXIO); + if (err) + return err; + + mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED; + err = intel_uncore_write_and_verify(uncore, GUC_WOPCM_SIZE, size, mask, + size | GUC_WOPCM_SIZE_LOCKED); + if (err) + goto err_out; + + mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; + err = intel_uncore_write_and_verify(uncore, DMA_GUC_WOPCM_OFFSET, + base | huc_agent, mask, + base | huc_agent | + GUC_WOPCM_OFFSET_VALID); + if (err) + goto err_out; + + return 0; + +err_out: + i915_probe_error(gt->i915, "Failed to init uC WOPCM registers!\n"); + i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET", + i915_mmio_reg_offset(DMA_GUC_WOPCM_OFFSET), + intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET)); + i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE", + i915_mmio_reg_offset(GUC_WOPCM_SIZE), + intel_uncore_read(uncore, GUC_WOPCM_SIZE)); + + return err; +} + +static bool uc_is_wopcm_locked(struct intel_uc *uc) +{ + struct intel_gt *gt = uc_to_gt(uc); + struct intel_uncore *uncore = gt->uncore; + + return (intel_uncore_read(uncore, GUC_WOPCM_SIZE) & GUC_WOPCM_SIZE_LOCKED) || + (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID); +} + +int intel_uc_init_hw(struct intel_uc *uc) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + struct intel_guc *guc = &uc->guc; + struct intel_huc *huc = &uc->huc; + int ret, attempts; + + if (!intel_uc_supports_guc(uc)) + return 0; + + /* + * We can silently continue without GuC only if it was never enabled + * before on this system after reboot, otherwise we risk GPU hangs. + * To check if GuC was loaded before we look at WOPCM registers. + */ + if (!intel_uc_uses_guc(uc) && !uc_is_wopcm_locked(uc)) + return 0; + + if (!intel_uc_fw_is_available(&guc->fw)) { + ret = uc_is_wopcm_locked(uc) || + intel_uc_fw_is_overridden(&guc->fw) || + intel_uc_supports_guc_submission(uc) ? + intel_uc_fw_status_to_error(guc->fw.status) : 0; + goto err_out; + } + + ret = uc_init_wopcm(uc); + if (ret) + goto err_out; + + guc_reset_interrupts(guc); + + /* WaEnableuKernelHeaderValidFix:skl */ + /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ + if (IS_GEN(i915, 9)) + attempts = 3; + else + attempts = 1; + + while (attempts--) { + /* + * Always reset the GuC just before (re)loading, so + * that the state and timing are fairly predictable + */ + ret = __uc_sanitize(uc); + if (ret) + goto err_out; + + intel_huc_fw_upload(huc); + intel_guc_ads_reset(guc); + intel_guc_write_params(guc); + ret = intel_guc_fw_upload(guc); + if (ret == 0) + break; + + DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and " + "retry %d more time(s)\n", ret, attempts); + } + + /* Did we succeded or run out of retries? 
*/ + if (ret) + goto err_log_capture; + + ret = guc_enable_communication(guc); + if (ret) + goto err_log_capture; + + intel_huc_auth(huc); + + ret = intel_guc_sample_forcewake(guc); + if (ret) + goto err_communication; + + if (intel_uc_supports_guc_submission(uc)) { + ret = intel_guc_submission_enable(guc); + if (ret) + goto err_communication; + } + + dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", + intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, + guc->fw.major_ver_found, guc->fw.minor_ver_found, + "submission", + enableddisabled(intel_uc_supports_guc_submission(uc))); + + if (intel_uc_uses_huc(uc)) { + dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", + intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), + huc->fw.path, + huc->fw.major_ver_found, huc->fw.minor_ver_found, + "authenticated", + yesno(intel_huc_is_authenticated(huc))); + } + + return 0; + + /* + * We've failed to load the firmware :( + */ +err_communication: + guc_disable_communication(guc); +err_log_capture: + __uc_capture_load_err_log(uc); +err_out: + __uc_sanitize(uc); + + if (!ret) { + dev_notice(i915->drm.dev, "GuC is uninitialized\n"); + /* We want to run without GuC submission */ + return 0; + } + + i915_probe_error(i915, "GuC initialization failed %d\n", ret); + + /* We want to keep KMS alive */ + return -EIO; +} + +void intel_uc_fini_hw(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + if (!intel_guc_is_running(guc)) + return; + + if (intel_uc_supports_guc_submission(uc)) + intel_guc_submission_disable(guc); + + guc_disable_communication(guc); + __uc_sanitize(uc); +} + +/** + * intel_uc_reset_prepare - Prepare for reset + * @uc: the intel_uc structure + * + * Preparing for full gpu reset. + */ +void intel_uc_reset_prepare(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + if (!intel_guc_is_running(guc)) + return; + + guc_stop_communication(guc); + __uc_sanitize(uc); +} + +void intel_uc_runtime_suspend(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + int err; + + if (!intel_guc_is_running(guc)) + return; + + err = intel_guc_suspend(guc); + if (err) + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + + guc_disable_communication(guc); +} + +void intel_uc_suspend(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + intel_wakeref_t wakeref; + + if (!intel_guc_is_running(guc)) + return; + + with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) + intel_uc_runtime_suspend(uc); +} + +static int __uc_resume(struct intel_uc *uc, bool enable_communication) +{ + struct intel_guc *guc = &uc->guc; + int err; + + if (!intel_guc_is_running(guc)) + return 0; + + /* Make sure we enable communication if and only if it's disabled */ + GEM_BUG_ON(enable_communication == guc_communication_enabled(guc)); + + if (enable_communication) + guc_enable_communication(guc); + + err = intel_guc_resume(guc); + if (err) { + DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err); + return err; + } + + return 0; +} + +int intel_uc_resume(struct intel_uc *uc) +{ + /* + * When coming out of S3/S4 we sanitize and re-init the HW, so + * communication is already re-enabled at this point. + */ + return __uc_resume(uc, false); +} + +int intel_uc_runtime_resume(struct intel_uc *uc) +{ + /* + * During runtime resume we don't sanitize, so we need to re-init + * communication as well. 
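
The resume paths above share one internal helper, __uc_resume(), that only differs in whether GuC communication has to be re-established. Below is a small standalone sketch of that shared-helper-with-a-flag pattern; all names are purely illustrative.

    #include <stdbool.h>
    #include <stdio.h>

    static bool comm_enabled;

    static int sketch_resume(bool enable_communication)
    {
        if (enable_communication)
            comm_enabled = true;   /* stands in for guc_enable_communication() */

        printf("resume: communication is %s\n", comm_enabled ? "up" : "down");
        return 0;
    }

    /* S3/S4 resume: HW was sanitized and re-initialized, communication is already up */
    static int sketch_system_resume(void)
    {
        return sketch_resume(false);
    }

    /* runtime resume: nothing was sanitized, communication must be brought back up */
    static int sketch_runtime_resume(void)
    {
        return sketch_resume(true);
    }

    int main(void)
    {
        comm_enabled = true;
        sketch_system_resume();

        comm_enabled = false;
        sketch_runtime_resume();
        return 0;
    }
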
+ */ + return __uc_resume(uc, true); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h new file mode 100644 index 000000000000..527995c21196 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_UC_H_ +#define _INTEL_UC_H_ + +#include "intel_guc.h" +#include "intel_huc.h" +#include "i915_params.h" + +struct intel_uc { + struct intel_guc guc; + struct intel_huc huc; + + /* Snapshot of GuC log from last failed load */ + struct drm_i915_gem_object *load_err_log; +}; + +void intel_uc_init_early(struct intel_uc *uc); +void intel_uc_driver_late_release(struct intel_uc *uc); +void intel_uc_init_mmio(struct intel_uc *uc); +void intel_uc_fetch_firmwares(struct intel_uc *uc); +void intel_uc_cleanup_firmwares(struct intel_uc *uc); +void intel_uc_sanitize(struct intel_uc *uc); +void intel_uc_init(struct intel_uc *uc); +int intel_uc_init_hw(struct intel_uc *uc); +void intel_uc_fini_hw(struct intel_uc *uc); +void intel_uc_fini(struct intel_uc *uc); +void intel_uc_reset_prepare(struct intel_uc *uc); +void intel_uc_suspend(struct intel_uc *uc); +void intel_uc_runtime_suspend(struct intel_uc *uc); +int intel_uc_resume(struct intel_uc *uc); +int intel_uc_runtime_resume(struct intel_uc *uc); + +static inline bool intel_uc_supports_guc(struct intel_uc *uc) +{ + return intel_guc_is_supported(&uc->guc); +} + +static inline bool intel_uc_uses_guc(struct intel_uc *uc) +{ + return intel_guc_is_enabled(&uc->guc); +} + +static inline bool intel_uc_supports_guc_submission(struct intel_uc *uc) +{ + return intel_guc_is_submission_supported(&uc->guc); +} + +static inline bool intel_uc_uses_guc_submission(struct intel_uc *uc) +{ + return intel_guc_is_submission_supported(&uc->guc); +} + +static inline bool intel_uc_supports_huc(struct intel_uc *uc) +{ + return intel_uc_supports_guc(uc); +} + +static inline bool intel_uc_uses_huc(struct intel_uc *uc) +{ + return intel_huc_is_enabled(&uc->huc); +} + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c new file mode 100644 index 000000000000..bd22bf11adad --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -0,0 +1,616 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2016-2019 Intel Corporation + */ + +#include <linux/bitfield.h> +#include <linux/firmware.h> +#include <drm/drm_print.h> + +#include "intel_uc_fw.h" +#include "intel_uc_fw_abi.h" +#include "i915_drv.h" + +#ifdef CONFIG_DRM_I915_DEBUG_GUC +static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw) +{ + GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED); + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) + return container_of(uc_fw, struct intel_gt, uc.guc.fw); + + GEM_BUG_ON(uc_fw->type != INTEL_UC_FW_TYPE_HUC); + return container_of(uc_fw, struct intel_gt, uc.huc.fw); +} + +void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_status status) +{ + uc_fw->__status = status; + DRM_DEV_DEBUG_DRIVER(__uc_fw_to_gt(uc_fw)->i915->drm.dev, + "%s firmware -> %s\n", + intel_uc_fw_type_repr(uc_fw->type), + status == INTEL_UC_FIRMWARE_SELECTED ? + uc_fw->path : intel_uc_fw_status_repr(status)); +} +#endif + +/* + * List of required GuC and HuC binaries per-platform. + * Must be ordered based on platform + revid, from newer to older. 
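
The firmware file names in the table below are assembled at compile time by stringifying the version numbers, modelled on the __MAKE_UC_FW_PATH() macro defined just below. A self-contained userspace sketch of that composition (the SKETCH_UC_FW_PATH macro is local to this sketch, not part of the driver):

    #include <stdio.h>

    #define __stringify_1(x) #x
    #define __stringify(x) __stringify_1(x)

    #define SKETCH_UC_FW_PATH(prefix_, name_, sep_, major_, minor_, patch_) \
        "i915/" __stringify(prefix_) name_ \
        __stringify(major_) sep_ __stringify(minor_) sep_ \
        __stringify(patch_) ".bin"

    int main(void)
    {
        /* mirrors guc_def(icl, 33, 0, 0) and huc_def(kbl, 02, 00, 1810) */
        puts(SKETCH_UC_FW_PATH(icl, "_guc_", ".", 33, 0, 0));
        puts(SKETCH_UC_FW_PATH(kbl, "_huc_ver", "_", 02, 00, 1810));
        return 0;
    }

Compiled and run, this prints i915/icl_guc_33.0.0.bin and i915/kbl_huc_ver02_00_1810.bin, i.e. the same strings the table below feeds into MODULE_FIRMWARE().
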
+ */ +#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ + fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 8, 4, 3238)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 03, 01, 2893)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ + fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 01, 8, 2893)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 01, 07, 1398)) + +#define __MAKE_UC_FW_PATH(prefix_, name_, separator_, major_, minor_, patch_) \ + "i915/" \ + __stringify(prefix_) name_ \ + __stringify(major_) separator_ \ + __stringify(minor_) separator_ \ + __stringify(patch_) ".bin" + +#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ + __MAKE_UC_FW_PATH(prefix_, "_guc_", ".", major_, minor_, patch_) + +#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ + __MAKE_UC_FW_PATH(prefix_, "_huc_ver", "_", major_, minor_, bld_num_) + +/* All blobs need to be declared via MODULE_FIRMWARE() */ +#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \ + MODULE_FIRMWARE(guc_); \ + MODULE_FIRMWARE(huc_); + +INTEL_UC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH, MAKE_HUC_FW_PATH) + +/* The below structs and macros are used to iterate across the list of blobs */ +struct __packed uc_fw_blob { + u8 major; + u8 minor; + const char *path; +}; + +#define UC_FW_BLOB(major_, minor_, path_) \ + { .major = major_, .minor = minor_, .path = path_ } + +#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \ + UC_FW_BLOB(major_, minor_, \ + MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_)) + +#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \ + UC_FW_BLOB(major_, minor_, \ + MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_)) + +struct __packed uc_fw_platform_requirement { + enum intel_platform p; + u8 rev; /* first platform rev using this FW */ + const struct uc_fw_blob blobs[INTEL_UC_FW_NUM_TYPES]; +}; + +#define MAKE_FW_LIST(platform_, revid_, guc_, huc_) \ +{ \ + .p = INTEL_##platform_, \ + .rev = revid_, \ + .blobs[INTEL_UC_FW_TYPE_GUC] = guc_, \ + .blobs[INTEL_UC_FW_TYPE_HUC] = huc_, \ +}, + +static void +__uc_fw_auto_select(struct intel_uc_fw *uc_fw, enum intel_platform p, u8 rev) +{ + static const struct uc_fw_platform_requirement fw_blobs[] = { + INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB) + }; + int i; + + for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) { + if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { + const struct uc_fw_blob *blob = + &fw_blobs[i].blobs[uc_fw->type]; + uc_fw->path = blob->path; + uc_fw->major_ver_wanted = blob->major; + uc_fw->minor_ver_wanted = blob->minor; + break; + } + } + + /* make sure the list is ordered as expected */ + if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { + for (i = 1; i < ARRAY_SIZE(fw_blobs); i++) { + if (fw_blobs[i].p < fw_blobs[i - 1].p) + continue; + + if (fw_blobs[i].p == fw_blobs[i - 1].p && + fw_blobs[i].rev < fw_blobs[i - 1].rev) + continue; + + pr_err("invalid FW blob order: %s r%u comes before %s r%u\n", + intel_platform_name(fw_blobs[i - 1].p), + fw_blobs[i - 1].rev, + intel_platform_name(fw_blobs[i].p), + fw_blobs[i].rev); + + uc_fw->path = NULL; + } + } + + /* We don't want to enable GuC/HuC on pre-Gen11 by default */ + if (i915_modparams.enable_guc == -1 && p < INTEL_ICELAKE) + uc_fw->path = NULL; +} + +static const char *__override_guc_firmware_path(void) +{ + if (i915_modparams.enable_guc & (ENABLE_GUC_SUBMISSION | + 
ENABLE_GUC_LOAD_HUC)) + return i915_modparams.guc_firmware_path; + return ""; +} + +static const char *__override_huc_firmware_path(void) +{ + if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC) + return i915_modparams.huc_firmware_path; + return ""; +} + +static void __uc_fw_user_override(struct intel_uc_fw *uc_fw) +{ + const char *path = NULL; + + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_GUC: + path = __override_guc_firmware_path(); + break; + case INTEL_UC_FW_TYPE_HUC: + path = __override_huc_firmware_path(); + break; + } + + if (unlikely(path)) { + uc_fw->path = path; + uc_fw->user_overridden = true; + } +} + +/** + * intel_uc_fw_init_early - initialize the uC object and select the firmware + * @uc_fw: uC firmware + * @type: type of uC + * @supported: is uC support possible + * @platform: platform identifier + * @rev: hardware revision + * + * Initialize the state of our uC object and relevant tracking and select the + * firmware to fetch and load. + */ +void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_type type, bool supported, + enum intel_platform platform, u8 rev) +{ + /* + * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status + * before we're looked at the HW caps to see if we have uc support + */ + BUILD_BUG_ON(INTEL_UC_FIRMWARE_UNINITIALIZED); + GEM_BUG_ON(uc_fw->status); + GEM_BUG_ON(uc_fw->path); + + uc_fw->type = type; + + if (supported) { + __uc_fw_auto_select(uc_fw, platform, rev); + __uc_fw_user_override(uc_fw); + } + + intel_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? + INTEL_UC_FIRMWARE_SELECTED : + INTEL_UC_FIRMWARE_DISABLED : + INTEL_UC_FIRMWARE_NOT_SUPPORTED); +} + +static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, + struct drm_i915_private *i915, + int e) +{ + bool user = e == -EINVAL; + + if (i915_inject_load_error(i915, e)) { + /* non-existing blob */ + uc_fw->path = "<invalid>"; + uc_fw->user_overridden = user; + } else if (i915_inject_load_error(i915, e)) { + /* require next major version */ + uc_fw->major_ver_wanted += 1; + uc_fw->minor_ver_wanted = 0; + uc_fw->user_overridden = user; + } else if (i915_inject_load_error(i915, e)) { + /* require next minor version */ + uc_fw->minor_ver_wanted += 1; + uc_fw->user_overridden = user; + } else if (uc_fw->major_ver_wanted && i915_inject_load_error(i915, e)) { + /* require prev major version */ + uc_fw->major_ver_wanted -= 1; + uc_fw->minor_ver_wanted = 0; + uc_fw->user_overridden = user; + } else if (uc_fw->minor_ver_wanted && i915_inject_load_error(i915, e)) { + /* require prev minor version - hey, this should work! */ + uc_fw->minor_ver_wanted -= 1; + uc_fw->user_overridden = user; + } else if (user && i915_inject_load_error(i915, e)) { + /* officially unsupported platform */ + uc_fw->major_ver_wanted = 0; + uc_fw->minor_ver_wanted = 0; + uc_fw->user_overridden = true; + } +} + +/** + * intel_uc_fw_fetch - fetch uC firmware + * @uc_fw: uC firmware + * @i915: device private + * + * Fetch uC firmware into GEM obj. + * + * Return: 0 on success, a negative errno code on failure. 
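
intel_uc_fw_init_early() above folds the selected path pointer into a three-way status: a NULL path means the uC is not supported at all, an empty string means it is supported but disabled, and a non-empty path means a firmware blob was selected. A minimal standalone sketch of that decision, with sketch-local names:

    #include <stdio.h>

    enum sketch_status { NOT_SUPPORTED, DISABLED, SELECTED };

    static enum sketch_status classify(const char *path)
    {
        return path ? (*path ? SELECTED : DISABLED) : NOT_SUPPORTED;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               classify(NULL),                          /* NOT_SUPPORTED */
               classify(""),                            /* DISABLED */
               classify("i915/icl_guc_33.0.0.bin"));    /* SELECTED */
        return 0;
    }
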
+ */
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
+{
+ struct device *dev = i915->drm.dev;
+ struct drm_i915_gem_object *obj;
+ const struct firmware *fw = NULL;
+ struct uc_css_header *css;
+ size_t size;
+ int err;
+
+ GEM_BUG_ON(!i915->wopcm.size);
+ GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw));
+
+ err = i915_inject_load_error(i915, -ENXIO);
+ if (err)
+ return err;
+
+ __force_fw_fetch_failures(uc_fw, i915, -EINVAL);
+ __force_fw_fetch_failures(uc_fw, i915, -ESTALE);
+
+ err = request_firmware(&fw, uc_fw->path, dev);
+ if (err)
+ goto fail;
+
+ /* Check the size of the blob before examining buffer contents */
+ if (unlikely(fw->size < sizeof(struct uc_css_header))) {
+ dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ fw->size, sizeof(struct uc_css_header));
+ err = -ENODATA;
+ goto fail;
+ }
+
+ css = (struct uc_css_header *)fw->data;
+
+ /* Check integrity of size values inside CSS header */
+ size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
+ css->exponent_size_dw) * sizeof(u32);
+ if (unlikely(size != sizeof(struct uc_css_header))) {
+ dev_warn(dev,
+ "%s firmware %s: unexpected header size: %zu != %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ size, sizeof(struct uc_css_header));
+ err = -EPROTO;
+ goto fail;
+ }
+
+ /* uCode size must be calculated from other sizes */
+ uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+ /* now RSA */
+ if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) {
+ dev_warn(dev, "%s firmware %s: unexpected key size: %u != %u\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ css->key_size_dw, UOS_RSA_SCRATCH_COUNT);
+ err = -EPROTO;
+ goto fail;
+ }
+ uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+ /* At least, it should have header, uCode and RSA. Size of all three. 
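
The CSS header checks above boil down to simple dword arithmetic: the fixed header must come out to 128 bytes once the key, modulus and exponent lengths are subtracted, and the minimal acceptable blob is header plus uCode plus RSA key. A standalone sketch of that arithmetic with made-up dword counts (the key size of 64 dwords is only an assumption here):

    #include <stdio.h>

    int main(void)
    {
        unsigned int key_size_dw = 64, modulus_size_dw = 1, exponent_size_dw = 1;
        unsigned int header_size_dw = 32 + key_size_dw + modulus_size_dw +
                                      exponent_size_dw;      /* fixed part is 32 dwords */
        unsigned int size_dw = header_size_dw + 4096;         /* plus 16 KiB of uCode */

        unsigned int fixed = (header_size_dw - key_size_dw - modulus_size_dw -
                              exponent_size_dw) * 4;          /* must equal 128 */
        unsigned int ucode = (size_dw - header_size_dw) * 4;
        unsigned int rsa = key_size_dw * 4;

        printf("fixed header %u, uCode %u, RSA %u, minimal blob %u bytes\n",
               fixed, ucode, rsa, fixed + ucode + rsa);
        return 0;
    }
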
*/ + size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size; + if (unlikely(fw->size < size)) { + dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, size); + err = -ENOEXEC; + goto fail; + } + + /* Sanity check whether this fw is not larger than whole WOPCM memory */ + size = __intel_uc_fw_get_upload_size(uc_fw); + if (unlikely(size >= i915->wopcm.size)) { + dev_warn(dev, "%s firmware %s: invalid size: %zu > %zu\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + size, (size_t)i915->wopcm.size); + err = -E2BIG; + goto fail; + } + + /* Get version numbers from the CSS header */ + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_GUC: + uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MAJOR, + css->sw_version); + uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MINOR, + css->sw_version); + break; + + case INTEL_UC_FW_TYPE_HUC: + uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MAJOR, + css->sw_version); + uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MINOR, + css->sw_version); + break; + + default: + MISSING_CASE(uc_fw->type); + break; + } + + if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + dev_notice(dev, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + if (!intel_uc_fw_is_overridden(uc_fw)) { + err = -ENOEXEC; + goto fail; + } + } + + obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto fail; + } + + uc_fw->obj = obj; + uc_fw->size = fw->size; + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE); + + release_firmware(fw); + return 0; + +fail: + intel_uc_fw_change_status(uc_fw, err == -ENOENT ? 
+ INTEL_UC_FIRMWARE_MISSING : + INTEL_UC_FIRMWARE_ERROR); + + dev_notice(dev, "%s firmware %s: fetch failed with error %d\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + dev_info(dev, "%s firmware(s) can be downloaded from %s\n", + intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); + + release_firmware(fw); /* OK even if fw is NULL */ + return err; +} + +static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt) +{ + struct drm_mm_node *node = &ggtt->uc_fw; + + GEM_BUG_ON(!node->allocated); + GEM_BUG_ON(upper_32_bits(node->start)); + GEM_BUG_ON(upper_32_bits(node->start + node->size - 1)); + + return lower_32_bits(node->start); +} + +static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw, + struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = gt->ggtt; + struct i915_vma dummy = { + .node.start = uc_fw_ggtt_offset(uc_fw, ggtt), + .node.size = obj->base.size, + .pages = obj->mm.pages, + .vm = &ggtt->vm, + }; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size); + + /* uc_fw->obj cache domains were not controlled across suspend */ + drm_clflush_sg(dummy.pages); + + ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); +} + +static void intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw, + struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = gt->ggtt; + u64 start = uc_fw_ggtt_offset(uc_fw, ggtt); + + ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size); +} + +static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, + u32 wopcm_offset, u32 dma_flags) +{ + struct intel_uncore *uncore = gt->uncore; + u64 offset; + int ret; + + ret = i915_inject_load_error(gt->i915, -ETIMEDOUT); + if (ret) + return ret; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + /* Set the source address for the uCode */ + offset = uc_fw_ggtt_offset(uc_fw, gt->ggtt); + GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000); + intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset)); + intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset)); + + /* Set the DMA destination */ + intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, wopcm_offset); + intel_uncore_write_fw(uncore, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + + /* + * Set the transfer size. The header plus uCode will be copied to WOPCM + * via DMA, excluding any other components + */ + intel_uncore_write_fw(uncore, DMA_COPY_SIZE, + sizeof(struct uc_css_header) + uc_fw->ucode_size); + + /* Start the DMA */ + intel_uncore_write_fw(uncore, DMA_CTRL, + _MASKED_BIT_ENABLE(dma_flags | START_DMA)); + + /* Wait for DMA to finish */ + ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100); + if (ret) + dev_err(gt->i915->drm.dev, "DMA for %s fw failed, DMA_CTRL=%u\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uncore_read_fw(uncore, DMA_CTRL)); + + /* Disable the bits once DMA is over */ + intel_uncore_write_fw(uncore, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags)); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + return ret; +} + +/** + * intel_uc_fw_upload - load uC firmware using custom loader + * @uc_fw: uC firmware + * @gt: the intel_gt structure + * @wopcm_offset: destination offset in wopcm + * @dma_flags: flags for flags for dma ctrl + * + * Loads uC firmware and updates internal flags. + * + * Return: 0 on success, non-zero on failure. 
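
uc_fw_xfer() above programs the firmware DMA by splitting the 64-bit GGTT source address across two 32-bit registers, writing the copy size (header plus uCode only), kicking the transfer, and waiting for the start bit to clear. A self-contained sketch of that sequence; the "registers" are plain variables and the names are invented for illustration.

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t dma_addr_lo, dma_addr_hi, dma_copy_size, dma_ctrl;
    #define SKETCH_START_DMA 0x1u

    static void kick_dma(uint64_t src, uint32_t bytes)
    {
        dma_addr_lo = (uint32_t)src;            /* DMA_ADDR_0_LOW equivalent */
        dma_addr_hi = (uint32_t)(src >> 32);    /* DMA_ADDR_0_HIGH equivalent */
        dma_copy_size = bytes;                  /* header + uCode */
        dma_ctrl |= SKETCH_START_DMA;           /* start the transfer */
    }

    int main(void)
    {
        kick_dma(0x0000000000800000ull, 128 + 16384);

        /* real hardware would clear the start bit when the copy completes */
        dma_ctrl &= ~SKETCH_START_DMA;

        printf("lo=%#x hi=%#x size=%u done=%d\n",
               (unsigned int)dma_addr_lo, (unsigned int)dma_addr_hi,
               (unsigned int)dma_copy_size, !(dma_ctrl & SKETCH_START_DMA));
        return 0;
    }
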
+ */ +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, + u32 wopcm_offset, u32 dma_flags) +{ + int err; + + /* make sure the status was cleared the last time we reset the uc */ + GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw)); + + err = i915_inject_load_error(gt->i915, -ENOEXEC); + if (err) + return err; + + if (!intel_uc_fw_is_available(uc_fw)) + return -ENOEXEC; + + /* Call custom loader */ + intel_uc_fw_ggtt_bind(uc_fw, gt); + err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags); + intel_uc_fw_ggtt_unbind(uc_fw, gt); + if (err) + goto fail; + + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_TRANSFERRED); + return 0; + +fail: + i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + err); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + return err; +} + +int intel_uc_fw_init(struct intel_uc_fw *uc_fw) +{ + int err; + + /* this should happen before the load! */ + GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw)); + + if (!intel_uc_fw_is_available(uc_fw)) + return -ENOEXEC; + + err = i915_gem_object_pin_pages(uc_fw->obj); + if (err) { + DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + } + + return err; +} + +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + if (!intel_uc_fw_is_available(uc_fw)) + return; + + i915_gem_object_unpin_pages(uc_fw->obj); +} + +/** + * intel_uc_fw_cleanup_fetch - cleanup uC firmware + * @uc_fw: uC firmware + * + * Cleans up uC firmware by releasing the firmware GEM obj. + */ +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) +{ + if (!intel_uc_fw_is_available(uc_fw)) + return; + + i915_gem_object_put(fetch_and_zero(&uc_fw->obj)); + + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_SELECTED); +} + +/** + * intel_uc_fw_copy_rsa - copy fw RSA to buffer + * + * @uc_fw: uC firmware + * @dst: dst buffer + * @max_len: max number of bytes to copy + * + * Return: number of copied bytes. + */ +size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) +{ + struct sg_table *pages = uc_fw->obj->mm.pages; + u32 size = min_t(u32, uc_fw->rsa_size, max_len); + u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size; + + GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw)); + + return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset); +} + +/** + * intel_uc_fw_dump - dump information about uC firmware + * @uc_fw: uC firmware + * @p: the &drm_printer + * + * Pretty printer for uC firmware. 
+ */ +void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) +{ + drm_printf(p, "%s firmware: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + drm_printf(p, "\tstatus: %s\n", + intel_uc_fw_status_repr(uc_fw->status)); + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); + drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h new file mode 100644 index 000000000000..7a0a5989afc9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2019 Intel Corporation + */ + +#ifndef _INTEL_UC_FW_H_ +#define _INTEL_UC_FW_H_ + +#include <linux/types.h> +#include "intel_uc_fw_abi.h" +#include "intel_device_info.h" +#include "i915_gem.h" + +struct drm_printer; +struct drm_i915_private; +struct intel_gt; + +/* Home of GuC, HuC and DMC firmwares */ +#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" + +/* + * +------------+---------------------------------------------------+ + * | PHASE | FIRMWARE STATUS TRANSITIONS | + * +============+===================================================+ + * | | UNINITIALIZED | + * +------------+- / | \ -+ + * | | DISABLED <--/ | \--> NOT_SUPPORTED | + * | init_early | V | + * | | SELECTED | + * +------------+- / | \ -+ + * | | MISSING <--/ | \--> ERROR | + * | fetch | | | + * | | /------> AVAILABLE <---<-----------\ | + * +------------+- \ / \ \ \ -+ + * | | FAIL <--< \--> TRANSFERRED \ | + * | upload | \ / \ / | + * | | \---------/ \--> RUNNING | + * +------------+---------------------------------------------------+ + */ + +enum intel_uc_fw_status { + INTEL_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */ + INTEL_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */ + INTEL_UC_FIRMWARE_DISABLED, /* disabled */ + INTEL_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */ + INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */ + INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */ + INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */ + INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ + INTEL_UC_FIRMWARE_RUNNING /* init/auth done */ +}; + +enum intel_uc_fw_type { + INTEL_UC_FW_TYPE_GUC = 0, + INTEL_UC_FW_TYPE_HUC +}; +#define INTEL_UC_FW_NUM_TYPES 2 + +/* + * This structure encapsulates all the data needed during the process + * of fetching, caching, and loading the firmware image into the uC. + */ +struct intel_uc_fw { + enum intel_uc_fw_type type; + union { + const enum intel_uc_fw_status status; + enum intel_uc_fw_status __status; /* no accidental overwrites */ + }; + const char *path; + bool user_overridden; + size_t size; + struct drm_i915_gem_object *obj; + + /* + * The firmware build process will generate a version header file with major and + * minor version defined. The versions are built into CSS header of firmware. + * i915 kernel driver set the minimal firmware version required per platform. 
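
The version fields described above are checked during fetch (see intel_uc_fw_fetch() earlier in this patch): the found major version must match the wanted one exactly, while the found minor version only has to be at least the wanted one. A tiny standalone sketch of that acceptance rule:

    #include <stdbool.h>
    #include <stdio.h>

    static bool version_ok(unsigned int maj_want, unsigned int min_want,
                           unsigned int maj_found, unsigned int min_found)
    {
        return maj_found == maj_want && min_found >= min_want;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               version_ok(33, 0, 33, 5),   /* 1: newer minor is accepted */
               version_ok(33, 0, 34, 0),   /* 0: different major is rejected */
               version_ok(2, 1, 2, 0));    /* 0: older minor is rejected */
        return 0;
    }
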
+ */ + u16 major_ver_wanted; + u16 minor_ver_wanted; + u16 major_ver_found; + u16 minor_ver_found; + + u32 rsa_size; + u32 ucode_size; +}; + +#ifdef CONFIG_DRM_I915_DEBUG_GUC +void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_status status); +#else +static inline void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_status status) +{ + uc_fw->__status = status; +} +#endif + +static inline +const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_NOT_SUPPORTED: + return "N/A"; + case INTEL_UC_FIRMWARE_UNINITIALIZED: + return "UNINITIALIZED"; + case INTEL_UC_FIRMWARE_DISABLED: + return "DISABLED"; + case INTEL_UC_FIRMWARE_SELECTED: + return "SELECTED"; + case INTEL_UC_FIRMWARE_MISSING: + return "MISSING"; + case INTEL_UC_FIRMWARE_ERROR: + return "ERROR"; + case INTEL_UC_FIRMWARE_AVAILABLE: + return "AVAILABLE"; + case INTEL_UC_FIRMWARE_FAIL: + return "FAIL"; + case INTEL_UC_FIRMWARE_TRANSFERRED: + return "TRANSFERRED"; + case INTEL_UC_FIRMWARE_RUNNING: + return "RUNNING"; + } + return "<invalid>"; +} + +static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_NOT_SUPPORTED: + return -ENODEV; + case INTEL_UC_FIRMWARE_UNINITIALIZED: + return -EACCES; + case INTEL_UC_FIRMWARE_DISABLED: + return -EPERM; + case INTEL_UC_FIRMWARE_MISSING: + return -ENOENT; + case INTEL_UC_FIRMWARE_ERROR: + return -ENOEXEC; + case INTEL_UC_FIRMWARE_FAIL: + return -EIO; + case INTEL_UC_FIRMWARE_SELECTED: + return -ESTALE; + case INTEL_UC_FIRMWARE_AVAILABLE: + case INTEL_UC_FIRMWARE_TRANSFERRED: + case INTEL_UC_FIRMWARE_RUNNING: + return 0; + } + return -EINVAL; +} + +static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) +{ + switch (type) { + case INTEL_UC_FW_TYPE_GUC: + return "GuC"; + case INTEL_UC_FW_TYPE_HUC: + return "HuC"; + } + return "uC"; +} + +static inline enum intel_uc_fw_status +__intel_uc_fw_status(struct intel_uc_fw *uc_fw) +{ + /* shouldn't call this before checking hw/blob availability */ + GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED); + return uc_fw->status; +} + +static inline bool intel_uc_fw_is_supported(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) != INTEL_UC_FIRMWARE_NOT_SUPPORTED; +} + +static inline bool intel_uc_fw_is_enabled(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) > INTEL_UC_FIRMWARE_DISABLED; +} + +static inline bool intel_uc_fw_is_available(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_AVAILABLE; +} + +static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_TRANSFERRED; +} + +static inline bool intel_uc_fw_is_running(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) == INTEL_UC_FIRMWARE_RUNNING; +} + +static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw) +{ + return uc_fw->user_overridden; +} + +static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw) +{ + if (intel_uc_fw_is_loaded(uc_fw)) + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE); +} + +static inline u32 __intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) +{ + return sizeof(struct uc_css_header) + uc_fw->ucode_size; +} + +/** + * intel_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded. + * @uc_fw: uC firmware. 
+ * + * Get the size of the firmware and header that will be uploaded to WOPCM. + * + * Return: Upload firmware size, or zero on firmware fetch failure. + */ +static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) +{ + if (!intel_uc_fw_is_available(uc_fw)) + return 0; + + return __intel_uc_fw_get_upload_size(uc_fw); +} + +void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_type type, bool supported, + enum intel_platform platform, u8 rev); +int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915); +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, + u32 wopcm_offset, u32 dma_flags); +int intel_uc_fw_init(struct intel_uc_fw *uc_fw); +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); +size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len); +void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h new file mode 100644 index 000000000000..ae58e8a8c53b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef _INTEL_UC_FW_ABI_H +#define _INTEL_UC_FW_ABI_H + +#include <linux/types.h> +#include <linux/build_bug.h> + +/** + * DOC: Firmware Layout + * + * The GuC/HuC firmware layout looks like this:: + * + * +======================================================================+ + * | Firmware blob | + * +===============+===============+============+============+============+ + * | CSS header | uCode | RSA key | modulus | exponent | + * +===============+===============+============+============+============+ + * <-header size-> <---header size continued -----------> + * <--- size -----------------------------------------------------------> + * <-key size-> + * <-mod size-> + * <-exp size-> + * + * The firmware may or may not have modulus key and exponent data. The header, + * uCode and RSA signature are must-have components that will be used by driver. + * Length of each components, which is all in dwords, can be found in header. + * In the case that modulus and exponent are not present in fw, a.k.a truncated + * image, the length value still appears in header. + * + * Driver will do some basic fw size validation based on the following rules: + * + * 1. Header, uCode and RSA are must-have components. + * 2. All firmware components, if they present, are in the sequence illustrated + * in the layout table above. + * 3. Length info of each component can be found in header, in dwords. + * 4. Modulus and exponent key are not required by driver. They may not appear + * in fw. So driver will load a truncated firmware in this case. + * + * The only difference between GuC and HuC firmwares is how the version + * information is saved. + */ + +struct uc_css_header { + u32 module_type; + /* + * header_size includes all non-uCode bits, including css_header, rsa + * key, modulus key and exponent data. 
+ */ + u32 header_size_dw; + u32 header_version; + u32 module_id; + u32 module_vendor; + u32 date; +#define CSS_DATE_DAY (0xFF << 0) +#define CSS_DATE_MONTH (0xFF << 8) +#define CSS_DATE_YEAR (0xFFFF << 16) + u32 size_dw; /* uCode plus header_size_dw */ + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; + u32 time; +#define CSS_TIME_HOUR (0xFF << 0) +#define CSS_DATE_MIN (0xFF << 8) +#define CSS_DATE_SEC (0xFFFF << 16) + char username[8]; + char buildnumber[12]; + u32 sw_version; +#define CSS_SW_VERSION_GUC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_GUC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_GUC_PATCH (0xFF << 0) +#define CSS_SW_VERSION_HUC_MAJOR (0xFFFF << 16) +#define CSS_SW_VERSION_HUC_MINOR (0xFFFF << 0) + u32 reserved[14]; + u32 header_info; +} __packed; +static_assert(sizeof(struct uc_css_header) == 128); + +#endif /* _INTEL_UC_FW_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c new file mode 100644 index 000000000000..bba0eafe1cdb --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2017 Intel Corporation + */ + +#include "i915_selftest.h" +#include "gem/i915_gem_pm.h" + +/* max doorbell number + negative test for each client type */ +#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM) + +static struct intel_guc_client *clients[ATTEMPTS]; + +static bool available_dbs(struct intel_guc *guc, u32 priority) +{ + unsigned long offset; + unsigned long end; + u16 id; + + /* first half is used for normal priority, second half for high */ + offset = 0; + end = GUC_NUM_DOORBELLS / 2; + if (priority <= GUC_CLIENT_PRIORITY_HIGH) { + offset = end; + end += offset; + } + + id = find_next_zero_bit(guc->doorbell_bitmap, end, offset); + if (id < end) + return true; + + return false; +} + +static int check_all_doorbells(struct intel_guc *guc) +{ + u16 db_id; + + pr_info_once("Max number of doorbells: %d", GUC_NUM_DOORBELLS); + for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) { + if (!doorbell_ok(guc, db_id)) { + pr_err("doorbell %d, not ok\n", db_id); + return -EIO; + } + } + + return 0; +} + +static int ring_doorbell_nop(struct intel_guc_client *client) +{ + struct guc_process_desc *desc = __get_process_desc(client); + int err; + + client->use_nop_wqi = true; + + spin_lock_irq(&client->wq_lock); + + guc_wq_item_append(client, 0, 0, 0, 0); + guc_ring_doorbell(client); + + spin_unlock_irq(&client->wq_lock); + + client->use_nop_wqi = false; + + /* if there are no issues GuC will update the WQ head and keep the + * WQ in active status + */ + err = wait_for(READ_ONCE(desc->head) == READ_ONCE(desc->tail), 10); + if (err) { + pr_err("doorbell %u ring failed!\n", client->doorbell_id); + return -EIO; + } + + if (desc->wq_status != WQ_STATUS_ACTIVE) { + pr_err("doorbell %u ring put WQ in bad state (%u)!\n", + client->doorbell_id, desc->wq_status); + return -EIO; + } + + return 0; +} + +/* + * Basic client sanity check, handy to validate create_clients. 
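
available_dbs() above relies on the doorbell ID space being split in half by priority: the first half serves normal-priority clients, the second half high-priority ones, so availability is just a search for a free slot in the right half. A standalone sketch of that split, with an illustrative doorbell count:

    #include <stdbool.h>
    #include <stdio.h>

    #define SKETCH_NUM_DOORBELLS 256

    static bool db_used[SKETCH_NUM_DOORBELLS];

    static int find_free_db(bool high_priority)
    {
        int start = high_priority ? SKETCH_NUM_DOORBELLS / 2 : 0;
        int end = start + SKETCH_NUM_DOORBELLS / 2;

        for (int i = start; i < end; i++)
            if (!db_used[i])
                return i;
        return -1;
    }

    int main(void)
    {
        db_used[0] = true;   /* pretend doorbell 0 is taken */
        printf("normal -> %d, high -> %d\n",
               find_free_db(false), find_free_db(true));
        return 0;
    }
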
+ */ +static int validate_client(struct intel_guc_client *client, int client_priority) +{ + if (client->priority != client_priority || + client->doorbell_id == GUC_DOORBELL_INVALID) + return -EINVAL; + else + return 0; +} + +static bool client_doorbell_in_sync(struct intel_guc_client *client) +{ + return !client || doorbell_ok(client->guc, client->doorbell_id); +} + +/* + * Check that we're able to synchronize guc_clients with their doorbells + * + * We're creating clients and reserving doorbells once, at module load. During + * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to + * GuC being reset. In other words - GuC clients are still around, but the + * status of their doorbells may be incorrect. This is the reason behind + * validating that the doorbells status expected by the driver matches what the + * GuC/HW have. + */ +static int igt_guc_clients(void *args) +{ + struct drm_i915_private *dev_priv = args; + intel_wakeref_t wakeref; + struct intel_guc *guc; + int err = 0; + + GEM_BUG_ON(!HAS_GT_UC(dev_priv)); + mutex_lock(&dev_priv->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); + + guc = &dev_priv->gt.uc.guc; + if (!guc) { + pr_err("No guc object!\n"); + err = -EINVAL; + goto unlock; + } + + err = check_all_doorbells(guc); + if (err) + goto unlock; + + /* + * Get rid of clients created during driver load because the test will + * recreate them. + */ + guc_clients_disable(guc); + guc_clients_destroy(guc); + if (guc->execbuf_client) { + pr_err("guc_clients_destroy lied!\n"); + err = -EINVAL; + goto unlock; + } + + err = guc_clients_create(guc); + if (err) { + pr_err("Failed to create clients\n"); + goto unlock; + } + GEM_BUG_ON(!guc->execbuf_client); + + err = validate_client(guc->execbuf_client, + GUC_CLIENT_PRIORITY_KMD_NORMAL); + if (err) { + pr_err("execbug client validation failed\n"); + goto out; + } + + /* the client should now have reserved a doorbell */ + if (!has_doorbell(guc->execbuf_client)) { + pr_err("guc_clients_create didn't reserve doorbells\n"); + err = -EINVAL; + goto out; + } + + /* Now enable the clients */ + guc_clients_enable(guc); + + /* each client should now have received a doorbell */ + if (!client_doorbell_in_sync(guc->execbuf_client)) { + pr_err("failed to initialize the doorbells\n"); + err = -EINVAL; + goto out; + } + + /* + * Basic test - an attempt to reallocate a valid doorbell to the + * client it is currently assigned should not cause a failure. + */ + err = create_doorbell(guc->execbuf_client); + +out: + /* + * Leave clean state for other test, plus the driver always destroy the + * clients during unload. + */ + guc_clients_disable(guc); + guc_clients_destroy(guc); + guc_clients_create(guc); + guc_clients_enable(guc); +unlock: + intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + mutex_unlock(&dev_priv->drm.struct_mutex); + return err; +} + +/* + * Create as many clients as number of doorbells. Note that there's already + * client(s)/doorbell(s) created during driver load, but this test creates + * its own and do not interact with the existing ones. 
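
igt_guc_doorbells() below deliberately over-allocates and treats running out of doorbells as the expected terminal condition, while any other failure is a real error. A standalone sketch of that allocate-until-exhausted pattern, with sketch-local names and pool size:

    #include <errno.h>
    #include <stdio.h>

    #define SKETCH_POOL 4

    static int allocated;

    static int sketch_alloc(void)
    {
        if (allocated >= SKETCH_POOL)
            return -ENOSPC;   /* expected once the pool is exhausted */
        return allocated++;
    }

    int main(void)
    {
        for (int i = 0; i < SKETCH_POOL + 2; i++) {
            int ret = sketch_alloc();

            if (ret == -ENOSPC) {
                printf("[%d] pool exhausted (expected)\n", i);
                continue;
            }
            if (ret < 0) {
                printf("[%d] unexpected error %d\n", i, ret);
                return 1;
            }
            printf("[%d] got id %d\n", i, ret);
        }
        return 0;
    }
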
+ */ +static int igt_guc_doorbells(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + intel_wakeref_t wakeref; + struct intel_guc *guc; + int i, err = 0; + u16 db_id; + + GEM_BUG_ON(!HAS_GT_UC(dev_priv)); + mutex_lock(&dev_priv->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); + + guc = &dev_priv->gt.uc.guc; + if (!guc) { + pr_err("No guc object!\n"); + err = -EINVAL; + goto unlock; + } + + err = check_all_doorbells(guc); + if (err) + goto unlock; + + for (i = 0; i < ATTEMPTS; i++) { + clients[i] = guc_client_alloc(guc, i % GUC_CLIENT_PRIORITY_NUM); + + if (!clients[i]) { + pr_err("[%d] No guc client\n", i); + err = -EINVAL; + goto out; + } + + if (IS_ERR(clients[i])) { + if (PTR_ERR(clients[i]) != -ENOSPC) { + pr_err("[%d] unexpected error\n", i); + err = PTR_ERR(clients[i]); + goto out; + } + + if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) { + pr_err("[%d] non-db related alloc fail\n", i); + err = -EINVAL; + goto out; + } + + /* expected, ran out of dbs for this client type */ + continue; + } + + /* + * The check below is only valid because we keep a doorbell + * assigned during the whole life of the client. + */ + if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) { + pr_err("[%d] more clients than doorbells (%d >= %d)\n", + i, clients[i]->stage_id, GUC_NUM_DOORBELLS); + err = -EINVAL; + goto out; + } + + err = validate_client(clients[i], i % GUC_CLIENT_PRIORITY_NUM); + if (err) { + pr_err("[%d] client_alloc sanity check failed!\n", i); + err = -EINVAL; + goto out; + } + + db_id = clients[i]->doorbell_id; + + err = __guc_client_enable(clients[i]); + if (err) { + pr_err("[%d] Failed to create a doorbell\n", i); + goto out; + } + + /* doorbell id shouldn't change, we are holding the mutex */ + if (db_id != clients[i]->doorbell_id) { + pr_err("[%d] doorbell id changed (%d != %d)\n", + i, db_id, clients[i]->doorbell_id); + err = -EINVAL; + goto out; + } + + err = check_all_doorbells(guc); + if (err) + goto out; + + err = ring_doorbell_nop(clients[i]); + if (err) + goto out; + } + +out: + for (i = 0; i < ATTEMPTS; i++) + if (!IS_ERR_OR_NULL(clients[i])) { + __guc_client_disable(clients[i]); + guc_client_free(clients[i]); + } +unlock: + intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + mutex_unlock(&dev_priv->drm.struct_mutex); + return err; +} + +int intel_guc_live_selftest(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_guc_clients), + SUBTEST(igt_guc_doorbells), + }; + + if (!USES_GUC_SUBMISSION(dev_priv)) + return 0; + + return i915_subtests(tests, dev_priv); +} |
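
For context, the selftest entry point above hands an array of named subtests to the driver's table-driven runner. The following is a standalone sketch of that pattern, not the i915_subtests() implementation itself: an array of named test functions executed in order, stopping at the first failure.

    #include <stdio.h>

    struct sketch_subtest {
        const char *name;
        int (*func)(void *data);
    };

    static int test_clients(void *data)   { (void)data; return 0; }
    static int test_doorbells(void *data) { (void)data; return 0; }

    static int run_subtests(const struct sketch_subtest *t, int n, void *data)
    {
        for (int i = 0; i < n; i++) {
            int err = t[i].func(data);

            printf("%s: %s\n", t[i].name, err ? "FAIL" : "ok");
            if (err)
                return err;
        }
        return 0;
    }

    int main(void)
    {
        static const struct sketch_subtest tests[] = {
            { "clients", test_clients },
            { "doorbells", test_doorbells },
        };

        return run_subtests(tests, 2, NULL);
    }
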