diff options
389 files changed, 6809 insertions, 1975 deletions
@@ -107,6 +107,7 @@ Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch> Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com> Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com> Mark Brown <broonie@sirena.org.uk> +Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com> Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com> Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com> Matthieu CASTET <castet.matthieu@free.fr> diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index b2598cc9834c..7242cbda15dd 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -109,6 +109,7 @@ parameter is applicable:: IPV6 IPv6 support is enabled. ISAPNP ISA PnP code is enabled. ISDN Appropriate ISDN support is enabled. + ISOL CPU Isolation is enabled. JOY Appropriate joystick support is enabled. KGDB Kernel debugger support is enabled. KVM Kernel Virtual Machine support is enabled. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6571fbfdb2a1..af7104aaffd9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -328,11 +328,15 @@ not play well with APC CPU idle - disable it if you have APC and your system crashes randomly. - apic= [APIC,X86-32] Advanced Programmable Interrupt Controller + apic= [APIC,X86] Advanced Programmable Interrupt Controller Change the output verbosity whilst booting Format: { quiet (default) | verbose | debug } Change the amount of debugging information output when initialising the APIC and IO-APIC components. + For X86-32, this can also be used to specify an APIC + driver name. + Format: apic=driver_name + Examples: apic=bigsmp apic_extnmi= [APIC,X86] External NMI delivery setting Format: { bsp (default) | all | none } @@ -1737,7 +1741,7 @@ isapnp= [ISAPNP] Format: <RDP>,<reset>,<pci_scan>,<verbosity> - isolcpus= [KNL,SMP] Isolate a given set of CPUs from disturbance. + isolcpus= [KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance. [Deprecated - use cpusets instead] Format: [flag-list,]<cpu-list> @@ -2662,7 +2666,7 @@ Valid arguments: on, off Default: on - nohz_full= [KNL,BOOT] + nohz_full= [KNL,BOOT,SMP,ISOL] The argument is a cpu list, as described above. In kernels built with CONFIG_NO_HZ_FULL=y, set the specified list of CPUs whose tick will be stopped @@ -2708,6 +2712,8 @@ steal time is computed, but won't influence scheduler behaviour + nopti [X86-64] Disable kernel page table isolation + nolapic [X86-32,APIC] Do not enable or use the local APIC. nolapic_timer [X86-32,APIC] Do not use the local APIC timer. @@ -3282,6 +3288,12 @@ pt. [PARIDE] See Documentation/blockdev/paride.txt. + pti= [X86_64] + Control user/kernel address space isolation: + on - enable + off - disable + auto - default setting + pty.legacy_count= [KNL] Number of legacy pty's. Overwrites compiled-in default number. diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst index de50a8561774..9b55952039a6 100644 --- a/Documentation/admin-guide/thunderbolt.rst +++ b/Documentation/admin-guide/thunderbolt.rst @@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with a sysfs attribute called "force_power". For example the intel-wmi-thunderbolt driver exposes this attribute in: - /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power + /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power To force the power to on, write 1 to this attribute file. To disable force power, write 0 to this attribute file. diff --git a/Documentation/devicetree/bindings/net/socionext-netsec.txt b/Documentation/devicetree/bindings/net/socionext-netsec.txt new file mode 100644 index 000000000000..0cff94fb0433 --- /dev/null +++ b/Documentation/devicetree/bindings/net/socionext-netsec.txt @@ -0,0 +1,53 @@ +* Socionext NetSec Ethernet Controller IP + +Required properties: +- compatible: Should be "socionext,synquacer-netsec" +- reg: Address and length of the control register area, followed by the + address and length of the EEPROM holding the MAC address and + microengine firmware +- interrupts: Should contain ethernet controller interrupt +- clocks: phandle to the PHY reference clock +- clock-names: Should be "phy_ref_clk" +- phy-mode: See ethernet.txt file in the same directory +- phy-handle: See ethernet.txt in the same directory. + +- mdio device tree subnode: When the Netsec has a phy connected to its local + mdio, there must be device tree subnode with the following + required properties: + + - #address-cells: Must be <1>. + - #size-cells: Must be <0>. + + For each phy on the mdio bus, there must be a node with the following + fields: + - compatible: Refer to phy.txt + - reg: phy id used to communicate to phy. + +Optional properties: (See ethernet.txt file in the same directory) +- dma-coherent: Boolean property, must only be present if memory + accesses performed by the device are cache coherent. +- local-mac-address: See ethernet.txt in the same directory. +- mac-address: See ethernet.txt in the same directory. +- max-speed: See ethernet.txt in the same directory. +- max-frame-size: See ethernet.txt in the same directory. + +Example: + eth0: ethernet@522d0000 { + compatible = "socionext,synquacer-netsec"; + reg = <0 0x522d0000 0x0 0x10000>, <0 0x10000000 0x0 0x10000>; + interrupts = <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk_netsec>; + clock-names = "phy_ref_clk"; + phy-mode = "rgmii"; + max-speed = <1000>; + max-frame-size = <9000>; + phy-handle = <&phy1>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + phy1: ethernet-phy@1 { + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <1>; + }; + }; diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 2e7ee0313c1c..e94d3ac2bdd0 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -341,10 +341,7 @@ GuC GuC-specific firmware loader ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c - :doc: GuC-specific firmware loader - -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c +.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c :internal: GuC-based command submission diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 51101708a03a..ea91cb61a602 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,7 +12,9 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... -fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping + vaddr_end for KASLR +fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping +fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space @@ -29,20 +31,22 @@ Virtual memory map with 5 level page tables: hole caused by [56:63] sign extension ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory -ff90000000000000 - ff91ffffffffffff (=49 bits) hole -ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space +ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI +ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB) ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) ... unused hole ... -fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping + vaddr_end for KASLR +fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping +... unused hole ... ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space +ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space [fixmap start] - ffffffffff5fffff kernel-internal fixmap range ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole @@ -66,9 +70,10 @@ memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available during EFI runtime calls. -The module mapping space size changes based on the CONFIG requirements for the -following fixmap section. - Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all physical memory, vmalloc/ioremap space and virtual memory map are randomized. Their order is preserved but their base will be offset early at boot time. + +Be very careful vs. KASLR when changing anything here. The KASLR address +range must not overlap with anything except the KASAN shadow area, which is +correct as KASAN disables KASLR. diff --git a/MAINTAINERS b/MAINTAINERS index 753799d24cd9..f9af20ccee73 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2622,24 +2622,22 @@ F: fs/bfs/ F: include/uapi/linux/bfs_fs.h BLACKFIN ARCHITECTURE -M: Steven Miao <realmz6@gmail.com> L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) T: git git://git.code.sf.net/p/adi-linux/code W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: arch/blackfin/ BLACKFIN EMAC DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/net/ethernet/adi/ BLACKFIN MEDIA DRIVER -M: Scott Jiang <scott.jiang.linux@gmail.com> L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org/ -S: Supported +S: Orphan F: drivers/media/platform/blackfin/ F: drivers/media/i2c/adv7183* F: drivers/media/i2c/vs6624* @@ -2647,25 +2645,25 @@ F: drivers/media/i2c/vs6624* BLACKFIN RTC DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/rtc/rtc-bfin.c BLACKFIN SDH DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/mmc/host/bfin_sdh.c BLACKFIN SERIAL DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/tty/serial/bfin_uart.c BLACKFIN WATCHDOG DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/watchdog/bfin_wdt.c BLINKM RGB LED DRIVER @@ -5154,15 +5152,15 @@ F: sound/usb/misc/ua101.c EFI TEST DRIVER L: linux-efi@vger.kernel.org M: Ivan Hu <ivan.hu@canonical.com> -M: Matt Fleming <matt@codeblueprint.co.uk> +M: Ard Biesheuvel <ard.biesheuvel@linaro.org> S: Maintained F: drivers/firmware/efi/test/ EFI VARIABLE FILESYSTEM M: Matthew Garrett <matthew.garrett@nebula.com> M: Jeremy Kerr <jk@ozlabs.org> -M: Matt Fleming <matt@codeblueprint.co.uk> -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git +M: Ard Biesheuvel <ard.biesheuvel@linaro.org> +T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git L: linux-efi@vger.kernel.org S: Maintained F: fs/efivarfs/ @@ -5323,7 +5321,6 @@ S: Supported F: security/integrity/evm/ EXTENSIBLE FIRMWARE INTERFACE (EFI) -M: Matt Fleming <matt@codeblueprint.co.uk> M: Ard Biesheuvel <ard.biesheuvel@linaro.org> L: linux-efi@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git @@ -10152,7 +10149,7 @@ F: drivers/irqchip/irq-ompic.c F: drivers/irqchip/irq-or1k-* OPENVSWITCH -M: Pravin Shelar <pshelar@nicira.com> +M: Pravin B Shelar <pshelar@ovn.org> L: netdev@vger.kernel.org L: dev@openvswitch.org W: http://openvswitch.org @@ -12645,6 +12642,13 @@ F: drivers/md/raid* F: include/linux/raid/ F: include/uapi/linux/raid/ +SOCIONEXT (SNI) NETSEC NETWORK DRIVER +M: Jassi Brar <jaswinder.singh@linaro.org> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/socionext/netsec.c +F: Documentation/devicetree/bindings/net/socionext-netsec.txt + SONIC NETWORK DRIVER M: Thomas Bogendoerfer <tsbogend@alpha.franken.de> L: netdev@vger.kernel.org @@ -13508,6 +13512,7 @@ M: Mika Westerberg <mika.westerberg@linux.intel.com> M: Yehezkel Bernat <yehezkel.bernat@intel.com> T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git S: Maintained +F: Documentation/admin-guide/thunderbolt.rst F: drivers/thunderbolt/ F: include/linux/thunderbolt.h @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* @@ -789,6 +789,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) +# Make sure -fstack-check isn't enabled (like gentoo apparently did) +KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) + # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 4e6e9f57e790..dc91c663bcc0 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -35,6 +35,14 @@ reg = <0x80 0x10>, <0x100 0x10>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 90MHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <90000000>; }; core_intc: archs-intc@cpu { diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 63954a8b0100..69ff4895f2ba 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -35,6 +35,14 @@ reg = <0x80 0x10>, <0x100 0x10>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 100MHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <100000000>; }; core_intc: archs-intc@cpu { diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 8f627c200d60..006aa3de5348 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -114,6 +114,14 @@ reg = <0x00 0x10>, <0x14B8 0x4>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 1GHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <1000000000>; }; serial: serial@5000 { diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 7b8f8faf8a24..ac6b0ed8341e 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -49,10 +49,11 @@ CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_UDL=y CONFIG_FB=y -CONFIG_FB_UDL=y CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index f35974ee7264..c9173c02081c 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) return 0; __asm__ __volatile__( + " mov lp_count, %5 \n" " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" " breq.d %3, 0, 3f \n" @@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) " .word 1b, 4b \n" " .previous \n" : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) - : "g"(-EFAULT), "l"(count) - : "memory"); + : "g"(-EFAULT), "r"(count) + : "lp_count", "lp_start", "lp_end", "memory"); return res; } diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 7ef7d9a8ff89..9d27331fe69a 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -199,7 +199,7 @@ static void read_arc_build_cfg_regs(void) unsigned int exec_ctrl; READ_BCR(AUX_EXEC_CTRL, exec_ctrl); - cpu->extn.dual_enb = exec_ctrl & 1; + cpu->extn.dual_enb = !(exec_ctrl & 1); /* dual issue always present for this core */ cpu->extn.dual = 1; diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 74315f302971..bf40e06f3fb8 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -163,7 +163,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, */ static int __print_sym(unsigned int address, void *unused) { - __print_symbol(" %s\n", address); + printk(" %pS\n", (void *)address); return 0; } diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index bcd7c9fc5d0f..133a4dae41fe 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -83,6 +83,7 @@ DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC) DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR) DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT) DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN) +DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0) /* * Entry Point for Misaligned Data access Exception, for emulating in software @@ -115,6 +116,8 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs) * Thus TRAP_S <n> can be used for specific purpose * -1 used for software breakpointing (gdb) * -2 used by kprobes + * -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as + * -fno-isolate-erroneous-paths-dereference */ void do_non_swi_trap(unsigned long address, struct pt_regs *regs) { @@ -134,6 +137,9 @@ void do_non_swi_trap(unsigned long address, struct pt_regs *regs) kgdb_trap(regs); break; + case 5: + do_trap5_error(address, regs); + break; default: break; } @@ -155,3 +161,11 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs) insterror_is_error(address, regs); } + +/* + * abort() call generated by older gcc for __builtin_trap() + */ +void abort(void) +{ + __asm__ __volatile__("trap_s 5\n"); +} diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 7d8c1d6c2f60..6e9a0a9a6a04 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -163,6 +163,9 @@ static void show_ecr_verbose(struct pt_regs *regs) else pr_cont("Bus Error, check PRM\n"); #endif + } else if (vec == ECR_V_TRAP) { + if (regs->ecr_param == 5) + pr_cont("gcc generated __builtin_trap\n"); } else { pr_cont("Check Programmer's Manual\n"); } diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index f1ac6790da5f..46544e88492d 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -317,25 +317,23 @@ static void __init axs103_early_init(void) * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack * of fudging the freq in DT */ +#define AXS103_QUAD_CORE_CPU_FREQ_HZ 50000000 + unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; if (num_cores > 2) { - u32 freq = 50, orig; - /* - * TODO: use cpu node "cpu-freq" param instead of platform-specific - * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu. - */ + u32 freq; int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk"); const struct fdt_property *prop; prop = fdt_get_property(initial_boot_params, off, - "clock-frequency", NULL); - orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000; + "assigned-clock-rates", NULL); + freq = be32_to_cpu(*(u32 *)(prop->data)); /* Patching .dtb in-place with new core clock value */ - if (freq != orig ) { - freq = cpu_to_be32(freq * 1000000); + if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) { + freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ); fdt_setprop_inplace(initial_boot_params, off, - "clock-frequency", &freq, sizeof(freq)); + "assigned-clock-rates", &freq, sizeof(freq)); } } #endif diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index fd0ae5e38639..2958aedb649a 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -38,42 +38,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define CREG_PAE (CREG_BASE + 0x180) #define CREG_PAE_UPDATE (CREG_BASE + 0x194) -#define CREG_CORE_IF_CLK_DIV (CREG_BASE + 0x4B8) -#define CREG_CORE_IF_CLK_DIV_2 0x1 -#define CGU_BASE ARC_PERIPHERAL_BASE -#define CGU_PLL_STATUS (ARC_PERIPHERAL_BASE + 0x4) -#define CGU_PLL_CTRL (ARC_PERIPHERAL_BASE + 0x0) -#define CGU_PLL_STATUS_LOCK BIT(0) -#define CGU_PLL_STATUS_ERR BIT(1) -#define CGU_PLL_CTRL_1GHZ 0x3A10 -#define HSDK_PLL_LOCK_TIMEOUT 500 - -#define HSDK_PLL_LOCKED() \ - !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK) - -#define HSDK_PLL_ERR() \ - !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR) - -static void __init hsdk_set_cpu_freq_1ghz(void) -{ - u32 timeout = HSDK_PLL_LOCK_TIMEOUT; - - /* - * As we set cpu clock which exceeds 500MHz, the divider for the interface - * clock must be programmed to div-by-2. - */ - iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV); - - /* Set cpu clock to 1GHz */ - iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL); - - while (!HSDK_PLL_LOCKED() && timeout--) - cpu_relax(); - - if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR()) - pr_err("Failed to setup CPU frequency to 1GHz!"); -} - #define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) #define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) #define SDIO_UHS_REG_EXT_DIV_2 (2 << 30) @@ -98,12 +62,6 @@ static void __init hsdk_init_early(void) * minimum possible div-by-2. */ iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT); - - /* - * Setup CPU frequency to 1GHz. - * TODO: remove it after smart hsdk pll driver will be introduced. - */ - hsdk_set_cpu_freq_1ghz(); } static const char *hsdk_compat[] __initconst = { diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi index 45d815a86d42..de08d9045cb8 100644 --- a/arch/arm/boot/dts/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed-g4.dtsi @@ -219,7 +219,7 @@ compatible = "aspeed,ast2400-vuart"; reg = <0x1e787000 0x40>; reg-shift = <2>; - interrupts = <10>; + interrupts = <8>; clocks = <&clk_uart>; no-loopback-test; status = "disabled"; diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts index 5f29010cdbd8..9b82cc8843e1 100644 --- a/arch/arm/boot/dts/at91-tse850-3.dts +++ b/arch/arm/boot/dts/at91-tse850-3.dts @@ -221,6 +221,7 @@ jc42@18 { compatible = "nxp,se97b", "jedec,jc-42.4-temp"; reg = <0x18>; + smbus-timeout-disable; }; dpot: mcp4651-104@28 { diff --git a/arch/arm/boot/dts/da850-lego-ev3.dts b/arch/arm/boot/dts/da850-lego-ev3.dts index 413dbd5d9f64..81942ae83e1f 100644 --- a/arch/arm/boot/dts/da850-lego-ev3.dts +++ b/arch/arm/boot/dts/da850-lego-ev3.dts @@ -178,7 +178,7 @@ */ battery { pinctrl-names = "default"; - pintctrl-0 = <&battery_pins>; + pinctrl-0 = <&battery_pins>; compatible = "lego,ev3-battery"; io-channels = <&adc 4>, <&adc 3>; io-channel-names = "voltage", "current"; @@ -392,7 +392,7 @@ batt_volt_en { gpio-hog; gpios = <6 GPIO_ACTIVE_HIGH>; - output-low; + output-high; }; }; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index b2b95ff205e8..0029ec27819c 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -664,6 +664,10 @@ status = "okay"; }; +&mixer { + status = "okay"; +}; + /* eMMC flash */ &mmc_0 { status = "okay"; diff --git a/arch/arm/boot/dts/ls1021a-qds.dts b/arch/arm/boot/dts/ls1021a-qds.dts index 4f211e3c903a..7bb402d3e9d0 100644 --- a/arch/arm/boot/dts/ls1021a-qds.dts +++ b/arch/arm/boot/dts/ls1021a-qds.dts @@ -215,7 +215,7 @@ reg = <0x2a>; VDDA-supply = <®_3p3v>; VDDIO-supply = <®_3p3v>; - clocks = <&sys_mclk 1>; + clocks = <&sys_mclk>; }; }; }; diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts index 7202d9c504be..860b898141f0 100644 --- a/arch/arm/boot/dts/ls1021a-twr.dts +++ b/arch/arm/boot/dts/ls1021a-twr.dts @@ -187,7 +187,7 @@ reg = <0x0a>; VDDA-supply = <®_3p3v>; VDDIO-supply = <®_3p3v>; - clocks = <&sys_mclk 1>; + clocks = <&sys_mclk>; }; }; diff --git a/arch/arm/boot/dts/rk3066a-marsboard.dts b/arch/arm/boot/dts/rk3066a-marsboard.dts index c6d92c25df42..d23ee6d911ac 100644 --- a/arch/arm/boot/dts/rk3066a-marsboard.dts +++ b/arch/arm/boot/dts/rk3066a-marsboard.dts @@ -83,6 +83,10 @@ }; }; +&cpu0 { + cpu0-supply = <&vdd_arm>; +}; + &i2c1 { status = "okay"; clock-frequency = <400000>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index cd24894ee5c6..6102e4e7f35c 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -956,7 +956,7 @@ iep_mmu: iommu@ff900800 { compatible = "rockchip,iommu"; reg = <0x0 0xff900800 0x0 0x40>; - interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH 0>; + interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "iep_mmu"; #iommu-cells = <0>; status = "disabled"; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index b91300d49a31..5840f5c75c3b 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -502,8 +502,8 @@ reg = <0x01c16000 0x1000>; interrupts = <58>; clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 18>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 6ae4d95e230e..316cb8b2945b 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -82,8 +82,8 @@ reg = <0x01c16000 0x1000>; interrupts = <58>; clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 16>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 8bfa12b548e0..72d3fe44ecaf 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -429,8 +429,8 @@ interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>, <&ccu CLK_HDMI_DDC>, - <&ccu 7>, - <&ccu 13>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1"; resets = <&ccu RST_AHB1_HDMI>; reset-names = "ahb"; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 68dfa82544fc..59655e42e4b0 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -581,8 +581,8 @@ reg = <0x01c16000 0x1000>; interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 18>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts index 98715538932f..a021ee6da396 100644 --- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts +++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts @@ -146,6 +146,7 @@ status = "okay"; axp81x: pmic@3a3 { + compatible = "x-powers,axp813"; reg = <0x3a3>; interrupt-parent = <&r_intc>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm/boot/dts/tango4-common.dtsi b/arch/arm/boot/dts/tango4-common.dtsi index 0ec1b0a317b4..ff72a8efb73d 100644 --- a/arch/arm/boot/dts/tango4-common.dtsi +++ b/arch/arm/boot/dts/tango4-common.dtsi @@ -156,7 +156,6 @@ reg = <0x6e000 0x400>; ranges = <0 0x6e000 0x400>; interrupt-parent = <&gic>; - interrupt-controller; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5cf04888c581..3e26c6f7a191 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -793,7 +793,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 8be04ec95adf..5ace9380626a 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -868,10 +868,10 @@ static const struct dma_slave_map dm365_edma_map[] = { { "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) }, { "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) }, { "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) }, - { "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) }, - { "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) }, - { "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) }, - { "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) }, + { "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) }, + { "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) }, + { "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) }, + { "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) }, }; static struct edma_soc_info dm365_edma_pdata = { @@ -925,12 +925,14 @@ static struct resource edma_resources[] = { /* not using TC*_ERR */ }; -static struct platform_device dm365_edma_device = { - .name = "edma", - .id = 0, - .dev.platform_data = &dm365_edma_pdata, - .num_resources = ARRAY_SIZE(edma_resources), - .resource = edma_resources, +static const struct platform_device_info dm365_edma_device __initconst = { + .name = "edma", + .id = 0, + .dma_mask = DMA_BIT_MASK(32), + .res = edma_resources, + .num_res = ARRAY_SIZE(edma_resources), + .data = &dm365_edma_pdata, + .size_data = sizeof(dm365_edma_pdata), }; static struct resource dm365_asp_resources[] = { @@ -1428,13 +1430,18 @@ int __init dm365_init_video(struct vpfe_config *vpfe_cfg, static int __init dm365_init_devices(void) { + struct platform_device *edma_pdev; int ret = 0; if (!cpu_is_davinci_dm365()) return 0; davinci_cfg_reg(DM365_INT_EDMA_CC); - platform_device_register(&dm365_edma_device); + edma_pdev = platform_device_register_full(&dm365_edma_device); + if (IS_ERR(edma_pdev)) { + pr_warn("%s: Failed to register eDMA\n", __func__); + return PTR_ERR(edma_pdev); + } platform_device_register(&dm365_mdio_device); platform_device_register(&dm365_emac_device); diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 45bdbfb96126..4a8d3f83a36e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -75,6 +75,7 @@ pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; + phy-supply = <®_dc1sw>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index 806442d3e846..604cdaedac38 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -77,6 +77,7 @@ pinctrl-0 = <&rmii_pins>; phy-mode = "rmii"; phy-handle = <&ext_rmii_phy1>; + phy-supply = <®_dc1sw>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 0eb2acedf8c3..abe179de35d7 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -82,6 +82,7 @@ pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; + phy-supply = <®_dc1sw>; status = "okay"; }; @@ -95,7 +96,7 @@ &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; - vmmc-supply = <®_vcc3v3>; + vmmc-supply = <®_dcdc1>; vqmmc-supply = <®_vcc1v8>; bus-width = <8>; non-removable; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi index a5da18a6f286..43418bd881d8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi @@ -45,19 +45,10 @@ #include "sun50i-a64.dtsi" -/ { - reg_vcc3v3: vcc3v3 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; - vmmc-supply = <®_vcc3v3>; + vmmc-supply = <®_dcdc1>; non-removable; disable-wp; bus-width = <4>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts index b6b7a561df8c..a42fd79a62a3 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts @@ -71,7 +71,7 @@ pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; vmmc-supply = <®_vcc3v3>; bus-width = <4>; - cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; + cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index a298df74ca6c..dbe2648649db 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -255,7 +255,6 @@ &avb { pinctrl-0 = <&avb_pins>; pinctrl-names = "default"; - renesas,no-ether-link; phy-handle = <&phy0>; status = "okay"; diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index 0d85b315ce71..73439cf48659 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -145,7 +145,6 @@ &avb { pinctrl-0 = <&avb_pins>; pinctrl-names = "default"; - renesas,no-ether-link; phy-handle = <&phy0>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index d4f80786e7c2..3890468678ce 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -132,6 +132,8 @@ assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>; clock_in_out = "input"; + /* shows instability at 1GBit right now */ + max-speed = <100>; phy-supply = <&vcc_io>; phy-mode = "rgmii"; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 41d61840fb99..2426da631938 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -514,7 +514,7 @@ tsadc: tsadc@ff250000 { compatible = "rockchip,rk3328-tsadc"; reg = <0x0 0xff250000 0x0 0x100>; - interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>; + interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>; assigned-clocks = <&cru SCLK_TSADC>; assigned-clock-rates = <50000>; clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 910628d18add..1fc5060d7027 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -155,17 +155,6 @@ regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; }; - - vdd_log: vdd-log { - compatible = "pwm-regulator"; - pwms = <&pwm2 0 25000 0>; - regulator-name = "vdd_log"; - regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; - regulator-always-on; - regulator-boot-on; - status = "okay"; - }; }; &cpu_b0 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index 48e733136db4..0ac2ace82435 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -198,8 +198,8 @@ gpio-controller; #gpio-cells = <2>; gpio-ranges = <&pinctrl 0 0 0>, - <&pinctrl 96 0 0>, - <&pinctrl 160 0 0>; + <&pinctrl 104 0 0>, + <&pinctrl 168 0 0>; gpio-ranges-group-names = "gpio_range0", "gpio_range1", "gpio_range2"; diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index cb79fba79d43..b88a8dd14933 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -122,7 +122,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index dd5a08aaa4da..3eb4bfc1fb36 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -12,6 +12,7 @@ for the semaphore. */ #define __PA_LDCW_ALIGNMENT 16 +#define __PA_LDCW_ALIGN_ORDER 4 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) &(a)->lock[0]; \ __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ @@ -29,6 +30,7 @@ ldcd). */ #define __PA_LDCW_ALIGNMENT 4 +#define __PA_LDCW_ALIGN_ORDER 2 #define __ldcw_align(a) (&(a)->slock) #define __LDCW "ldcw,co" diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index d8f77358e2ba..29b99b8964aa 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -870,7 +870,7 @@ static void print_parisc_device(struct parisc_device *dev) static int count; print_pa_hwpath(dev, hw_path); - printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", + printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index f3cecf5117cf..e95207c0565e 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -35,6 +35,7 @@ #include <asm/pgtable.h> #include <asm/signal.h> #include <asm/unistd.h> +#include <asm/ldcw.h> #include <asm/thread_info.h> #include <linux/linkage.h> @@ -46,6 +47,14 @@ #endif .import pa_tlb_lock,data + .macro load_pa_tlb_lock reg +#if __PA_LDCW_ALIGNMENT > 4 + load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg + depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg +#else + load32 PA(pa_tlb_lock), \reg +#endif + .endm /* space_to_prot macro creates a prot id from a space id */ @@ -457,7 +466,7 @@ .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop @@ -480,7 +489,7 @@ /* Release pa_tlb_lock lock. */ .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp tlb_unlock0 \spc,\tmp #endif .endm diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index adf7187f8951..2d40c4ff3f69 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -36,6 +36,7 @@ #include <asm/assembly.h> #include <asm/pgtable.h> #include <asm/cache.h> +#include <asm/ldcw.h> #include <linux/linkage.h> .text @@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local) .macro tlb_lock la,flags,tmp #ifdef CONFIG_SMP - ldil L%pa_tlb_lock,%r1 - ldo R%pa_tlb_lock(%r1),\la +#if __PA_LDCW_ALIGNMENT > 4 + load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la + depi 0,31,__PA_LDCW_ALIGN_ORDER, \la +#else + load32 pa_tlb_lock, \la +#endif rsm PSW_SM_I,\flags 1: LDCW 0(\la),\tmp cmpib,<>,n 0,\tmp,3f diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 30f92391a93e..cad3e8661cd6 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -39,6 +39,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/fs.h> +#include <linux/cpu.h> #include <linux/module.h> #include <linux/personality.h> #include <linux/ptrace.h> @@ -184,6 +185,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) } /* + * Idle thread support + * + * Detect when running on QEMU with SeaBIOS PDC Firmware and let + * QEMU idle the host too. + */ + +int running_on_qemu __read_mostly; + +void __cpuidle arch_cpu_idle_dead(void) +{ + /* nop on real hardware, qemu will offline CPU. */ + asm volatile("or %%r31,%%r31,%%r31\n":::); +} + +void __cpuidle arch_cpu_idle(void) +{ + local_irq_enable(); + + /* nop on real hardware, qemu will idle sleep. */ + asm volatile("or %%r10,%%r10,%%r10\n":::); +} + +static int __init parisc_idle_init(void) +{ + const char *marker; + + /* check QEMU/SeaBIOS marker in PAGE0 */ + marker = (char *) &PAGE0->pad0; + running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0); + + if (!running_on_qemu) + cpu_idle_poll_ctrl(1); + + return 0; +} +arch_initcall(parisc_idle_init); + +/* * Copy architecture-specific thread state */ int diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 13f7854e0d49..48f41399fc0b 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -631,11 +631,11 @@ void __init mem_init(void) mem_init_print_info(NULL); #ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ printk("virtual kernel memory layout:\n" - " vmalloc : 0x%p - 0x%p (%4ld MB)\n" - " memory : 0x%p - 0x%p (%4ld MB)\n" - " .init : 0x%p - 0x%p (%4ld kB)\n" - " .data : 0x%p - 0x%p (%4ld kB)\n" - " .text : 0x%p - 0x%p (%4ld kB)\n", + " vmalloc : 0x%px - 0x%px (%4ld MB)\n" + " memory : 0x%px - 0x%px (%4ld MB)\n" + " .init : 0x%px - 0x%px (%4ld kB)\n" + " .data : 0x%px - 0x%px (%4ld kB)\n" + " .text : 0x%px - 0x%px (%4ld kB)\n", (void*)VMALLOC_START, (void*)VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20, diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4797d08581ce..6e1e39035380 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } +static noinline int bad_access(struct pt_regs *regs, unsigned long address) +{ + return __bad_area(regs, address, SEGV_ACCERR); +} + static int do_sigbus(struct pt_regs *regs, unsigned long address, unsigned int fault) { @@ -490,7 +495,7 @@ retry: good_area: if (unlikely(access_error(is_write, is_exec, vma))) - return bad_area(regs, address); + return bad_access(regs, address); /* * If for any reason at all we couldn't handle the fault, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 44cbf4c12ea1..df95102e732c 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) } static struct lock_class_key fsl_msi_irq_class; +static struct lock_class_key fsl_msi_irq_request_class; static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int offset, int irq_index) @@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, dev_err(&dev->dev, "No memory for MSI cascade data\n"); return -ENOMEM; } - irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); + irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class, + &fsl_msi_irq_request_class); cascade_data->index = offset; cascade_data->msi_data = msi; cascade_data->virq = virt_msir; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ec8b68e97d3c..2c93cbbcd15e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -792,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) if (kvm->arch.use_cmma) { /* - * Get the last slot. They should be sorted by base_gfn, so the - * last slot is also the one at the end of the address space. - * We have verified above that at least one slot is present. + * Get the first slot. They are reverse sorted by base_gfn, so + * the first slot is also the one at the end of the address + * space. We have verified above that at least one slot is + * present. */ - ms = slots->memslots + slots->used_slots - 1; + ms = slots->memslots; /* round up so we only use full longs */ ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); /* allocate enough bytes to store all the bits */ diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 572496c688cc..0714bfa56da0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1006,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) cbrlo[entries] = gfn << PAGE_SHIFT; } - if (orc) { + if (orc && gfn < ms->bitmap_size) { /* increment only if we are really flipping the bit to 1 */ if (!test_and_set_bit(gfn, ms->pgste_bitmap)) atomic64_inc(&ms->dirty_pages); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index cae5a1e16cbd..c4f8039a35e8 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess); void disable_sacf_uaccess(mm_segment_t old_fs) { + current->thread.mm_segment = old_fs; if (old_fs == USER_DS && test_facility(27)) { __ctl_load(S390_lowcore.user_asce, 1, 1); clear_cpu_flag(CIF_ASCE_PRIMARY); } - current->thread.mm_segment = old_fs; } EXPORT_SYMBOL(disable_sacf_uaccess); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f7aa5a77827e..2d15d84c20ed 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -181,6 +181,9 @@ out_unlock: static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, size_t size, int flags) { + unsigned long irqflags; + int ret; + /* * With zdev->tlb_refresh == 0, rpcit is not required to establish new * translations when previously invalid translation-table entries are @@ -196,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, return 0; } - return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, - PAGE_ALIGN(size)); + ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, + PAGE_ALIGN(size)); + if (ret == -ENOMEM && !s390_iommu_strict) { + /* enable the hypervisor to free some resources */ + if (zpci_refresh_global(zdev)) + goto out; + + spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); + bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, + zdev->lazy_bitmap, zdev->iommu_pages); + bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); + ret = 0; + } +out: + return ret; } static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 19bcb3b45a70..f069929e8211 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -89,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range) if (cc) zpci_err_insn(cc, status, addr, range); + if (cc == 1 && (status == 4 || status == 16)) + return -ENOMEM; + return (cc) ? -EIO : 0; } diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S index e5547b22cd18..0ddbbb031822 100644 --- a/arch/sparc/lib/hweight.S +++ b/arch/sparc/lib/hweight.S @@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32) .previous ENTRY(__arch_hweight64) - sethi %hi(__sw_hweight16), %g1 - jmpl %g1 + %lo(__sw_hweight16), %g0 + sethi %hi(__sw_hweight64), %g1 + jmpl %g1 + %lo(__sw_hweight64), %g0 nop ENDPROC(__arch_hweight64) EXPORT_SYMBOL(__arch_hweight64) diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 5f25b39f04d4..c4ac6043ebb0 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -298,7 +298,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index d5364ca2e3f9..b5e5e02f8cde 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -23,6 +23,9 @@ */ #undef CONFIG_AMD_MEM_ENCRYPT +/* No PAGE_TABLE_ISOLATION support needed either: */ +#undef CONFIG_PAGE_TABLE_ISOLATION + #include "misc.h" /* These actually do the work of building the kernel identity maps. */ diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh index c9e8499fbfe7..6a10d52a4145 100644 --- a/arch/x86/boot/genimage.sh +++ b/arch/x86/boot/genimage.sh @@ -80,39 +80,43 @@ genfdimage288() { mcopy $FBZIMAGE w:linux } -genisoimage() { +geniso() { tmp_dir=`dirname $FIMAGE`/isoimage rm -rf $tmp_dir mkdir $tmp_dir - for i in lib lib64 share end ; do + for i in lib lib64 share ; do for j in syslinux ISOLINUX ; do if [ -f /usr/$i/$j/isolinux.bin ] ; then isolinux=/usr/$i/$j/isolinux.bin - cp $isolinux $tmp_dir fi done for j in syslinux syslinux/modules/bios ; do if [ -f /usr/$i/$j/ldlinux.c32 ]; then ldlinux=/usr/$i/$j/ldlinux.c32 - cp $ldlinux $tmp_dir fi done if [ -n "$isolinux" -a -n "$ldlinux" ] ; then break fi - if [ $i = end -a -z "$isolinux" ] ; then - echo 'Need an isolinux.bin file, please install syslinux/isolinux.' - exit 1 - fi done + if [ -z "$isolinux" ] ; then + echo 'Need an isolinux.bin file, please install syslinux/isolinux.' + exit 1 + fi + if [ -z "$ldlinux" ] ; then + echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.' + exit 1 + fi + cp $isolinux $tmp_dir + cp $ldlinux $tmp_dir cp $FBZIMAGE $tmp_dir/linux echo "$KCMDLINE" > $tmp_dir/isolinux.cfg if [ -f "$FDINITRD" ] ; then cp "$FDINITRD" $tmp_dir/initrd.img fi - mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \ - -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \ - $tmp_dir + genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \ + -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \ + -boot-info-table $tmp_dir isohybrid $FIMAGE 2>/dev/null || true rm -rf $tmp_dir } @@ -121,6 +125,6 @@ case $1 in bzdisk) genbzdisk;; fdimage144) genfdimage144;; fdimage288) genfdimage288;; - isoimage) genisoimage;; + isoimage) geniso;; *) echo 'Unknown image format'; exit 1; esac diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3fd8bc560fae..45a63e00a6af 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -1,6 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/jump_label.h> #include <asm/unwind_hints.h> +#include <asm/cpufeatures.h> +#include <asm/page_types.h> +#include <asm/percpu.h> +#include <asm/asm-offsets.h> +#include <asm/processor-flags.h> /* @@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with #endif .endm +#ifdef CONFIG_PAGE_TABLE_ISOLATION + +/* + * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two + * halves: + */ +#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT) +#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT)) + +.macro SET_NOFLUSH_BIT reg:req + bts $X86_CR3_PCID_NOFLUSH_BIT, \reg +.endm + +.macro ADJUST_KERNEL_CR3 reg:req + ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID + /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ + andq $(~PTI_SWITCH_MASK), \reg +.endm + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + mov %cr3, \scratch_reg + ADJUST_KERNEL_CR3 \scratch_reg + mov \scratch_reg, %cr3 +.Lend_\@: +.endm + +#define THIS_CPU_user_pcid_flush_mask \ + PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask + +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + mov %cr3, \scratch_reg + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * Test if the ASID needs a flush. + */ + movq \scratch_reg, \scratch_reg2 + andq $(0x7FF), \scratch_reg /* mask ASID */ + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + /* Flush needed, clear the bit */ + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + movq \scratch_reg2, \scratch_reg + jmp .Lwrcr3_\@ + +.Lnoflush_\@: + movq \scratch_reg2, \scratch_reg + SET_NOFLUSH_BIT \scratch_reg + +.Lwrcr3_\@: + /* Flip the PGD and ASID to the user version */ + orq $(PTI_SWITCH_MASK), \scratch_reg + mov \scratch_reg, %cr3 +.Lend_\@: +.endm + +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req + pushq %rax + SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax + popq %rax +.endm + +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI + movq %cr3, \scratch_reg + movq \scratch_reg, \save_reg + /* + * Is the "switch mask" all zero? That means that both of + * these are zero: + * + * 1. The user/kernel PCID bit, and + * 2. The user/kernel "bit" that points CR3 to the + * bottom half of the 8k PGD + * + * That indicates a kernel CR3 value, not a user CR3. + */ + testq $(PTI_SWITCH_MASK), \scratch_reg + jz .Ldone_\@ + + ADJUST_KERNEL_CR3 \scratch_reg + movq \scratch_reg, %cr3 + +.Ldone_\@: +.endm + +.macro RESTORE_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * KERNEL pages can always resume with NOFLUSH as we do + * explicit flushes. + */ + bt $X86_CR3_PTI_SWITCH_BIT, \save_reg + jnc .Lnoflush_\@ + + /* + * Check if there's a pending flush for the user ASID we're + * about to set. + */ + movq \save_reg, \scratch_reg + andq $(0x7FF), \scratch_reg + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + jmp .Lwrcr3_\@ + +.Lnoflush_\@: + SET_NOFLUSH_BIT \save_reg + +.Lwrcr3_\@: + /* + * The CR3 write could be avoided when not changing its value, + * but would require a CR3 read *and* a scratch register. + */ + movq \save_reg, %cr3 +.Lend_\@: +.endm + +#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */ + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req +.endm +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req +.endm +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req +.endm +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req +.endm +.macro RESTORE_CR3 scratch_reg:req save_reg:req +.endm + +#endif + #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3d19c830e1b1..f048e384ff54 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -23,7 +23,6 @@ #include <asm/segment.h> #include <asm/cache.h> #include <asm/errno.h> -#include "calling.h" #include <asm/asm-offsets.h> #include <asm/msr.h> #include <asm/unistd.h> @@ -40,6 +39,8 @@ #include <asm/frame.h> #include <linux/err.h> +#include "calling.h" + .code64 .section .entry.text, "ax" @@ -168,6 +169,9 @@ ENTRY(entry_SYSCALL_64_trampoline) /* Stash the user RSP. */ movq %rsp, RSP_SCRATCH + /* Note: using %rsp as a scratch reg. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + /* Load the top of the task stack into RSP */ movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp @@ -207,6 +211,10 @@ ENTRY(entry_SYSCALL_64) */ swapgs + /* + * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it + * is not required to switch CR3. + */ movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp @@ -403,6 +411,7 @@ syscall_return_via_sysret: * We are on the trampoline stack. All regs except RDI are live. * We can do future final exit work right here. */ + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi popq %rdi popq %rsp @@ -740,6 +749,8 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) * We can do future final exit work right here. */ + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + /* Restore RDI. */ popq %rdi SWAPGS @@ -822,7 +833,9 @@ native_irq_return_ldt: */ pushq %rdi /* Stash user RDI */ - SWAPGS + SWAPGS /* to kernel GS */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ + movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ movq (1*8)(%rsp), %rax /* user RIP */ @@ -838,7 +851,6 @@ native_irq_return_ldt: /* Now RAX == RSP. */ andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ - popq %rdi /* Restore user RDI */ /* * espfix_stack[31:16] == 0. The page tables are set up such that @@ -849,7 +861,11 @@ native_irq_return_ldt: * still points to an RO alias of the ESPFIX stack. */ orq PER_CPU_VAR(espfix_stack), %rax - SWAPGS + + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + SWAPGS /* to user GS */ + popq %rdi /* Restore user RDI */ + movq %rax, %rsp UNWIND_HINT_IRET_REGS offset=8 @@ -949,6 +965,8 @@ ENTRY(switch_to_thread_stack) UNWIND_HINT_FUNC pushq %rdi + /* Need to switch before accessing the thread stack. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi movq %rsp, %rdi movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI @@ -1250,7 +1268,11 @@ ENTRY(paranoid_entry) js 1f /* negative -> in kernel */ SWAPGS xorl %ebx, %ebx -1: ret + +1: + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 + + ret END(paranoid_entry) /* @@ -1272,6 +1294,7 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? */ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ + RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: @@ -1299,6 +1322,8 @@ ENTRY(error_entry) * from user mode due to an IRET fault. */ SWAPGS + /* We have user CR3. Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax .Lerror_entry_from_usermode_after_swapgs: /* Put us onto the real thread stack. */ @@ -1345,6 +1370,7 @@ ENTRY(error_entry) * .Lgs_change's error handler with kernel gsbase. */ SWAPGS + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax jmp .Lerror_entry_done .Lbstep_iret: @@ -1354,10 +1380,11 @@ ENTRY(error_entry) .Lerror_bad_iret: /* - * We came from an IRET to user mode, so we have user gsbase. - * Switch to kernel gsbase: + * We came from an IRET to user mode, so we have user + * gsbase and CR3. Switch to kernel gsbase and CR3: */ SWAPGS + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax /* * Pretend that the exception came from user mode: set up pt_regs @@ -1389,6 +1416,10 @@ END(error_exit) /* * Runs on exception stack. Xen PV does not go through this path at all, * so we can use real assembly here. + * + * Registers: + * %r14: Used to save/restore the CR3 of the interrupted context + * when PAGE_TABLE_ISOLATION is in use. Do not clobber. */ ENTRY(nmi) UNWIND_HINT_IRET_REGS @@ -1452,6 +1483,7 @@ ENTRY(nmi) swapgs cld + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 @@ -1704,6 +1736,8 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 + testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 95ad40eb7eff..98d5358e4041 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -49,6 +49,10 @@ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ SWAPGS + + /* We are about to clobber %rsp anyway, clobbering here is OK */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* @@ -186,8 +190,13 @@ ENTRY(entry_SYSCALL_compat) /* Interrupts are off on entry. */ swapgs - /* Stash user ESP and switch to the kernel stack. */ + /* Stash user ESP */ movl %esp, %r8d + + /* Use %rsp as scratch reg. User ESP is stashed in r8 */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + + /* Switch to the kernel stack */ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* Construct struct pt_regs on stack */ @@ -256,10 +265,22 @@ sysret32_from_system_call: * when the system call started, which is already known to user * code. We zero R8-R10 to avoid info leaks. */ + movq RSP-ORIG_RAX(%rsp), %rsp + + /* + * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored + * on the process stack which is not mapped to userspace and + * not readable after we SWITCH_TO_USER_CR3. Delay the CR3 + * switch until after after the last reference to the process + * stack. + * + * %r8/%r9 are zeroed before the sysret, thus safe to clobber. + */ + SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 + xorq %r8, %r8 xorq %r9, %r9 xorq %r10, %r10 - movq RSP-ORIG_RAX(%rsp), %rsp swapgs sysretl END(entry_SYSCALL_compat) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 1faf40f2dda9..577fa8adb785 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -344,14 +344,14 @@ int in_gate_area_no_mm(unsigned long addr) * vsyscalls but leave the page not present. If so, we skip calling * this. */ -static void __init set_vsyscall_pgtable_user_bits(void) +void __init set_vsyscall_pgtable_user_bits(pgd_t *root) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; - pgd = pgd_offset_k(VSYSCALL_ADDR); + pgd = pgd_offset_pgd(root, VSYSCALL_ADDR); set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 @@ -373,7 +373,7 @@ void __init map_vsyscall(void) vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); - set_vsyscall_pgtable_user_bits(); + set_vsyscall_pgtable_user_bits(swapper_pg_dir); } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 09c26a4f139c..731153a4681e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = { __init int intel_pmu_init(void) { + struct attribute **extra_attr = NULL; + struct attribute **to_free = NULL; union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; @@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void) unsigned int unused; struct extra_reg *er; int version, i; - struct attribute **extra_attr = NULL; char *name; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { @@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void) extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_attr = merge_attr(extra_attr, skl_format_attr); + to_free = extra_attr; x86_pmu.cpu_events = get_hsw_events_attrs(); intel_pmu_pebs_data_source_skl( boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); @@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void) pr_cont("full-width counters, "); } + kfree(to_free); return 0; } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 3674a4b6f8bd..8156e47da7ba 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -3,16 +3,19 @@ #include <linux/types.h> #include <linux/slab.h> +#include <asm/cpu_entry_area.h> #include <asm/perf_event.h> +#include <asm/tlbflush.h> #include <asm/insn.h> #include "../perf_event.h" +/* Waste a full page so it can be mapped into the cpu_entry_area */ +DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 -#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) -#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) #define PEBS_FIXUP_SIZE PAGE_SIZE /* @@ -279,17 +282,67 @@ void fini_debug_store_on_cpu(int cpu) static DEFINE_PER_CPU(void *, insn_buffer); -static int alloc_pebs_buffer(int cpu) +static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + unsigned long start = (unsigned long)cea; + phys_addr_t pa; + size_t msz = 0; + + pa = virt_to_phys(addr); + + preempt_disable(); + for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, pa, prot); + + /* + * This is a cross-CPU update of the cpu_entry_area, we must shoot down + * all TLB entries for it. + */ + flush_tlb_kernel_range(start, start + size); + preempt_enable(); +} + +static void ds_clear_cea(void *cea, size_t size) +{ + unsigned long start = (unsigned long)cea; + size_t msz = 0; + + preempt_disable(); + for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); + + flush_tlb_kernel_range(start, start + size); + preempt_enable(); +} + +static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) +{ + unsigned int order = get_order(size); int node = cpu_to_node(cpu); - int max; - void *buffer, *ibuffer; + struct page *page; + + page = __alloc_pages_node(node, flags | __GFP_ZERO, order); + return page ? page_address(page) : NULL; +} + +static void dsfree_pages(const void *buffer, size_t size) +{ + if (buffer) + free_pages((unsigned long)buffer, get_order(size)); +} + +static int alloc_pebs_buffer(int cpu) +{ + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + size_t bsiz = x86_pmu.pebs_buffer_size; + int max, node = cpu_to_node(cpu); + void *buffer, *ibuffer, *cea; if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); + buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); if (unlikely(!buffer)) return -ENOMEM; @@ -300,25 +353,27 @@ static int alloc_pebs_buffer(int cpu) if (x86_pmu.intel_cap.pebs_format < 2) { ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); if (!ibuffer) { - kfree(buffer); + dsfree_pages(buffer, bsiz); return -ENOMEM; } per_cpu(insn_buffer, cpu) = ibuffer; } - - max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; - - ds->pebs_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_pebs_vaddr = buffer; + /* Update the cpu entry area mapping */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds->pebs_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL); ds->pebs_index = ds->pebs_buffer_base; - ds->pebs_absolute_maximum = ds->pebs_buffer_base + - max * x86_pmu.pebs_record_size; - + max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size); + ds->pebs_absolute_maximum = ds->pebs_buffer_base + max; return 0; } static void release_pebs_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.pebs) return; @@ -326,73 +381,70 @@ static void release_pebs_buffer(int cpu) kfree(per_cpu(insn_buffer, cpu)); per_cpu(insn_buffer, cpu) = NULL; - kfree((void *)(unsigned long)ds->pebs_buffer_base); + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds_clear_cea(cea, x86_pmu.pebs_buffer_size); ds->pebs_buffer_base = 0; + dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); + hwev->ds_pebs_vaddr = NULL; } static int alloc_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - int node = cpu_to_node(cpu); - int max, thresh; - void *buffer; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *buffer, *cea; + int max; if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu); if (unlikely(!buffer)) { WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; } - - max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; - thresh = max / 16; - - ds->bts_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_bts_vaddr = buffer; + /* Update the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds->bts_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = ds->bts_buffer_base + - max * BTS_RECORD_SIZE; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - - thresh * BTS_RECORD_SIZE; - + max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE); + ds->bts_absolute_maximum = ds->bts_buffer_base + max; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16); return 0; } static void release_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.bts) return; - kfree((void *)(unsigned long)ds->bts_buffer_base); + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds_clear_cea(cea, BTS_BUFFER_SIZE); ds->bts_buffer_base = 0; + dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE); + hwev->ds_bts_vaddr = NULL; } static int alloc_ds_buffer(int cpu) { - int node = cpu_to_node(cpu); - struct debug_store *ds; - - ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); - if (unlikely(!ds)) - return -ENOMEM; + struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store; + memset(ds, 0, sizeof(*ds)); per_cpu(cpu_hw_events, cpu).ds = ds; - return 0; } static void release_ds_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - per_cpu(cpu_hw_events, cpu).ds = NULL; - kfree(ds); } void release_ds_buffers(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index f7aaadf9331f..8e4ea143ed96 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -14,6 +14,8 @@ #include <linux/perf_event.h> +#include <asm/intel_ds.h> + /* To enable MSR tracing please use the generic trace points. */ /* @@ -77,8 +79,6 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; -/* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 8 #define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) /* @@ -95,23 +95,6 @@ struct amd_nb { PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. - */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; -}; - #define PEBS_REGS \ (PERF_REG_X86_AX | \ PERF_REG_X86_BX | \ @@ -216,6 +199,8 @@ struct cpu_hw_events { * Intel DebugStore bits */ struct debug_store *ds; + void *ds_pebs_vaddr; + void *ds_bts_vaddr; u64 pebs_enabled; int n_pebs; int n_large_pebs; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index dbfd0854651f..cf5961ca8677 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".popsection" + ".popsection\n" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR_2(oldinstr, 1, 2) \ @@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".popsection" + ".popsection\n" /* * Alternative instructions for different CPU types or capabilities. diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 219faaec51df..386a6900e206 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -136,6 +136,7 @@ #endif #ifndef __ASSEMBLY__ +#ifndef __BPF__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -145,5 +146,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif +#endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 2fbc69a0916e..4a7884b8dca5 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -5,6 +5,7 @@ #include <linux/percpu-defs.h> #include <asm/processor.h> +#include <asm/intel_ds.h> /* * cpu_entry_area is a percpu region that contains things needed by the CPU @@ -40,6 +41,18 @@ struct cpu_entry_area { */ char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; #endif +#ifdef CONFIG_CPU_SUP_INTEL + /* + * Per CPU debug store for Intel performance monitoring. Wastes a + * full page at the moment. + */ + struct debug_store cpu_debug_store; + /* + * The actual PEBS/BTS buffers must be mapped to user space + * Reserve enough fixmap PTEs. + */ + struct debug_store_buffers cpu_debug_buffers; +#endif }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 800104c8a3ed..21ac898df2d8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -197,11 +197,12 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ - +#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ @@ -340,5 +341,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index ec8be07c0cda..13c5ee878a47 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -21,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->type = (info->read_exec_only ^ 1) << 1; desc->type |= info->contents << 2; + /* Set the ACCESS bit so it can be mapped RO */ + desc->type |= 1; desc->s = 1; desc->dpl = 0x3; diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 14d6d5007314..b027633e7300 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,12 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define DISABLE_PTI 0 +#else +# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -60,7 +66,7 @@ #define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 -#define DISABLED_MASK7 0 +#define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_MPX) #define DISABLED_MASK10 0 diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h new file mode 100644 index 000000000000..62a9f4966b42 --- /dev/null +++ b/arch/x86/include/asm/intel_ds.h @@ -0,0 +1,36 @@ +#ifndef _ASM_INTEL_DS_H +#define _ASM_INTEL_DS_H + +#include <linux/percpu-defs.h> + +#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) +#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 8 + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +} __aligned(PAGE_SIZE); + +DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + +struct debug_store_buffers { + char bts_buffer[BTS_BUFFER_SIZE]; + char pebs_buffer[PEBS_BUFFER_SIZE]; +}; + +#endif diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 139feef467f7..c066ffae222b 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); extern int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early); + struct irq_data *irq_data, bool reserve); extern void mp_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irq_data); extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5ede7cae1d67..c931b88982a0 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -50,10 +50,33 @@ struct ldt_struct { * call gates. On native, we could merge the ldt_struct and LDT * allocations, but it's not worth trying to optimize. */ - struct desc_struct *entries; - unsigned int nr_entries; + struct desc_struct *entries; + unsigned int nr_entries; + + /* + * If PTI is in use, then the entries array is not mapped while we're + * in user mode. The whole array will be aliased at the addressed + * given by ldt_slot_va(slot). We use two slots so that we can allocate + * and map, and enable a new LDT without invalidating the mapping + * of an older, still-in-use LDT. + * + * slot will be -1 if this LDT doesn't have an alias mapping. + */ + int slot; }; +/* This is a multiple of PAGE_SIZE. */ +#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) + +static inline void *ldt_slot_va(int slot) +{ +#ifdef CONFIG_X86_64 + return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); +#else + BUG(); +#endif +} + /* * Used for LDT copy/destruction. */ @@ -64,6 +87,7 @@ static inline void init_new_context_ldt(struct mm_struct *mm) } int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); +void ldt_arch_exit_mmap(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ static inline void init_new_context_ldt(struct mm_struct *mm) { } static inline int ldt_dup_context(struct mm_struct *oldmm, @@ -71,7 +95,8 @@ static inline int ldt_dup_context(struct mm_struct *oldmm, { return 0; } -static inline void destroy_context_ldt(struct mm_struct *mm) {} +static inline void destroy_context_ldt(struct mm_struct *mm) { } +static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif static inline void load_mm_ldt(struct mm_struct *mm) @@ -96,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm) * that we can see. */ - if (unlikely(ldt)) - set_ldt(ldt->entries, ldt->nr_entries); - else + if (unlikely(ldt)) { + if (static_cpu_has(X86_FEATURE_PTI)) { + if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { + /* + * Whoops -- either the new LDT isn't mapped + * (if slot == -1) or is mapped into a bogus + * slot (if slot > 1). + */ + clear_LDT(); + return; + } + + /* + * If page table isolation is enabled, ldt->entries + * will not be mapped in the userspace pagetables. + * Tell the CPU to access the LDT through the alias + * at ldt_slot_va(ldt->slot). + */ + set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); + } else { + set_ldt(ldt->entries, ldt->nr_entries); + } + } else { clear_LDT(); + } #else clear_LDT(); #endif @@ -194,6 +240,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) static inline void arch_exit_mmap(struct mm_struct *mm) { paravirt_arch_exit_mmap(mm); + ldt_arch_exit_mmap(mm); } #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 4b5e1eafada7..aff42e1da6ee 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +#define PGD_ALLOCATION_ORDER 1 +#else +#define PGD_ALLOCATION_ORDER 0 +#endif + /* * Allocate and free page tables. */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 95e2dfd75521..e42b8943cb1a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); void ptdump_walk_pgd_level_checkwx(void); #ifdef CONFIG_DEBUG_WX @@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) static inline int p4d_bad(p4d_t p4d) { - return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; + unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (p4d_flags(p4d) & ~ignore_flags) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ @@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) static inline int pgd_bad(pgd_t pgd) { - return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; + unsigned long ignore_flags = _PAGE_USER; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) @@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd) * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) +/* + * a shortcut to get a pgd_t in a given mm + */ +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) /* * a shortcut which implies the use of the kernel's pgd, instead * of a process's @@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud) */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { - memcpy(dst, src, count * sizeof(pgd_t)); + memcpy(dst, src, count * sizeof(pgd_t)); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + /* Clone the user space pgd as well */ + memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), + count * sizeof(pgd_t)); +#endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index e9f05331e732..81462e9a34f6 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp) #endif } +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and + * the user one is in the last 4k. To switch between them, you + * just need to flip the 12th bit in their addresses. + */ +#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT + +/* + * This generates better code than the inline assembly in + * __set_bit(). + */ +static inline void *ptr_set_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr |= BIT(bit); + return (void *)__ptr; +} +static inline void *ptr_clear_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr &= ~BIT(bit); + return (void *)__ptr; +} + +static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) +{ + return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) +{ + return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) +{ + return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) +{ + return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * + * Returns true for parts of the PGD that map userspace and + * false for the parts that map the kernel. + */ +static inline bool pgdp_maps_userspace(void *__ptr) +{ + unsigned long ptr = (unsigned long)__ptr; + + return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2); +} + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd); + +/* + * Take a PGD location (pgdp) and a pgd value that needs to be set there. + * Populates the user and returns the resulting PGD that must be set in + * the kernel copy of the page tables. + */ +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return pgd; + return __pti_set_user_pgd(pgdp, pgd); +} +#else +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +#endif + static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { +#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) + p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); +#else *p4dp = p4d; +#endif } static inline void native_p4d_clear(p4d_t *p4d) @@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { +#ifdef CONFIG_PAGE_TABLE_ISOLATION + *pgdp = pti_set_user_pgd(pgdp, pgd); +#else *pgdp = pgd; +#endif } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 3d27831bc58d..6b8f73dcbc2c 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -75,17 +75,27 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ +/* + * See Documentation/x86/x86_64/mm.txt for a description of the memory map. + * + * Be very careful vs. KASLR when changing anything here. The KASLR address + * range must not overlap with anything except the KASAN shadow area, which + * is correct as KASAN disables KASLR. + */ #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) #ifdef CONFIG_X86_5LEVEL -# define VMALLOC_SIZE_TB _AC(16384, UL) -# define __VMALLOC_BASE _AC(0xff92000000000000, UL) +# define VMALLOC_SIZE_TB _AC(12800, UL) +# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) +# define LDT_PGD_ENTRY _AC(-112, UL) +# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #else # define VMALLOC_SIZE_TB _AC(32, UL) # define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) +# define LDT_PGD_ENTRY _AC(-3, UL) +# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif #ifdef CONFIG_RANDOMIZE_MEMORY @@ -100,13 +110,13 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) +#define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) #define ESPFIX_PGD_ENTRY _AC(-2, UL) #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) -#define CPU_ENTRY_AREA_PGD _AC(-3, UL) +#define CPU_ENTRY_AREA_PGD _AC(-4, UL) #define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT) #define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 43212a43ee69..6a60fea90b9d 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -38,6 +38,11 @@ #define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) #define CR3_PCID_MASK 0xFFFull #define CR3_NOFLUSH BIT_ULL(63) + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define X86_CR3_PTI_SWITCH_BIT 11 +#endif + #else /* * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cad8dab266bc..d3a67fba200a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -852,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x) #else /* - * User space process size. 47bits minus one guard page. The guard - * page is necessary on Intel CPUs: if a SYSCALL instruction is at - * the highest possible canonical userspace address, then that - * syscall will enter the kernel with a non-canonical return - * address, and SYSRET will explode dangerously. We avoid this - * particular problem by preventing anything from being mapped - * at the maximum canonical address. + * User space process size. This is the first address outside the user range. + * There are a few constraints that determine this: + * + * On Intel CPUs, if a SYSCALL instruction is at the highest canonical + * address, then that syscall will enter the kernel with a + * non-canonical return address, and SYSRET will explode dangerously. + * We avoid this particular problem by preventing anything executable + * from being mapped at the maximum canonical address. + * + * On AMD CPUs in the Ryzen family, there's a nasty bug in which the + * CPUs malfunction if they execute code from the highest canonical page. + * They'll speculate right off the end of the canonical space, and + * bad things happen. This is worked around in the same way as the + * Intel problem. + * + * With page table isolation enabled, we map the LDT in ... [stay tuned] */ #define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h new file mode 100644 index 000000000000..0b5ef05b2d2d --- /dev/null +++ b/arch/x86/include/asm/pti.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _ASM_X86_PTI_H +#define _ASM_X86_PTI_H +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +extern void pti_init(void); +extern void pti_check_boottime_disable(void); +#else +static inline void pti_check_boottime_disable(void) { } +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 9b6df68d8fd1..eb5f7999a893 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ -static inline void prepare_switch_to(struct task_struct *prev, - struct task_struct *next) +static inline void prepare_switch_to(struct task_struct *next) { #ifdef CONFIG_VMAP_STACK /* @@ -70,7 +69,7 @@ struct fork_frame { #define switch_to(prev, next, last) \ do { \ - prepare_switch_to(prev, next); \ + prepare_switch_to(next); \ \ ((last) = __switch_to_asm((prev), (next))); \ } while (0) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e1884cf35257..4a08dd2ab32a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -10,38 +10,90 @@ #include <asm/special_insns.h> #include <asm/smp.h> #include <asm/invpcid.h> +#include <asm/pti.h> +#include <asm/processor-flags.h> -static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) -{ - /* - * Bump the generation count. This also serves as a full barrier - * that synchronizes with switch_mm(): callers are required to order - * their read of mm_cpumask after their writes to the paging - * structures. - */ - return atomic64_inc_return(&mm->context.tlb_gen); -} +/* + * The x86 feature is called PCID (Process Context IDentifier). It is similar + * to what is traditionally called ASID on the RISC processors. + * + * We don't use the traditional ASID implementation, where each process/mm gets + * its own ASID and flush/restart when we run out of ASID space. + * + * Instead we have a small per-cpu array of ASIDs and cache the last few mm's + * that came by on this CPU, allowing cheaper switch_mm between processes on + * this CPU. + * + * We end up with different spaces for different things. To avoid confusion we + * use different names for each of them: + * + * ASID - [0, TLB_NR_DYN_ASIDS-1] + * the canonical identifier for an mm + * + * kPCID - [1, TLB_NR_DYN_ASIDS] + * the value we write into the PCID part of CR3; corresponds to the + * ASID+1, because PCID 0 is special. + * + * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] + * for KPTI each mm has two address spaces and thus needs two + * PCID values, but we can still do with a single ASID denomination + * for each mm. Corresponds to kPCID + 2048. + * + */ /* There are 12 bits of space for ASIDS in CR3 */ #define CR3_HW_ASID_BITS 12 + /* * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for * user/kernel switches */ -#define PTI_CONSUMED_ASID_BITS 0 +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define PTI_CONSUMED_PCID_BITS 1 +#else +# define PTI_CONSUMED_PCID_BITS 0 +#endif + +#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) -#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS) /* * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account - * for them being zero-based. Another -1 is because ASID 0 is reserved for + * for them being zero-based. Another -1 is because PCID 0 is reserved for * use by non-PCID-aware users. */ -#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2) +#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) + +/* + * 6 because 6 should be plenty and struct tlb_state will fit in two cache + * lines. + */ +#define TLB_NR_DYN_ASIDS 6 +/* + * Given @asid, compute kPCID + */ static inline u16 kern_pcid(u16 asid) { VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Make sure that the dynamic ASID space does not confict with the + * bit we are using to switch between user and kernel ASIDs. + */ + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT)); + /* + * The ASID being passed in here should have respected the + * MAX_ASID_AVAILABLE and thus never have the switch bit set. + */ + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT)); +#endif + /* + * The dynamically-assigned ASIDs that get passed in are small + * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set, + * so do not bother to clear it. + * * If PCID is on, ASID-aware code paths put the ASID+1 into the * PCID bits. This serves two purposes. It prevents a nasty * situation in which PCID-unaware code saves CR3, loads some other @@ -53,6 +105,18 @@ static inline u16 kern_pcid(u16 asid) return asid + 1; } +/* + * Given @asid, compute uPCID + */ +static inline u16 user_pcid(u16 asid) +{ + u16 ret = kern_pcid(asid); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + ret |= 1 << X86_CR3_PTI_SWITCH_BIT; +#endif + return ret; +} + struct pgd_t; static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) { @@ -95,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void) return !static_cpu_has(X86_FEATURE_PCID); } -/* - * 6 because 6 should be plenty and struct tlb_state will fit in - * two cache lines. - */ -#define TLB_NR_DYN_ASIDS 6 - struct tlb_context { u64 ctx_id; u64 tlb_gen; @@ -135,6 +193,24 @@ struct tlb_state { bool is_lazy; /* + * If set we changed the page tables in such a way that we + * needed an invalidation of all contexts (aka. PCIDs / ASIDs). + * This tells us to go invalidate all the non-loaded ctxs[] + * on the next context switch. + * + * The current ctx was kept up-to-date as it ran and does not + * need to be invalidated. + */ + bool invalidate_other; + + /* + * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate + * the corresponding user PCID needs a flush next time we + * switch to it; see SWITCH_TO_USER_CR3. + */ + unsigned short user_pcid_flush_mask; + + /* * Access to this CR4 shadow and to H/W CR4 is protected by * disabling interrupts when modifying either one. */ @@ -215,6 +291,14 @@ static inline unsigned long cr4_read_shadow(void) } /* + * Mark all other ASIDs as invalid, preserves the current. + */ +static inline void invalidate_other_asid(void) +{ + this_cpu_write(cpu_tlbstate.invalidate_other, true); +} + +/* * Save some of cr4 feature set we're using (e.g. Pentium 4MB * enable and PPro Global page enable), so that any CPU's that boot * up after us can get the correct flags. This should only be used @@ -234,18 +318,47 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) extern void initialize_tlbstate_and_flush(void); /* + * Given an ASID, flush the corresponding user ASID. We can delay this + * until the next time we switch to it. + * + * See SWITCH_TO_USER_CR3. + */ +static inline void invalidate_user_asid(u16 asid) +{ + /* There is no user ASID if address space separation is off */ + if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + return; + + /* + * We only have a single ASID if PCID is off and the CR3 + * write will have flushed it. + */ + if (!cpu_feature_enabled(X86_FEATURE_PCID)) + return; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + __set_bit(kern_pcid(asid), + (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); +} + +/* * flush the entire current user mapping */ static inline void __native_flush_tlb(void) { /* - * If current->mm == NULL then we borrow a mm which may change during a - * task switch and therefore we must not be preempted while we write CR3 - * back: + * Preemption or interrupts must be disabled to protect the access + * to the per CPU variable and to prevent being preempted between + * read_cr3() and write_cr3(). */ - preempt_disable(); + WARN_ON_ONCE(preemptible()); + + invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); + + /* If current->mm == NULL then the read_cr3() "borrows" an mm */ native_write_cr3(__native_read_cr3()); - preempt_enable(); } /* @@ -259,6 +372,8 @@ static inline void __native_flush_tlb_global(void) /* * Using INVPCID is considerably faster than a pair of writes * to CR4 sandwiched inside an IRQ flag save/restore. + * + * Note, this works with CR4.PCIDE=0 or 1. */ invpcid_flush_all(); return; @@ -285,7 +400,21 @@ static inline void __native_flush_tlb_global(void) */ static inline void __native_flush_tlb_single(unsigned long addr) { + u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. + * Just use invalidate_user_asid() in case we are called early. + */ + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) + invalidate_user_asid(loaded_mm_asid); + else + invpcid_flush_one(user_pcid(loaded_mm_asid), addr); } /* @@ -301,14 +430,6 @@ static inline void __flush_tlb_all(void) */ __flush_tlb(); } - - /* - * Note: if we somehow had PCID but not PGE, then this wouldn't work -- - * we'd end up flushing kernel translations for the current ASID but - * we might fail to flush kernel translations for other cached ASIDs. - * - * To avoid this issue, we force PCID off if PGE is off. - */ } /* @@ -318,6 +439,16 @@ static inline void __flush_tlb_one(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * __flush_tlb_single() will have cleared the TLB entry for this ASID, + * but since kernel space is replicated across all, we must also + * invalidate all others. + */ + invalidate_other_asid(); } #define TLB_FLUSH_ALL -1UL @@ -378,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) void native_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info); +static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) +{ + /* + * Bump the generation count. This also serves as a full barrier + * that synchronizes with switch_mm(): callers are required to order + * their read of mm_cpumask after their writes to the paging + * structures. + */ + return atomic64_inc_return(&mm->context.tlb_gen); +} + static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, struct mm_struct *mm) { diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 84b9ec0c1bc0..22647a642e98 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed, DECLARE_EVENT_CLASS(vector_activate, TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, - bool early), + bool reserve), - TP_ARGS(irq, is_managed, can_reserve, early), + TP_ARGS(irq, is_managed, can_reserve, reserve), TP_STRUCT__entry( __field( unsigned int, irq ) __field( bool, is_managed ) __field( bool, can_reserve ) - __field( bool, early ) + __field( bool, reserve ) ), TP_fast_assign( __entry->irq = irq; __entry->is_managed = is_managed; __entry->can_reserve = can_reserve; - __entry->early = early; + __entry->reserve = reserve; ), - TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", + TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d", __entry->irq, __entry->is_managed, __entry->can_reserve, - __entry->early) + __entry->reserve) ); #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ DEFINE_EVENT_FN(vector_activate, name, \ TP_PROTO(unsigned int irq, bool is_managed, \ - bool can_reserve, bool early), \ - TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ + bool can_reserve, bool reserve), \ + TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \ DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index c1688c2d0a12..1f86e1b0a5cd 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -56,18 +56,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) /* - * WARNING: The entire pt_regs may not be safe to dereference. In some cases, - * only the iret frame registers are accessible. Use with caution! + * If 'partial' returns true, only the iret frame registers are valid. */ -static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) { if (unwind_done(state)) return NULL; + if (partial) { +#ifdef CONFIG_UNWINDER_ORC + *partial = !state->full_regs; +#else + *partial = false; +#endif + } + return state->regs; } #else -static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) { return NULL; } diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d9a7c659009c..b986b2ca688a 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -7,6 +7,7 @@ #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); +extern void set_vsyscall_pgtable_user_bits(pgd_t *root); /* * Called on instruction fetch fault in vsyscall page. diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 7e1e730396ae..bcba3c643e63 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -78,7 +78,12 @@ #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) -#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ + +#define X86_CR3_PCID_BITS 12 +#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL)) + +#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ +#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) /* * Intel CPU features in CR4 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6e272f3ea984..880441f24146 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg) apic_verbosity = APIC_DEBUG; else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; +#ifdef CONFIG_X86_64 else { pr_warning("APIC Verbosity level %s not recognised" " use apic=verbose or apic=debug\n", arg); return -EINVAL; } +#endif return 0; } diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index aa85690e9b64..25a87028cb3f 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 1, /* logical */ .disable_esr = 0, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 7b659c4480c9..5078b5ce63a7 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = noop_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 201579dc5242..8a7963421460 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, } int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { unsigned long flags; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 9b18be764422..ce503c99f5c4 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) ((apic->irq_dest_mode == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL : MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU : - MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID(cfg->dest_apicid); msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED : - MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(cfg->vector); } diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index fa22017de806..02e8acb134f8 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = default_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 750449152b04..f8b03bb8e725 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd) irq_matrix_reserve(vector_matrix); apicd->can_reserve = true; apicd->has_reserved = true; + irqd_set_can_reserve(irqd); trace_vector_reserve(irqd->irq, 0); vector_assign_managed_shutdown(irqd); } @@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd) int ret; ret = assign_irq_vector_any_locked(irqd); - if (!ret) + if (!ret) { apicd->has_reserved = false; + /* + * Core might have disabled reservation mode after + * allocating the irq descriptor. Ideally this should + * happen before allocation time, but that would require + * completely convoluted ways of transporting that + * information. + */ + if (!irqd_can_reserve(irqd)) + apicd->can_reserve = false; + } return ret; } @@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd) } static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, - bool early) + bool reserve) { struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned long flags; int ret = 0; trace_vector_activate(irqd->irq, apicd->is_managed, - apicd->can_reserve, early); + apicd->can_reserve, reserve); /* Nothing to do for fixed assigned vectors */ if (!apicd->can_reserve && !apicd->is_managed) return 0; raw_spin_lock_irqsave(&vector_lock, flags); - if (early || irqd_is_managed_and_shutdown(irqd)) + if (reserve || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); else if (apicd->is_managed) ret = activate_managed(irqd); @@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, } else { /* Release the vector */ apicd->can_reserve = true; + irqd_set_can_reserve(irqd); clear_irq_vector(irqd); realloc = true; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 622f13ca8a94..8b04234e010b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 1, /* logical */ .disable_esr = 0, diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 676b7cf4b62b..76417a9aab73 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -17,6 +17,7 @@ #include <asm/sigframe.h> #include <asm/bootparam.h> #include <asm/suspend.h> +#include <asm/tlbflush.h> #ifdef CONFIG_XEN #include <xen/interface/xen.h> @@ -94,6 +95,9 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + /* TLB state for the entry code */ + OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask); + /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c9757f07d738..39d7ea865207 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -922,6 +922,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) } setup_force_cpu_cap(X86_FEATURE_ALWAYS); + + if (c->x86_vendor != X86_VENDOR_AMD) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + fpu__init_system(c); #ifdef CONFIG_X86_32 @@ -1360,7 +1364,10 @@ void syscall_init(void) (entry_SYSCALL_64_trampoline - _entry_trampoline); wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); - wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); + if (static_cpu_has(X86_FEATURE_PTI)) + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); + else + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 36b17e0febe8..afbecff161d1 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -76,12 +76,23 @@ void show_iret_regs(struct pt_regs *regs) regs->sp, regs->flags); } -static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) +static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, + bool partial) { - if (on_stack(info, regs, sizeof(*regs))) + /* + * These on_stack() checks aren't strictly necessary: the unwind code + * has already validated the 'regs' pointer. The checks are done for + * ordering reasons: if the registers are on the next stack, we don't + * want to print them out yet. Otherwise they'll be shown as part of + * the wrong stack. Later, when show_trace_log_lvl() switches to the + * next stack, this function will be called again with the same regs so + * they can be printed in the right context. + */ + if (!partial && on_stack(info, regs, sizeof(*regs))) { __show_regs(regs, 0); - else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, - IRET_FRAME_SIZE)) { + + } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET, + IRET_FRAME_SIZE)) { /* * When an interrupt or exception occurs in entry code, the * full pt_regs might not have been saved yet. In that case @@ -98,11 +109,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, struct stack_info stack_info = {0}; unsigned long visit_mask = 0; int graph_idx = 0; + bool partial; printk("%sCall Trace:\n", log_lvl); unwind_start(&state, task, regs, stack); stack = stack ? : get_stack_pointer(task, regs); + regs = unwind_get_entry_regs(&state, &partial); /* * Iterate through the stacks, starting with the current stack pointer. @@ -120,7 +133,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - hardirq stack * - entry stack */ - for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; if (get_stack_info(stack, task, &stack_info, &visit_mask)) { @@ -140,7 +153,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, printk("%s <%s>\n", log_lvl, stack_name); if (regs) - show_regs_safe(&stack_info, regs); + show_regs_if_on_stack(&stack_info, regs, partial); /* * Scan the stack, printing any text addresses we find. At the @@ -164,7 +177,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, /* * Don't print regs->ip again if it was already printed - * by show_regs_safe() below. + * by show_regs_if_on_stack(). */ if (regs && stack == ®s->ip) goto next; @@ -199,9 +212,9 @@ next: unwind_next_frame(&state); /* if the frame has entry regs, print them */ - regs = unwind_get_entry_regs(&state); + regs = unwind_get_entry_regs(&state, &partial); if (regs) - show_regs_safe(&stack_info, regs); + show_regs_if_on_stack(&stack_info, regs, partial); } if (stack_name) @@ -297,11 +310,13 @@ int __die(const char *str, struct pt_regs *regs, long err) unsigned long sp; #endif printk(KERN_DEFAULT - "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, + "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", - IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); + IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", + IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? + (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7dca675fe78d..04a625f0fcda 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag) .balign PAGE_SIZE; \ GLOBAL(name) +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Each PGD needs to be 8k long and 8k aligned. We do not + * ever go out to userspace with these, so we do not + * strictly *need* the second page, but this allows us to + * have a single set_pgd() implementation that does not + * need to worry about whether it has 4k or 8k to work + * with. + * + * This ensures PGDs are 8k long: + */ +#define PTI_USER_PGD_FILL 512 +/* This ensures they are 8k-aligned: */ +#define NEXT_PGD_PAGE(name) \ + .balign 2 * PAGE_SIZE; \ +GLOBAL(name) +#else +#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) +#define PTI_USER_PGD_FILL 0 +#endif + /* Automate the creation of 1 to 1 mapping pmd entries */ #define PMDS(START, PERM, COUNT) \ i = 0 ; \ @@ -350,13 +371,14 @@ GLOBAL(name) .endr __INITDATA -NEXT_PAGE(early_top_pgt) +NEXT_PGD_PAGE(early_top_pgt) .fill 511,8,0 #ifdef CONFIG_X86_5LEVEL .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC #else .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC #endif + .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(early_dynamic_pgts) .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 @@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts) .data #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) -NEXT_PAGE(init_top_pgt) +NEXT_PGD_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC + .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC @@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt) */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) #else -NEXT_PAGE(init_top_pgt) +NEXT_PGD_PAGE(init_top_pgt) .fill 512,8,0 + .fill PTI_USER_PGD_FILL,8,0 #endif #ifdef CONFIG_X86_5LEVEL diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a6b5d62f45a7..26d713ecad34 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -24,6 +24,7 @@ #include <linux/uaccess.h> #include <asm/ldt.h> +#include <asm/tlb.h> #include <asm/desc.h> #include <asm/mmu_context.h> #include <asm/syscalls.h> @@ -51,13 +52,11 @@ static void refresh_ldt_segments(void) static void flush_ldt(void *__mm) { struct mm_struct *mm = __mm; - mm_context_t *pc; if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) return; - pc = &mm->context; - set_ldt(pc->ldt->entries, pc->ldt->nr_entries); + load_mm_ldt(mm); refresh_ldt_segments(); } @@ -94,10 +93,126 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) return NULL; } + /* The new LDT isn't aliased for PTI yet. */ + new_ldt->slot = -1; + new_ldt->nr_entries = num_entries; return new_ldt; } +/* + * If PTI is enabled, this maps the LDT into the kernelmode and + * usermode tables for the given mm. + * + * There is no corresponding unmap function. Even if the LDT is freed, we + * leave the PTEs around until the slot is reused or the mm is destroyed. + * This is harmless: the LDT is always in ordinary memory, and no one will + * access the freed slot. + * + * If we wanted to unmap freed LDTs, we'd also need to do a flush to make + * it useful, and the flush would slow down modify_ldt(). + */ +static int +map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + bool is_vmalloc, had_top_level_entry; + unsigned long va; + spinlock_t *ptl; + pgd_t *pgd; + int i; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return 0; + + /* + * Any given ldt_struct should have map_ldt_struct() called at most + * once. + */ + WARN_ON(ldt->slot != -1); + + /* + * Did we already have the top level entry allocated? We can't + * use pgd_none() for this because it doens't do anything on + * 4-level page table kernels. + */ + pgd = pgd_offset(mm, LDT_BASE_ADDR); + had_top_level_entry = (pgd->pgd != 0); + + is_vmalloc = is_vmalloc_addr(ldt->entries); + + for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) { + unsigned long offset = i << PAGE_SHIFT; + const void *src = (char *)ldt->entries + offset; + unsigned long pfn; + pte_t pte, *ptep; + + va = (unsigned long)ldt_slot_va(slot) + offset; + pfn = is_vmalloc ? vmalloc_to_pfn(src) : + page_to_pfn(virt_to_page(src)); + /* + * Treat the PTI LDT range as a *userspace* range. + * get_locked_pte() will allocate all needed pagetables + * and account for them in this mm. + */ + ptep = get_locked_pte(mm, va, &ptl); + if (!ptep) + return -ENOMEM; + /* + * Map it RO so the easy to find address is not a primary + * target via some kernel interface which misses a + * permission check. + */ + pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)); + set_pte_at(mm, va, ptep, pte); + pte_unmap_unlock(ptep, ptl); + } + + if (mm->context.ldt) { + /* + * We already had an LDT. The top-level entry should already + * have been allocated and synchronized with the usermode + * tables. + */ + WARN_ON(!had_top_level_entry); + if (static_cpu_has(X86_FEATURE_PTI)) + WARN_ON(!kernel_to_user_pgdp(pgd)->pgd); + } else { + /* + * This is the first time we're mapping an LDT for this process. + * Sync the pgd to the usermode tables. + */ + WARN_ON(had_top_level_entry); + if (static_cpu_has(X86_FEATURE_PTI)) { + WARN_ON(kernel_to_user_pgdp(pgd)->pgd); + set_pgd(kernel_to_user_pgdp(pgd), *pgd); + } + } + + va = (unsigned long)ldt_slot_va(slot); + flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0); + + ldt->slot = slot; +#endif + return 0; +} + +static void free_ldt_pgtables(struct mm_struct *mm) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + struct mmu_gather tlb; + unsigned long start = LDT_BASE_ADDR; + unsigned long end = start + (1UL << PGDIR_SHIFT); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + tlb_gather_mmu(&tlb, mm, start, end); + free_pgd_range(&tlb, start, end, start, end); + tlb_finish_mmu(&tlb, start, end); +#endif +} + /* After calling this, the LDT is immutable. */ static void finalize_ldt_struct(struct ldt_struct *ldt) { @@ -156,6 +271,12 @@ int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) new_ldt->nr_entries * LDT_ENTRY_SIZE); finalize_ldt_struct(new_ldt); + retval = map_ldt_struct(mm, new_ldt, 0); + if (retval) { + free_ldt_pgtables(mm); + free_ldt_struct(new_ldt); + goto out_unlock; + } mm->context.ldt = new_ldt; out_unlock: @@ -174,6 +295,11 @@ void destroy_context_ldt(struct mm_struct *mm) mm->context.ldt = NULL; } +void ldt_arch_exit_mmap(struct mm_struct *mm) +{ + free_ldt_pgtables(mm); +} + static int read_ldt(void __user *ptr, unsigned long bytecount) { struct mm_struct *mm = current->mm; @@ -287,6 +413,25 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) new_ldt->entries[ldt_info.entry_number] = ldt; finalize_ldt_struct(new_ldt); + /* + * If we are using PTI, map the new LDT into the userspace pagetables. + * If there is already an LDT, use the other slot so that other CPUs + * will continue to use the old LDT until install_ldt() switches + * them over to the new LDT. + */ + error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0); + if (error) { + /* + * This only can fail for the first LDT setup. If an LDT is + * already installed then the PTE page is already + * populated. Mop up a half populated page table. + */ + if (!WARN_ON_ONCE(old_ldt)) + free_ldt_pgtables(mm); + free_ldt_struct(new_ldt); + goto out_unlock; + } + install_ldt(mm, new_ldt); free_ldt_struct(old_ldt); error = 0; diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 00bc751c861c..edfede768688 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -48,8 +48,6 @@ static void load_segments(void) "\tmovl $"STR(__KERNEL_DS)",%%eax\n" "\tmovl %%eax,%%ds\n" "\tmovl %%eax,%%es\n" - "\tmovl %%eax,%%fs\n" - "\tmovl %%eax,%%gs\n" "\tmovl %%eax,%%ss\n" : : : "eax", "memory"); #undef STR @@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image) * The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. */ - set_gdt(phys_to_virt(0), 0); idt_invalidate(phys_to_virt(0)); + set_gdt(phys_to_virt(0), 0); /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index aed9d94bd46f..832a6acd730f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -47,7 +47,7 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { +__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { .x86_tss = { /* * .sp0 is only used when entering ring 0 from a lower diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8af2e8d0c0a1..145810b0edf6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -906,9 +906,6 @@ void __init setup_arch(char **cmdline_p) set_bit(EFI_BOOT, &efi.flags); set_bit(EFI_64BIT, &efi.flags); } - - if (efi_enabled(EFI_BOOT)) - efi_memblock_x86_reserve_range(); #endif x86_init.oem.arch_setup(); @@ -962,6 +959,8 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + if (efi_enabled(EFI_BOOT)) + efi_memblock_x86_reserve_range(); #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c5970efa8557..ed556d50d7ed 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -126,14 +126,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) spin_lock_irqsave(&rtc_lock, flags); CMOS_WRITE(0xa, 0xf); spin_unlock_irqrestore(&rtc_lock, flags); - local_flush_tlb(); - pr_debug("1.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; - pr_debug("2.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; - pr_debug("3.\n"); } static inline void smpboot_restore_warm_reset_vector(void) @@ -141,11 +137,6 @@ static inline void smpboot_restore_warm_reset_vector(void) unsigned long flags; /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - - /* * Paranoid: Set warm reset code and vector here back * to default values. */ diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 77835bc021c7..093f2ea5dd56 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -102,7 +102,7 @@ __save_stack_trace_reliable(struct stack_trace *trace, for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); unwind_next_frame(&state)) { - regs = unwind_get_entry_regs(&state); + regs = unwind_get_entry_regs(&state, NULL); if (regs) { /* * Kernel mode registers on the stack indicate an @@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk, { int ret; + /* + * If the task doesn't have a stack (e.g., a zombie), the stack is + * "reliably" empty. + */ if (!try_get_task_stack(tsk)) - return -EINVAL; + return 0; ret = __save_stack_trace_reliable(trace, tsk); diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 9a9c9b076955..a5b802a12212 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx, cpu = get_cpu(); while (n-- > 0) { - if (LDT_empty(info) || LDT_zero(info)) { + if (LDT_empty(info) || LDT_zero(info)) memset(desc, 0, sizeof(*desc)); - } else { + else fill_ldt(desc, info); - - /* - * Always set the accessed bit so that the CPU - * doesn't try to write to the (read-only) GDT. - */ - desc->type |= 1; - } ++info; ++desc; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f69dbd47d733..446c9ef8cfc3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -361,7 +361,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * * No need for ist_enter here because we don't use RCU. */ - if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && + if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY && regs->cs == __KERNEL_CS && regs->ip == (unsigned long)native_irq_return_iret) { diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d2a8b5a24a44..1e413a9326aa 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -61,11 +61,17 @@ jiffies_64 = jiffies; . = ALIGN(HPAGE_SIZE); \ __end_rodata_hpage_align = .; +#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); +#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); + #else #define X64_ALIGN_RODATA_BEGIN #define X64_ALIGN_RODATA_END +#define ALIGN_ENTRY_TEXT_BEGIN +#define ALIGN_ENTRY_TEXT_END + #endif PHDRS { @@ -102,8 +108,10 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + ALIGN_ENTRY_TEXT_BEGIN ENTRY_TEXT IRQENTRY_TEXT + ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index eb714f1cdf7e..bb31c801f1fc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4986,6 +4986,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" #endif + /* + * Clear host registers marked as clobbered to prevent + * speculative use. + */ + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" +#ifdef CONFIG_X86_64 + "xor %%r8, %%r8 \n\t" + "xor %%r9, %%r9 \n\t" + "xor %%r10, %%r10 \n\t" + "xor %%r11, %%r11 \n\t" + "xor %%r12, %%r12 \n\t" + "xor %%r13, %%r13 \n\t" + "xor %%r14, %%r14 \n\t" + "xor %%r15, %%r15 \n\t" +#endif "pop %%" _ASM_BP : : [svm]"a"(svm), diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 023afa0c8887..5c14d65f676a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9415,6 +9415,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" + "setbe %c[fail](%0)\n\t" "mov %%" _ASM_AX ", %c[rax](%0) \n\t" "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" __ASM_SIZE(pop) " %c[rcx](%0) \n\t" @@ -9431,12 +9432,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" + "xor %%r8d, %%r8d \n\t" + "xor %%r9d, %%r9d \n\t" + "xor %%r10d, %%r10d \n\t" + "xor %%r11d, %%r11d \n\t" + "xor %%r12d, %%r12d \n\t" + "xor %%r13d, %%r13d \n\t" + "xor %%r14d, %%r14d \n\t" + "xor %%r15d, %%r15d \n\t" #endif "mov %%cr2, %%" _ASM_AX " \n\t" "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" + "xor %%eax, %%eax \n\t" + "xor %%ebx, %%ebx \n\t" + "xor %%esi, %%esi \n\t" + "xor %%edi, %%edi \n\t" "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - "setbe %c[fail](%0) \n\t" ".pushsection .rodata \n\t" ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 52195ee3f6d5..27e9e90a8d35 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_X86_INTEL_MPX) += mpx.o -obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o -obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_X86_INTEL_MPX) += mpx.o +obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index fe814fd5e014..b9283cc27622 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -38,6 +38,32 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } +static void percpu_setup_debug_store(int cpu) +{ +#ifdef CONFIG_CPU_SUP_INTEL + int npages; + void *cea; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; + + cea = &get_cpu_entry_area(cpu)->cpu_debug_store; + npages = sizeof(struct debug_store) / PAGE_SIZE; + BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0); + cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages, + PAGE_KERNEL); + + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers; + /* + * Force the population of PMDs for not yet allocated per cpu + * memory like debug store buffers. + */ + npages = sizeof(struct debug_store_buffers) / PAGE_SIZE; + for (; npages; npages--, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); +#endif +} + /* Setup the fixmap mappings only once per-processor */ static void __init setup_cpu_entry_area(int cpu) { @@ -109,6 +135,7 @@ static void __init setup_cpu_entry_area(int cpu) cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline, __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); #endif + percpu_setup_debug_store(cpu); } static __init void setup_cpu_entry_area_ptes(void) diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index bfcffdf6c577..421f2664ffa0 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -5,7 +5,7 @@ static int ptdump_show(struct seq_file *m, void *v) { - ptdump_walk_pgd_level(m, NULL); + ptdump_walk_pgd_level_debugfs(m, NULL, false); return 0; } @@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -static struct dentry *pe; +static int ptdump_show_curknl(struct seq_file *m, void *v) +{ + if (current->mm->pgd) { + down_read(¤t->mm->mmap_sem); + ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false); + up_read(¤t->mm->mmap_sem); + } + return 0; +} + +static int ptdump_open_curknl(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show_curknl, NULL); +} + +static const struct file_operations ptdump_curknl_fops = { + .owner = THIS_MODULE, + .open = ptdump_open_curknl, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +static struct dentry *pe_curusr; + +static int ptdump_show_curusr(struct seq_file *m, void *v) +{ + if (current->mm->pgd) { + down_read(¤t->mm->mmap_sem); + ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true); + up_read(¤t->mm->mmap_sem); + } + return 0; +} + +static int ptdump_open_curusr(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show_curusr, NULL); +} + +static const struct file_operations ptdump_curusr_fops = { + .owner = THIS_MODULE, + .open = ptdump_open_curusr, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +static struct dentry *dir, *pe_knl, *pe_curknl; static int __init pt_dump_debug_init(void) { - pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL, - &ptdump_fops); - if (!pe) + dir = debugfs_create_dir("page_tables", NULL); + if (!dir) return -ENOMEM; + pe_knl = debugfs_create_file("kernel", 0400, dir, NULL, + &ptdump_fops); + if (!pe_knl) + goto err; + + pe_curknl = debugfs_create_file("current_kernel", 0400, + dir, NULL, &ptdump_curknl_fops); + if (!pe_curknl) + goto err; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pe_curusr = debugfs_create_file("current_user", 0400, + dir, NULL, &ptdump_curusr_fops); + if (!pe_curusr) + goto err; +#endif return 0; +err: + debugfs_remove_recursive(dir); + return -ENOMEM; } static void __exit pt_dump_debug_exit(void) { - debugfs_remove_recursive(pe); + debugfs_remove_recursive(dir); } module_init(pt_dump_debug_init); diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 43dedbfb7257..2a4849e92831 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -52,6 +52,9 @@ enum address_markers_idx { USER_SPACE_NR = 0, KERNEL_SPACE_NR, LOW_KERNEL_NR, +#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL) + LDT_NR, +#endif VMALLOC_START_NR, VMEMMAP_START_NR, #ifdef CONFIG_KASAN @@ -59,6 +62,9 @@ enum address_markers_idx { KASAN_SHADOW_END_NR, #endif CPU_ENTRY_AREA_NR, +#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL) + LDT_NR, +#endif #ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, #endif @@ -82,6 +88,9 @@ static struct addr_marker address_markers[] = { [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, #endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL + [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" }, +#endif [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, #ifdef CONFIG_X86_ESPFIX64 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, @@ -467,7 +476,7 @@ static inline bool is_hypervisor_range(int idx) } static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, - bool checkwx) + bool checkwx, bool dmesg) { #ifdef CONFIG_X86_64 pgd_t *start = (pgd_t *) &init_top_pgt; @@ -480,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, if (pgd) { start = pgd; - st.to_dmesg = true; + st.to_dmesg = dmesg; } st.check_wx = checkwx; @@ -518,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) { - ptdump_walk_pgd_level_core(m, pgd, false); + ptdump_walk_pgd_level_core(m, pgd, false, true); +} + +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (user && static_cpu_has(X86_FEATURE_PTI)) + pgd = kernel_to_user_pgdp(pgd); +#endif + ptdump_walk_pgd_level_core(m, pgd, false, false); +} +EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); + +static void ptdump_walk_user_pgd_level_checkwx(void) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pgd_t *pgd = (pgd_t *) &init_top_pgt; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + pr_info("x86/mm: Checking user space page tables\n"); + pgd = kernel_to_user_pgdp(pgd); + ptdump_walk_pgd_level_core(NULL, pgd, true, false); +#endif } -EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level); void ptdump_walk_pgd_level_checkwx(void) { - ptdump_walk_pgd_level_core(NULL, NULL, true); + ptdump_walk_pgd_level_core(NULL, NULL, true, false); + ptdump_walk_user_pgd_level_checkwx(); } static int __init pt_dump_init(void) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6fdf91ef130a..82f5252c723a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -20,6 +20,7 @@ #include <asm/kaslr.h> #include <asm/hypervisor.h> #include <asm/cpufeature.h> +#include <asm/pti.h> /* * We need to define the tracepoints somewhere, and tlb.c @@ -160,6 +161,12 @@ struct map_range { static int page_size_mask; +static void enable_global_pages(void) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + __supported_pte_mask |= _PAGE_GLOBAL; +} + static void __init probe_page_size_mask(void) { /* @@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ + __supported_pte_mask &= ~_PAGE_GLOBAL; if (boot_cpu_has(X86_FEATURE_PGE)) { cr4_set_bits_and_update_boot(X86_CR4_PGE); - __supported_pte_mask |= _PAGE_GLOBAL; - } else - __supported_pte_mask &= ~_PAGE_GLOBAL; + enable_global_pages(); + } /* Enable 1 GB linear kernel mappings if available: */ if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { @@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void) static void setup_pcid(void) { -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_PCID)) { - if (boot_cpu_has(X86_FEATURE_PGE)) { - /* - * This can't be cr4_set_bits_and_update_boot() -- - * the trampoline code can't handle CR4.PCIDE and - * it wouldn't do any good anyway. Despite the name, - * cr4_set_bits_and_update_boot() doesn't actually - * cause the bits in question to remain set all the - * way through the secondary boot asm. - * - * Instead, we brute-force it and set CR4.PCIDE - * manually in start_secondary(). - */ - cr4_set_bits(X86_CR4_PCIDE); - } else { - /* - * flush_tlb_all(), as currently implemented, won't - * work if PCID is on but PGE is not. Since that - * combination doesn't exist on real hardware, there's - * no reason to try to fully support it, but it's - * polite to avoid corrupting data if we're on - * an improperly configured VM. - */ - setup_clear_cpu_cap(X86_FEATURE_PCID); - } + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + if (!boot_cpu_has(X86_FEATURE_PCID)) + return; + + if (boot_cpu_has(X86_FEATURE_PGE)) { + /* + * This can't be cr4_set_bits_and_update_boot() -- the + * trampoline code can't handle CR4.PCIDE and it wouldn't + * do any good anyway. Despite the name, + * cr4_set_bits_and_update_boot() doesn't actually cause + * the bits in question to remain set all the way through + * the secondary boot asm. + * + * Instead, we brute-force it and set CR4.PCIDE manually in + * start_secondary(). + */ + cr4_set_bits(X86_CR4_PCIDE); + + /* + * INVPCID's single-context modes (2/3) only work if we set + * X86_CR4_PCIDE, *and* we INVPCID support. It's unusable + * on systems that have X86_CR4_PCIDE clear, or that have + * no INVPCID support at all. + */ + if (boot_cpu_has(X86_FEATURE_INVPCID)) + setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE); + } else { + /* + * flush_tlb_all(), as currently implemented, won't work if + * PCID is on but PGE is not. Since that combination + * doesn't exist on real hardware, there's no reason to try + * to fully support it, but it's polite to avoid corrupting + * data if we're on an improperly configured VM. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); } -#endif } #ifdef CONFIG_X86_32 @@ -622,6 +639,7 @@ void __init init_mem_mapping(void) { unsigned long end; + pti_check_boottime_disable(); probe_page_size_mask(); setup_pcid(); @@ -845,12 +863,12 @@ void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { .loaded_mm = &init_mm, .next_asid = 1, .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; -EXPORT_SYMBOL_GPL(cpu_tlbstate); +EXPORT_PER_CPU_SYMBOL(cpu_tlbstate); void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 879ef930e2c2..aedebd2ebf1e 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -34,25 +34,14 @@ #define TB_SHIFT 40 /* - * Virtual address start and end range for randomization. The end changes base - * on configuration to have the highest amount of space for randomization. - * It increases the possible random position for each randomized region. + * Virtual address start and end range for randomization. * - * You need to add an if/def entry if you introduce a new memory region - * compatible with KASLR. Your entry must be in logical order with memory - * layout. For example, ESPFIX is before EFI because its virtual address is - * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to - * ensure that this order is correct and won't be changed. + * The end address could depend on more configuration options to make the + * highest amount of space for randomization available, but that's too hard + * to keep straight and caused issues already. */ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; - -#if defined(CONFIG_X86_ESPFIX64) -static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; -#elif defined(CONFIG_EFI) -static const unsigned long vaddr_end = EFI_VA_END; -#else -static const unsigned long vaddr_end = __START_KERNEL_map; -#endif +static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; /* Default values */ unsigned long page_offset_base = __PAGE_OFFSET_BASE; @@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void) unsigned long remain_entropy; /* - * All these BUILD_BUG_ON checks ensures the memory layout is - * consistent with the vaddr_start/vaddr_end variables. + * These BUILD_BUG_ON checks ensure the memory layout is consistent + * with the vaddr_start/vaddr_end variables. These checks are very + * limited.... */ BUILD_BUG_ON(vaddr_start >= vaddr_end); - BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && - vaddr_end >= EFI_VA_END); - BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || - IS_ENABLED(CONFIG_EFI)) && - vaddr_end >= __START_KERNEL_map); + BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE); BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); if (!kaslr_memory_enabled()) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index d9a9e9fc75dd..391b13402e40 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -405,13 +405,13 @@ bool sme_active(void) { return sme_me_mask && !sev_enabled; } -EXPORT_SYMBOL_GPL(sme_active); +EXPORT_SYMBOL(sme_active); bool sev_active(void) { return sme_me_mask && sev_enabled; } -EXPORT_SYMBOL_GPL(sev_active); +EXPORT_SYMBOL(sev_active); static const struct dma_map_ops sev_dma_ops = { .alloc = sev_alloc, diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 96d456a94b03..004abf9ebf12 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } #else + static inline pgd_t *_pgd_alloc(void) { - return (pgd_t *)__get_free_page(PGALLOC_GFP); + return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) { - free_page((unsigned long)pgd); + free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); } #endif /* CONFIG_X86_PAE */ diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c new file mode 100644 index 000000000000..43d4a4a29037 --- /dev/null +++ b/arch/x86/mm/pti.c @@ -0,0 +1,388 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This code is based in part on work published here: + * + * https://github.com/IAIK/KAISER + * + * The original work was written by and and signed off by for the Linux + * kernel by: + * + * Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at> + * Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at> + * Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at> + * Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at> + * + * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com> + * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and + * Andy Lutomirsky <luto@amacapital.net> + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/bug.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/uaccess.h> + +#include <asm/cpufeature.h> +#include <asm/hypervisor.h> +#include <asm/vsyscall.h> +#include <asm/cmdline.h> +#include <asm/pti.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/desc.h> + +#undef pr_fmt +#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt + +/* Backporting helper */ +#ifndef __GFP_NOTRACK +#define __GFP_NOTRACK 0 +#endif + +static void __init pti_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + pr_info("%s\n", reason); +} + +static void __init pti_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + pr_info("%s\n", reason); +} + +void __init pti_check_boottime_disable(void) +{ + char arg[5]; + int ret; + + if (hypervisor_is_type(X86_HYPER_XEN_PV)) { + pti_print_if_insecure("disabled on XEN PV."); + return; + } + + ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); + if (ret > 0) { + if (ret == 3 && !strncmp(arg, "off", 3)) { + pti_print_if_insecure("disabled on command line."); + return; + } + if (ret == 2 && !strncmp(arg, "on", 2)) { + pti_print_if_secure("force enabled on command line."); + goto enable; + } + if (ret == 4 && !strncmp(arg, "auto", 4)) + goto autosel; + } + + if (cmdline_find_option_bool(boot_command_line, "nopti")) { + pti_print_if_insecure("disabled on command line."); + return; + } + +autosel: + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return; +enable: + setup_force_cpu_cap(X86_FEATURE_PTI); +} + +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + /* + * Changes to the high (kernel) portion of the kernelmode page + * tables are not automatically propagated to the usermode tables. + * + * Users should keep in mind that, unlike the kernelmode tables, + * there is no vmalloc_fault equivalent for the usermode tables. + * Top-level entries added to init_mm's usermode pgd after boot + * will not be automatically propagated to other mms. + */ + if (!pgdp_maps_userspace(pgdp)) + return pgd; + + /* + * The user page tables get the full PGD, accessible from + * userspace: + */ + kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; + + /* + * If this is normal user memory, make it NX in the kernel + * pagetables so that, if we somehow screw up and return to + * usermode with the kernel CR3 loaded, we'll get a page fault + * instead of allowing user code to execute with the wrong CR3. + * + * As exceptions, we don't set NX if: + * - _PAGE_USER is not set. This could be an executable + * EFI runtime mapping or something similar, and the kernel + * may execute from it + * - we don't have NX support + * - we're clearing the PGD (i.e. the new pgd is not present). + */ + if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) && + (__supported_pte_mask & _PAGE_NX)) + pgd.pgd |= _PAGE_NX; + + /* return the copy of the PGD we want the kernel to use: */ + return pgd; +} + +/* + * Walk the user copy of the page tables (optionally) trying to allocate + * page table pages on the way down. + * + * Returns a pointer to a P4D on success, or NULL on failure. + */ +static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) +{ + pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address)); + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + + if (address < PAGE_OFFSET) { + WARN_ONCE(1, "attempt to walk user address\n"); + return NULL; + } + + if (pgd_none(*pgd)) { + unsigned long new_p4d_page = __get_free_page(gfp); + if (!new_p4d_page) + return NULL; + + if (pgd_none(*pgd)) { + set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); + new_p4d_page = 0; + } + if (new_p4d_page) + free_page(new_p4d_page); + } + BUILD_BUG_ON(pgd_large(*pgd) != 0); + + return p4d_offset(pgd, address); +} + +/* + * Walk the user copy of the page tables (optionally) trying to allocate + * page table pages on the way down. + * + * Returns a pointer to a PMD on success, or NULL on failure. + */ +static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + p4d_t *p4d = pti_user_pagetable_walk_p4d(address); + pud_t *pud; + + BUILD_BUG_ON(p4d_large(*p4d) != 0); + if (p4d_none(*p4d)) { + unsigned long new_pud_page = __get_free_page(gfp); + if (!new_pud_page) + return NULL; + + if (p4d_none(*p4d)) { + set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); + new_pud_page = 0; + } + if (new_pud_page) + free_page(new_pud_page); + } + + pud = pud_offset(p4d, address); + /* The user page tables do not use large mappings: */ + if (pud_large(*pud)) { + WARN_ON(1); + return NULL; + } + if (pud_none(*pud)) { + unsigned long new_pmd_page = __get_free_page(gfp); + if (!new_pmd_page) + return NULL; + + if (pud_none(*pud)) { + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); + new_pmd_page = 0; + } + if (new_pmd_page) + free_page(new_pmd_page); + } + + return pmd_offset(pud, address); +} + +#ifdef CONFIG_X86_VSYSCALL_EMULATION +/* + * Walk the shadow copy of the page tables (optionally) trying to allocate + * page table pages on the way down. Does not support large pages. + * + * Note: this is only used when mapping *new* kernel data into the + * user/shadow page tables. It is never used for userspace data. + * + * Returns a pointer to a PTE on success, or NULL on failure. + */ +static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + pmd_t *pmd = pti_user_pagetable_walk_pmd(address); + pte_t *pte; + + /* We can't do anything sensible if we hit a large mapping. */ + if (pmd_large(*pmd)) { + WARN_ON(1); + return NULL; + } + + if (pmd_none(*pmd)) { + unsigned long new_pte_page = __get_free_page(gfp); + if (!new_pte_page) + return NULL; + + if (pmd_none(*pmd)) { + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); + new_pte_page = 0; + } + if (new_pte_page) + free_page(new_pte_page); + } + + pte = pte_offset_kernel(pmd, address); + if (pte_flags(*pte) & _PAGE_USER) { + WARN_ONCE(1, "attempt to walk to user pte\n"); + return NULL; + } + return pte; +} + +static void __init pti_setup_vsyscall(void) +{ + pte_t *pte, *target_pte; + unsigned int level; + + pte = lookup_address(VSYSCALL_ADDR, &level); + if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte)) + return; + + target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR); + if (WARN_ON(!target_pte)) + return; + + *target_pte = *pte; + set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir)); +} +#else +static void __init pti_setup_vsyscall(void) { } +#endif + +static void __init +pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) +{ + unsigned long addr; + + /* + * Clone the populated PMDs which cover start to end. These PMD areas + * can have holes. + */ + for (addr = start; addr < end; addr += PMD_SIZE) { + pmd_t *pmd, *target_pmd; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + pgd = pgd_offset_k(addr); + if (WARN_ON(pgd_none(*pgd))) + return; + p4d = p4d_offset(pgd, addr); + if (WARN_ON(p4d_none(*p4d))) + return; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + continue; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + + target_pmd = pti_user_pagetable_walk_pmd(addr); + if (WARN_ON(!target_pmd)) + return; + + /* + * Copy the PMD. That is, the kernelmode and usermode + * tables will share the last-level page tables of this + * address range + */ + *target_pmd = pmd_clear_flags(*pmd, clear); + } +} + +/* + * Clone a single p4d (i.e. a top-level entry on 4-level systems and a + * next-level entry on 5-level systems. + */ +static void __init pti_clone_p4d(unsigned long addr) +{ + p4d_t *kernel_p4d, *user_p4d; + pgd_t *kernel_pgd; + + user_p4d = pti_user_pagetable_walk_p4d(addr); + kernel_pgd = pgd_offset_k(addr); + kernel_p4d = p4d_offset(kernel_pgd, addr); + *user_p4d = *kernel_p4d; +} + +/* + * Clone the CPU_ENTRY_AREA into the user space visible page table. + */ +static void __init pti_clone_user_shared(void) +{ + pti_clone_p4d(CPU_ENTRY_AREA_BASE); +} + +/* + * Clone the ESPFIX P4D into the user space visinble page table + */ +static void __init pti_setup_espfix64(void) +{ +#ifdef CONFIG_X86_ESPFIX64 + pti_clone_p4d(ESPFIX_BASE_ADDR); +#endif +} + +/* + * Clone the populated PMDs of the entry and irqentry text and force it RO. + */ +static void __init pti_clone_entry_text(void) +{ + pti_clone_pmds((unsigned long) __entry_text_start, + (unsigned long) __irqentry_text_end, + _PAGE_RW | _PAGE_GLOBAL); +} + +/* + * Initialize kernel page table isolation + */ +void __init pti_init(void) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + pr_info("enabled\n"); + + pti_clone_user_shared(); + pti_clone_entry_text(); + pti_setup_espfix64(); + pti_setup_vsyscall(); +} diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0a1be3adc97e..a1561957dccb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -28,6 +28,38 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +/* + * We get here when we do something requiring a TLB invalidation + * but could not go invalidate all of the contexts. We do the + * necessary invalidation by clearing out the 'ctx_id' which + * forces a TLB flush when the context is loaded. + */ +void clear_asid_other(void) +{ + u16 asid; + + /* + * This is only expected to be set if we have disabled + * kernel _PAGE_GLOBAL pages. + */ + if (!static_cpu_has(X86_FEATURE_PTI)) { + WARN_ON_ONCE(1); + return; + } + + for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { + /* Do not need to flush the current asid */ + if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid)) + continue; + /* + * Make sure the next time we go to switch to + * this asid, we do a flush: + */ + this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0); + } + this_cpu_write(cpu_tlbstate.invalidate_other, false); +} + atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); @@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + if (this_cpu_read(cpu_tlbstate.invalidate_other)) + clear_asid_other(); + for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) != next->context.ctx_id) @@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, *need_flush = true; } +static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush) +{ + unsigned long new_mm_cr3; + + if (need_flush) { + invalidate_user_asid(new_asid); + new_mm_cr3 = build_cr3(pgdir, new_asid); + } else { + new_mm_cr3 = build_cr3_noflush(pgdir, new_asid); + } + + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask writes. + */ + write_cr3(new_mm_cr3); +} + void leave_mm(int cpu) { struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); @@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - write_cr3(build_cr3(next->pgd, new_asid)); + load_new_mm_cr3(next->pgd, new_asid, true); /* * NB: This gets called via leave_mm() in the idle path @@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - write_cr3(build_cr3_noflush(next->pgd, new_asid)); + load_new_mm_cr3(next->pgd, new_asid, false); /* See above wrt _rcuidle. */ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6a151ce70e86..d87ac96e37ed 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -196,6 +196,9 @@ static pgd_t *efi_pgd; * because we want to avoid inserting EFI region mappings (EFI_VA_END * to EFI_VA_START) into the standard kernel page tables. Everything * else can be shared, see efi_sync_low_kernel_mappings(). + * + * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the + * allocation. */ int __init efi_alloc_page_tables(void) { @@ -208,7 +211,7 @@ int __init efi_alloc_page_tables(void) return 0; gfp_mask = GFP_KERNEL | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) return -ENOMEM; diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 8a99a2e96537..5b513ccffde4 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, /* * Update the first page pointer to skip over the CSH header. */ - cap_info->pages[0] += csh->headersize; + cap_info->phys[0] += csh->headersize; + + /* + * cap_info->capsule should point at a virtual mapping of the entire + * capsule, starting at the capsule header. Our image has the Quark + * security header prepended, so we cannot rely on the default vmap() + * mapping created by the generic capsule code. + * Given that the Quark firmware does not appear to care about the + * virtual mapping, let's just point cap_info->capsule at our copy + * of the capsule header. + */ + cap_info->capsule = &cap_info->header; return 1; } diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 5f6fd860820a..e4cb9f4cde8a 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq, * on the specified blade to allow the sending of MSIs to the specified CPU. */ static int uv_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); return 0; diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 444a387df219..35d4dcea381f 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq) unsigned int i; list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) { - ctx->rcvused -= rsgl->sg_num_bytes; + atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused); af_alg_free_sg(&rsgl->sgl); list_del(&rsgl->list); if (rsgl != &areq->first_rsgl) @@ -1163,7 +1163,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, areq->last_rsgl = rsgl; len += err; - ctx->rcvused += err; + atomic_add(err, &ctx->rcvused); rsgl->sg_num_bytes = err; iov_iter_advance(&msg->msg_iter, err); } diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index ddcc45f77edd..e9885a35ef6e 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -571,7 +571,7 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index baef9bfccdda..c5c47b680152 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -390,7 +390,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index db1bc3147bc4..600afa99941f 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, algt->mask)); if (IS_ERR(poly)) return PTR_ERR(poly); + poly_hash = __crypto_hash_alg_common(poly); + + err = -EINVAL; + if (poly_hash->digestsize != POLY1305_DIGEST_SIZE) + goto out_put_poly; err = -ENOMEM; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); @@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, ctx = aead_instance_ctx(inst); ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize; - poly_hash = __crypto_hash_alg_common(poly); err = crypto_init_ahash_spawn(&ctx->poly, poly_hash, aead_crypto_instance(inst)); if (err) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index ee9cfb99fe25..f8ec3d4ba4a8 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm) crypto_free_aead(ctx->child); } +static void pcrypt_free(struct aead_instance *inst) +{ + struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst); + + crypto_drop_aead(&ctx->spawn); + kfree(inst); +} + static int pcrypt_init_instance(struct crypto_instance *inst, struct crypto_alg *alg) { @@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.encrypt = pcrypt_aead_encrypt; inst->alg.decrypt = pcrypt_aead_decrypt; + inst->free = pcrypt_free; + err = aead_register_instance(tmpl, inst); if (err) goto out_drop_aead; @@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb) return -EINVAL; } -static void pcrypt_free(struct crypto_instance *inst) -{ - struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst); - - crypto_drop_aead(&ctx->spawn); - kfree(inst); -} - static int pcrypt_cpumask_change_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt) static struct crypto_template pcrypt_tmpl = { .name = "pcrypt", .create = pcrypt_create, - .free = pcrypt_free, .module = THIS_MODULE, }; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index bccec9de0533..a7ecfde66b7b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -482,7 +482,8 @@ enum binder_deferred_state { * @tsk task_struct for group_leader of process * (invariant after initialized) * @files files_struct for process - * (invariant after initialized) + * (protected by @files_lock) + * @files_lock mutex to protect @files * @deferred_work_node: element for binder_deferred_list * (protected by binder_deferred_lock) * @deferred_work: bitmap of deferred work to perform @@ -530,6 +531,7 @@ struct binder_proc { int pid; struct task_struct *tsk; struct files_struct *files; + struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -877,20 +879,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node); static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { - struct files_struct *files = proc->files; unsigned long rlim_cur; unsigned long irqs; + int ret; - if (files == NULL) - return -ESRCH; - - if (!lock_task_sighand(proc->tsk, &irqs)) - return -EMFILE; - + mutex_lock(&proc->files_lock); + if (proc->files == NULL) { + ret = -ESRCH; + goto err; + } + if (!lock_task_sighand(proc->tsk, &irqs)) { + ret = -EMFILE; + goto err; + } rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); unlock_task_sighand(proc->tsk, &irqs); - return __alloc_fd(files, 0, rlim_cur, flags); + ret = __alloc_fd(proc->files, 0, rlim_cur, flags); +err: + mutex_unlock(&proc->files_lock); + return ret; } /* @@ -899,8 +907,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { + mutex_lock(&proc->files_lock); if (proc->files) __fd_install(proc->files, fd, file); + mutex_unlock(&proc->files_lock); } /* @@ -910,9 +920,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) { int retval; - if (proc->files == NULL) - return -ESRCH; - + mutex_lock(&proc->files_lock); + if (proc->files == NULL) { + retval = -ESRCH; + goto err; + } retval = __close_fd(proc->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || @@ -920,7 +932,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) retval == -ERESTARTNOHAND || retval == -ERESTART_RESTARTBLOCK)) retval = -EINTR; - +err: + mutex_unlock(&proc->files_lock); return retval; } @@ -4627,7 +4640,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) ret = binder_alloc_mmap_handler(&proc->alloc, vma); if (ret) return ret; + mutex_lock(&proc->files_lock); proc->files = get_files_struct(current); + mutex_unlock(&proc->files_lock); return 0; err_bad_arg: @@ -4651,6 +4666,7 @@ static int binder_open(struct inode *nodp, struct file *filp) spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; + mutex_init(&proc->files_lock); INIT_LIST_HEAD(&proc->todo); proc->default_priority = task_nice(current); binder_dev = container_of(filp->private_data, struct binder_device, @@ -4903,9 +4919,11 @@ static void binder_deferred_func(struct work_struct *work) files = NULL; if (defer & BINDER_DEFERRED_PUT_FILES) { + mutex_lock(&proc->files_lock); files = proc->files; if (files) proc->files = NULL; + mutex_unlock(&proc->files_lock); } if (defer & BINDER_DEFERRED_FLUSH) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index eb3af2739537..07532d83be0b 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf) this_leaf->ways_of_associativity = (size / nr_sets) / line_size; } +static bool cache_node_is_unified(struct cacheinfo *this_leaf) +{ + return of_property_read_bool(this_leaf->of_node, "cache-unified"); +} + static void cache_of_override_properties(unsigned int cpu) { int index; @@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu) for (index = 0; index < cache_leaves(cpu); index++) { this_leaf = this_cpu_ci->info_list + index; + /* + * init_cache_level must setup the cache level correctly + * overriding the architecturally specified levels, so + * if type is NONE at this stage, it should be unified + */ + if (this_leaf->type == CACHE_TYPE_NOCACHE && + cache_node_is_unified(this_leaf)) + this_leaf->type = CACHE_TYPE_UNIFIED; cache_size(this_leaf); cache_get_line_size(this_leaf); cache_nr_sets(this_leaf); diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 328ca93781cf..1b76d9585902 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = { .match = sunxi_rsb_device_match, .probe = sunxi_rsb_device_probe, .remove = sunxi_rsb_device_remove, + .uevent = of_device_uevent_modalias, }; static void sunxi_rsb_dev_release(struct device *dev) diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index 3e104f5aa0c2..b56b3f711d94 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -5,6 +5,7 @@ config CRYPTO_DEV_CHELSIO select CRYPTO_SHA256 select CRYPTO_SHA512 select CRYPTO_AUTHENC + select CRYPTO_GF128MUL ---help--- The Chelsio Crypto Co-processor driver for T6 adapters. diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 89ba9e85c0f3..4bcef78a08aa 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -607,6 +607,7 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv ndesc = ctx->handle_result(priv, ring, sreq->req, &should_complete, &ret); if (ndesc < 0) { + kfree(sreq); dev_err(priv->dev, "failed to handle result (%d)", ndesc); return; } diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index 5438552bc6d7..fcc0a606d748 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -14,6 +14,7 @@ #include <crypto/aes.h> #include <crypto/skcipher.h> +#include <crypto/internal/skcipher.h> #include "safexcel.h" @@ -33,6 +34,10 @@ struct safexcel_cipher_ctx { unsigned int key_len; }; +struct safexcel_cipher_req { + bool needs_inv; +}; + static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx, struct crypto_async_request *async, struct safexcel_command_desc *cdesc, @@ -126,9 +131,9 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx, return 0; } -static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, - struct crypto_async_request *async, - bool *should_complete, int *ret) +static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) { struct skcipher_request *req = skcipher_request_cast(async); struct safexcel_result_desc *rdesc; @@ -265,7 +270,6 @@ static int safexcel_aes_send(struct crypto_async_request *async, spin_unlock_bh(&priv->ring[ring].egress_lock); request->req = &req->base; - ctx->base.handle_result = safexcel_handle_result; *commands = n_cdesc; *results = n_rdesc; @@ -341,8 +345,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, ring = safexcel_select_ring(priv); ctx->base.ring = ring; - ctx->base.needs_inv = false; - ctx->base.send = safexcel_aes_send; spin_lock_bh(&priv->ring[ring].queue_lock); enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async); @@ -359,6 +361,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, return ndesc; } +static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) +{ + struct skcipher_request *req = skcipher_request_cast(async); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); + int err; + + if (sreq->needs_inv) { + sreq->needs_inv = false; + err = safexcel_handle_inv_result(priv, ring, async, + should_complete, ret); + } else { + err = safexcel_handle_req_result(priv, ring, async, + should_complete, ret); + } + + return err; +} + static int safexcel_cipher_send_inv(struct crypto_async_request *async, int ring, struct safexcel_request *request, int *commands, int *results) @@ -368,8 +390,6 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async, struct safexcel_crypto_priv *priv = ctx->priv; int ret; - ctx->base.handle_result = safexcel_handle_inv_result; - ret = safexcel_invalidate_cache(async, &ctx->base, priv, ctx->base.ctxr_dma, ring, request); if (unlikely(ret)) @@ -381,28 +401,46 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async, return 0; } +static int safexcel_send(struct crypto_async_request *async, + int ring, struct safexcel_request *request, + int *commands, int *results) +{ + struct skcipher_request *req = skcipher_request_cast(async); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); + int ret; + + if (sreq->needs_inv) + ret = safexcel_cipher_send_inv(async, ring, request, + commands, results); + else + ret = safexcel_aes_send(async, ring, request, + commands, results); + return ret; +} + static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; - struct skcipher_request req; + SKCIPHER_REQUEST_ON_STACK(req, __crypto_skcipher_cast(tfm)); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; - memset(&req, 0, sizeof(struct skcipher_request)); + memset(req, 0, sizeof(struct skcipher_request)); /* create invalidation request */ init_completion(&result.completion); - skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG, - safexcel_inv_complete, &result); + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + safexcel_inv_complete, &result); - skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm)); - ctx = crypto_tfm_ctx(req.base.tfm); + skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm)); + ctx = crypto_tfm_ctx(req->base.tfm); ctx->base.exit_inv = true; - ctx->base.send = safexcel_cipher_send_inv; + sreq->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); - crypto_enqueue_request(&priv->ring[ring].queue, &req.base); + crypto_enqueue_request(&priv->ring[ring].queue, &req->base); spin_unlock_bh(&priv->ring[ring].queue_lock); if (!priv->ring[ring].need_dequeue) @@ -424,19 +462,21 @@ static int safexcel_aes(struct skcipher_request *req, enum safexcel_cipher_direction dir, u32 mode) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); struct safexcel_crypto_priv *priv = ctx->priv; int ret, ring; + sreq->needs_inv = false; ctx->direction = dir; ctx->mode = mode; if (ctx->base.ctxr) { - if (ctx->base.needs_inv) - ctx->base.send = safexcel_cipher_send_inv; + if (ctx->base.needs_inv) { + sreq->needs_inv = true; + ctx->base.needs_inv = false; + } } else { ctx->base.ring = safexcel_select_ring(priv); - ctx->base.send = safexcel_aes_send; - ctx->base.ctxr = dma_pool_zalloc(priv->context_pool, EIP197_GFP_FLAGS(req->base), &ctx->base.ctxr_dma); @@ -476,6 +516,11 @@ static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm) alg.skcipher.base); ctx->priv = tmpl->priv; + ctx->base.send = safexcel_send; + ctx->base.handle_result = safexcel_handle_result; + + crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm), + sizeof(struct safexcel_cipher_req)); return 0; } diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 74feb6227101..0c5a5820b06e 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -32,9 +32,10 @@ struct safexcel_ahash_req { bool last_req; bool finish; bool hmac; + bool needs_inv; u8 state_sz; /* expected sate size, only set once */ - u32 state[SHA256_DIGEST_SIZE / sizeof(u32)]; + u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32)); u64 len; u64 processed; @@ -119,15 +120,15 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx, } } -static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, - struct crypto_async_request *async, - bool *should_complete, int *ret) +static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) { struct safexcel_result_desc *rdesc; struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); struct safexcel_ahash_req *sreq = ahash_request_ctx(areq); - int cache_len, result_sz = sreq->state_sz; + int cache_len; *ret = 0; @@ -148,8 +149,8 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, spin_unlock_bh(&priv->ring[ring].egress_lock); if (sreq->finish) - result_sz = crypto_ahash_digestsize(ahash); - memcpy(sreq->state, areq->result, result_sz); + memcpy(areq->result, sreq->state, + crypto_ahash_digestsize(ahash)); dma_unmap_sg(priv->dev, areq->src, sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE); @@ -165,9 +166,9 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, return 1; } -static int safexcel_ahash_send(struct crypto_async_request *async, int ring, - struct safexcel_request *request, int *commands, - int *results) +static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring, + struct safexcel_request *request, + int *commands, int *results) { struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); @@ -273,7 +274,7 @@ send_command: /* Add the token */ safexcel_hash_token(first_cdesc, len, req->state_sz); - ctx->base.result_dma = dma_map_single(priv->dev, areq->result, + ctx->base.result_dma = dma_map_single(priv->dev, req->state, req->state_sz, DMA_FROM_DEVICE); if (dma_mapping_error(priv->dev, ctx->base.result_dma)) { ret = -EINVAL; @@ -292,7 +293,6 @@ send_command: req->processed += len; request->req = &areq->base; - ctx->base.handle_result = safexcel_handle_result; *commands = n_cdesc; *results = 1; @@ -374,8 +374,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, ring = safexcel_select_ring(priv); ctx->base.ring = ring; - ctx->base.needs_inv = false; - ctx->base.send = safexcel_ahash_send; spin_lock_bh(&priv->ring[ring].queue_lock); enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async); @@ -392,6 +390,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, return 1; } +static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) +{ + struct ahash_request *areq = ahash_request_cast(async); + struct safexcel_ahash_req *req = ahash_request_ctx(areq); + int err; + + if (req->needs_inv) { + req->needs_inv = false; + err = safexcel_handle_inv_result(priv, ring, async, + should_complete, ret); + } else { + err = safexcel_handle_req_result(priv, ring, async, + should_complete, ret); + } + + return err; +} + static int safexcel_ahash_send_inv(struct crypto_async_request *async, int ring, struct safexcel_request *request, int *commands, int *results) @@ -400,7 +418,6 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async, struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); int ret; - ctx->base.handle_result = safexcel_handle_inv_result; ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv, ctx->base.ctxr_dma, ring, request); if (unlikely(ret)) @@ -412,28 +429,46 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async, return 0; } +static int safexcel_ahash_send(struct crypto_async_request *async, + int ring, struct safexcel_request *request, + int *commands, int *results) +{ + struct ahash_request *areq = ahash_request_cast(async); + struct safexcel_ahash_req *req = ahash_request_ctx(areq); + int ret; + + if (req->needs_inv) + ret = safexcel_ahash_send_inv(async, ring, request, + commands, results); + else + ret = safexcel_ahash_send_req(async, ring, request, + commands, results); + return ret; +} + static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) { struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; - struct ahash_request req; + AHASH_REQUEST_ON_STACK(req, __crypto_ahash_cast(tfm)); + struct safexcel_ahash_req *rctx = ahash_request_ctx(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; - memset(&req, 0, sizeof(struct ahash_request)); + memset(req, 0, sizeof(struct ahash_request)); /* create invalidation request */ init_completion(&result.completion); - ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG, + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, safexcel_inv_complete, &result); - ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm)); - ctx = crypto_tfm_ctx(req.base.tfm); + ahash_request_set_tfm(req, __crypto_ahash_cast(tfm)); + ctx = crypto_tfm_ctx(req->base.tfm); ctx->base.exit_inv = true; - ctx->base.send = safexcel_ahash_send_inv; + rctx->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); - crypto_enqueue_request(&priv->ring[ring].queue, &req.base); + crypto_enqueue_request(&priv->ring[ring].queue, &req->base); spin_unlock_bh(&priv->ring[ring].queue_lock); if (!priv->ring[ring].need_dequeue) @@ -481,14 +516,16 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq) struct safexcel_crypto_priv *priv = ctx->priv; int ret, ring; - ctx->base.send = safexcel_ahash_send; + req->needs_inv = false; if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq); if (ctx->base.ctxr) { - if (ctx->base.needs_inv) - ctx->base.send = safexcel_ahash_send_inv; + if (ctx->base.needs_inv) { + ctx->base.needs_inv = false; + req->needs_inv = true; + } } else { ctx->base.ring = safexcel_select_ring(priv); ctx->base.ctxr = dma_pool_zalloc(priv->context_pool, @@ -622,6 +659,8 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm) struct safexcel_alg_template, alg.ahash); ctx->priv = tmpl->priv; + ctx->base.send = safexcel_ahash_send; + ctx->base.handle_result = safexcel_handle_result; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct safexcel_ahash_req)); diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 48de52cf2ecc..662e709812cc 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1625,6 +1625,7 @@ static int queue_cache_init(void) CWQ_ENTRY_SIZE, 0, NULL); if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; return -ENOMEM; } return 0; @@ -1634,6 +1635,8 @@ static void queue_cache_destroy(void) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; + queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL; } static long spu_queue_register_workfn(void *arg) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index ec8ac5c4dd84..055e2e8f985a 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -20,10 +20,6 @@ #define NO_FURTHER_WRITE_ACTION -1 -#ifndef phys_to_page -#define phys_to_page(x) pfn_to_page((x) >> PAGE_SHIFT) -#endif - /** * efi_free_all_buff_pages - free all previous allocated buffer pages * @cap_info: pointer to current instance of capsule_info structure @@ -35,7 +31,7 @@ static void efi_free_all_buff_pages(struct capsule_info *cap_info) { while (cap_info->index > 0) - __free_page(phys_to_page(cap_info->pages[--cap_info->index])); + __free_page(cap_info->pages[--cap_info->index]); cap_info->index = NO_FURTHER_WRITE_ACTION; } @@ -71,6 +67,14 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info) cap_info->pages = temp_page; + temp_page = krealloc(cap_info->phys, + pages_needed * sizeof(phys_addr_t *), + GFP_KERNEL | __GFP_ZERO); + if (!temp_page) + return -ENOMEM; + + cap_info->phys = temp_page; + return 0; } @@ -105,9 +109,24 @@ int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, **/ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info) { + bool do_vunmap = false; int ret; - ret = efi_capsule_update(&cap_info->header, cap_info->pages); + /* + * cap_info->capsule may have been assigned already by a quirk + * handler, so only overwrite it if it is NULL + */ + if (!cap_info->capsule) { + cap_info->capsule = vmap(cap_info->pages, cap_info->index, + VM_MAP, PAGE_KERNEL); + if (!cap_info->capsule) + return -ENOMEM; + do_vunmap = true; + } + + ret = efi_capsule_update(cap_info->capsule, cap_info->phys); + if (do_vunmap) + vunmap(cap_info->capsule); if (ret) { pr_err("capsule update failed\n"); return ret; @@ -165,10 +184,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff, goto failed; } - cap_info->pages[cap_info->index++] = page_to_phys(page); + cap_info->pages[cap_info->index] = page; + cap_info->phys[cap_info->index] = page_to_phys(page); cap_info->page_bytes_remain = PAGE_SIZE; + cap_info->index++; } else { - page = phys_to_page(cap_info->pages[cap_info->index - 1]); + page = cap_info->pages[cap_info->index - 1]; } kbuff = kmap(page); @@ -252,6 +273,7 @@ static int efi_capsule_release(struct inode *inode, struct file *file) struct capsule_info *cap_info = file->private_data; kfree(cap_info->pages); + kfree(cap_info->phys); kfree(file->private_data); file->private_data = NULL; return 0; @@ -281,6 +303,13 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } + cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + if (!cap_info->phys) { + kfree(cap_info->pages); + kfree(cap_info); + return -ENOMEM; + } + file->private_data = cap_info; return 0; diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index dfcf56ee3c61..76861a00bb92 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = { * category than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) @@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, d->host_data); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class); irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq); irq_set_noprobe(irq); diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 545d43a587b7..bb4f8cf18bd9 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank( * category than their parents, so it won't report false recursion. */ static struct lock_class_key brcmstb_gpio_irq_lock_class; +static struct lock_class_key brcmstb_gpio_irq_request_class; static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, @@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, &bank->gc); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class); + irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class, + &brcmstb_gpio_irq_request_class); irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq); irq_set_noprobe(irq); return 0; diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 8db47f671708..02fa8fe2292a 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = { * than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int tegra_gpio_probe(struct platform_device *pdev) { @@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev) bank = &tgi->bank_info[GPIO_BANK(gpio)]; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, + &gpio_request_class); irq_set_chip_data(irq, bank); irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq); } diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c index 2313af82fad3..acd59113e08b 100644 --- a/drivers/gpio/gpio-xgene-sb.c +++ b/drivers/gpio/gpio-xgene-sb.c @@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio) static int xgene_gpio_sb_domain_activate(struct irq_domain *d, struct irq_data *irq_data, - bool early) + bool reserve) { struct xgene_gpio_sb *priv = d->host_data; u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index aad84a6306c4..44332b793718 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices); static void gpiochip_free_hogs(struct gpio_chip *chip); static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip); @@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void) } int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { unsigned long flags; int status = 0; @@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (status) goto err_remove_from_list; - status = gpiochip_add_irqchip(chip, key); + status = gpiochip_add_irqchip(chip, lock_key, request_key); if (status) goto err_remove_chip; @@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, * This lock class tells lockdep that GPIO irqs are in a different * category than their parents, so it won't report false recursion. */ - irq_set_lockdep_class(irq, chip->irq.lock_key); + irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key); irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler); /* Chips that use nested thread handlers have them marked */ if (chip->irq.threaded) @@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset) /** * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip * @gpiochip: the GPIO chip to add the IRQ chip to - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request */ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct irq_chip *irqchip = gpiochip->irq.chip; const struct irq_domain_ops *ops; @@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.default_type = type; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; if (gpiochip->irq.domain_ops) ops = gpiochip->irq.domain_ops; @@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE * to have the core avoid setting up any default type in the hardware. * @threaded: whether this irqchip uses a nested thread handler - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request * * This function closely associates a certain irqchip with a certain * gpiochip, providing an irq domain to translate the local IRQs to @@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct device_node *of_node; @@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, gpiochip->irq.default_type = type; gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; gpiochip->irq.domain = irq_domain_add_simple(of_node, gpiochip->ngpio, first_irq, &gpiochip_domain_ops, gpiochip); @@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key); #else /* CONFIG_GPIOLIB_IRQCHIP */ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { return 0; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h index a9782b1aba47..34daf895f848 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h @@ -1360,7 +1360,7 @@ void dpp1_cm_set_output_csc_adjustment( void dpp1_cm_set_output_csc_default( struct dpp *dpp_base, - const struct default_adjustment *default_adjust); + enum dc_color_space colorspace); void dpp1_cm_set_gamut_remap( struct dpp *dpp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index 40627c244bf5..ed1216b53465 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -225,14 +225,13 @@ void dpp1_cm_set_gamut_remap( void dpp1_cm_set_output_csc_default( struct dpp *dpp_base, - const struct default_adjustment *default_adjust) + enum dc_color_space colorspace) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); uint32_t ocsc_mode = 0; - if (default_adjust != NULL) { - switch (default_adjust->out_color_space) { + switch (colorspace) { case COLOR_SPACE_SRGB: case COLOR_SPACE_2020_RGB_FULLRANGE: ocsc_mode = 0; @@ -253,7 +252,6 @@ void dpp1_cm_set_output_csc_default( case COLOR_SPACE_UNKNOWN: default: break; - } } REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 961ad5c3b454..05dc01e54531 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2097,6 +2097,8 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx, tbl_entry.color_space = color_space; //tbl_entry.regval = matrix; pipe_ctx->plane_res.dpp->funcs->opp_set_csc_adjustment(pipe_ctx->plane_res.dpp, &tbl_entry); + } else { + pipe_ctx->plane_res.dpp->funcs->opp_set_csc_default(pipe_ctx->plane_res.dpp, colorspace); } } static bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 83a68460edcd..9420dfb94d39 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -64,7 +64,7 @@ struct dpp_funcs { void (*opp_set_csc_default)( struct dpp *dpp, - const struct default_adjustment *default_adjust); + enum dc_color_space colorspace); void (*opp_set_csc_adjustment)( struct dpp *dpp, diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 2e065facdce7..a0f4d2a2a481 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -168,16 +168,23 @@ static void armada_drm_crtc_update(struct armada_crtc *dcrtc) void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb, int x, int y) { + const struct drm_format_info *format = fb->format; + unsigned int num_planes = format->num_planes; u32 addr = drm_fb_obj(fb)->dev_addr; - int num_planes = fb->format->num_planes; int i; if (num_planes > 3) num_planes = 3; - for (i = 0; i < num_planes; i++) + addrs[0] = addr + fb->offsets[0] + y * fb->pitches[0] + + x * format->cpp[0]; + + y /= format->vsub; + x /= format->hsub; + + for (i = 1; i < num_planes; i++) addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] + - x * fb->format->cpp[i]; + x * format->cpp[i]; for (; i < 3; i++) addrs[i] = 0; } @@ -744,15 +751,14 @@ void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc, if (plane->fb) drm_framebuffer_put(plane->fb); - /* Power down the Y/U/V FIFOs */ - sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; - /* Power down most RAMs and FIFOs if this is the primary plane */ if (plane->type == DRM_PLANE_TYPE_PRIMARY) { - sram_para1 |= CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | - CFG_PDWN32x32 | CFG_PDWN64x66; + sram_para1 = CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | + CFG_PDWN32x32 | CFG_PDWN64x66; dma_ctrl0_mask = CFG_GRA_ENA; } else { + /* Power down the Y/U/V FIFOs */ + sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; dma_ctrl0_mask = CFG_DMA_ENA; } @@ -1225,17 +1231,13 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc", dcrtc); - if (ret < 0) { - kfree(dcrtc); - return ret; - } + if (ret < 0) + goto err_crtc; if (dcrtc->variant->init) { ret = dcrtc->variant->init(dcrtc, dev); - if (ret) { - kfree(dcrtc); - return ret; - } + if (ret) + goto err_crtc; } /* Ensure AXI pipeline is enabled */ @@ -1246,13 +1248,15 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, dcrtc->crtc.port = port; primary = kzalloc(sizeof(*primary), GFP_KERNEL); - if (!primary) - return -ENOMEM; + if (!primary) { + ret = -ENOMEM; + goto err_crtc; + } ret = armada_drm_plane_init(primary); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_universal_plane_init(drm, &primary->base, 0, @@ -1263,7 +1267,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL, @@ -1282,6 +1286,9 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, err_crtc_init: primary->base.funcs->destroy(&primary->base); +err_crtc: + kfree(dcrtc); + return ret; } diff --git a/drivers/gpu/drm/armada/armada_crtc.h b/drivers/gpu/drm/armada/armada_crtc.h index bab11f483575..bfd3514fbe9b 100644 --- a/drivers/gpu/drm/armada/armada_crtc.h +++ b/drivers/gpu/drm/armada/armada_crtc.h @@ -42,6 +42,8 @@ struct armada_plane_work { }; struct armada_plane_state { + u16 src_x; + u16 src_y; u32 src_hw; u32 dst_hw; u32 dst_yx; diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index b411b608821a..aba947696178 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -99,6 +99,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, { struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane); struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + const struct drm_format_info *format; struct drm_rect src = { .x1 = src_x, .y1 = src_y, @@ -117,7 +118,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, }; uint32_t val, ctrl0; unsigned idx = 0; - bool visible; + bool visible, fb_changed; int ret; trace_armada_ovl_plane_update(plane, crtc, fb, @@ -138,6 +139,18 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, if (!visible) ctrl0 &= ~CFG_DMA_ENA; + /* + * Shifting a YUV packed format image by one pixel causes the U/V + * planes to swap. Compensate for it by also toggling the UV swap. + */ + format = fb->format; + if (format->num_planes == 1 && src.x1 >> 16 & (format->hsub - 1)) + ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); + + fb_changed = plane->fb != fb || + dplane->base.state.src_x != src.x1 >> 16 || + dplane->base.state.src_y != src.y1 >> 16; + if (!dcrtc->plane) { dcrtc->plane = plane; armada_ovl_update_attr(&dplane->prop, dcrtc); @@ -145,7 +158,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, /* FIXME: overlay on an interlaced display */ /* Just updating the position/size? */ - if (plane->fb == fb && dplane->base.state.ctrl0 == ctrl0) { + if (!fb_changed && dplane->base.state.ctrl0 == ctrl0) { val = (drm_rect_height(&src) & 0xffff0000) | drm_rect_width(&src) >> 16; dplane->base.state.src_hw = val; @@ -169,9 +182,8 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0) armada_drm_plane_work_cancel(dcrtc, &dplane->base); - if (plane->fb != fb) { - u32 addrs[3], pixel_format; - int num_planes, hsub; + if (fb_changed) { + u32 addrs[3]; /* * Take a reference on the new framebuffer - we want to @@ -182,23 +194,11 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, if (plane->fb) armada_ovl_retire_fb(dplane, plane->fb); - src_y = src.y1 >> 16; - src_x = src.x1 >> 16; + dplane->base.state.src_y = src_y = src.y1 >> 16; + dplane->base.state.src_x = src_x = src.x1 >> 16; armada_drm_plane_calc_addrs(addrs, fb, src_x, src_y); - pixel_format = fb->format->format; - hsub = drm_format_horz_chroma_subsampling(pixel_format); - num_planes = fb->format->num_planes; - - /* - * Annoyingly, shifting a YUYV-format image by one pixel - * causes the U/V planes to toggle. Toggle the UV swap. - * (Unfortunately, this causes momentary colour flickering.) - */ - if (src_x & (hsub - 1) && num_planes == 1) - ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0], LCD_SPU_DMA_START_ADDR_Y0); armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1], diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 54b5d4c582b6..e143004e66d5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2368,6 +2368,9 @@ struct drm_i915_private { */ struct workqueue_struct *wq; + /* ordered wq for modesets */ + struct workqueue_struct *modeset_wq; + /* Display functions */ struct drm_i915_display_funcs display; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3866c49bc390..333f40bc03bb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6977,6 +6977,7 @@ enum { #define RESET_PCH_HANDSHAKE_ENABLE (1<<4) #define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) +#define SKL_SELECT_ALTERNATE_DC_EXIT (1<<30) #define MASK_WAKEMEM (1<<13) #define SKL_DFSM _MMIO(0x51000) @@ -8522,6 +8523,7 @@ enum skl_power_gate { #define BXT_CDCLK_CD2X_DIV_SEL_2 (2<<22) #define BXT_CDCLK_CD2X_DIV_SEL_4 (3<<22) #define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe)<<20) +#define CDCLK_DIVMUX_CD_OVERRIDE (1<<19) #define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3) #define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1<<16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index b2a6d62b71c0..60cf4e58389a 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -860,16 +860,10 @@ static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) { - int min_cdclk = skl_calc_cdclk(0, vco); u32 val; WARN_ON(vco != 8100000 && vco != 8640000); - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - /* * We always enable DPLL0 with the lowest link rate possible, but still * taking into account the VCO required to operate the eDP panel at the @@ -923,7 +917,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; - u32 freq_select, pcu_ack; + u32 freq_select, pcu_ack, cdclk_ctl; int ret; WARN_ON((cdclk == 24000) != (vco == 0)); @@ -940,7 +934,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, return; } - /* set CDCLK_CTL */ + /* Choose frequency for this cdclk */ switch (cdclk) { case 450000: case 432000: @@ -968,10 +962,33 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, dev_priv->cdclk.hw.vco != vco) skl_dpll0_disable(dev_priv); + cdclk_ctl = I915_READ(CDCLK_CTL); + + if (dev_priv->cdclk.hw.vco != vco) { + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + } + + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl |= CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); + POSTING_READ(CDCLK_CTL); + if (dev_priv->cdclk.hw.vco != vco) skl_dpll0_enable(dev_priv, vco); - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 30cf273d57aa..123585eeb87d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12544,11 +12544,15 @@ static int intel_atomic_commit(struct drm_device *dev, INIT_WORK(&state->commit_work, intel_atomic_commit_work); i915_sw_fence_commit(&intel_state->commit_ready); - if (nonblock) + if (nonblock && intel_state->modeset) { + queue_work(dev_priv->modeset_wq, &state->commit_work); + } else if (nonblock) { queue_work(system_unbound_wq, &state->commit_work); - else + } else { + if (intel_state->modeset) + flush_workqueue(dev_priv->modeset_wq); intel_atomic_commit_tail(state); - + } return 0; } @@ -14462,6 +14466,8 @@ int intel_modeset_init(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; + dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + drm_mode_config_init(dev); dev->mode_config.min_width = 0; @@ -15270,6 +15276,8 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_cleanup_gt_powersave(dev_priv); intel_teardown_gmbus(dev_priv); + + destroy_workqueue(dev_priv->modeset_wq); } void intel_connector_attach_encoder(struct intel_connector *connector, diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 6e3b430fccdc..55ea5eb3b7df 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -590,7 +590,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(dev); if (dev_priv->psr.active) { - i915_reg_t psr_ctl; + i915_reg_t psr_status; u32 psr_status_mask; if (dev_priv->psr.aux_frame_sync) @@ -599,24 +599,24 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, 0); if (dev_priv->psr.psr2_support) { - psr_ctl = EDP_PSR2_CTL; + psr_status = EDP_PSR2_STATUS_CTL; psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & + I915_WRITE(EDP_PSR2_CTL, + I915_READ(EDP_PSR2_CTL) & ~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE)); } else { - psr_ctl = EDP_PSR_STATUS_CTL; + psr_status = EDP_PSR_STATUS_CTL; psr_status_mask = EDP_PSR_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & ~EDP_PSR_ENABLE); + I915_WRITE(EDP_PSR_CTL, + I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE); } /* Wait till PSR is idle */ if (intel_wait_for_register(dev_priv, - psr_ctl, psr_status_mask, 0, + psr_status, psr_status_mask, 0, 2000)) DRM_ERROR("Timed out waiting for PSR Idle State\n"); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8af286c63d3b..7e115f3927f6 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -598,6 +598,11 @@ void gen9_enable_dc5(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Enabling DC5\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5); } @@ -625,6 +630,11 @@ void skl_disable_dc6(struct drm_i915_private *dev_priv) { DRM_DEBUG_KMS("Disabling DC6\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); } @@ -1786,6 +1796,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index e626eddf24d5..23db74ae1826 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -78,6 +78,8 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) /* then read the message */ msg.len = cnt & 0xf; + if (msg.len > CEC_MAX_MSG_SIZE - 2) + msg.len = CEC_MAX_MSG_SIZE - 2; msg.msg[0] = hdmi_read_reg(core->base, HDMI_CEC_RX_CMD_HEADER); msg.msg[1] = hdmi_read_reg(core->base, @@ -104,26 +106,6 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) } } -static void hdmi_cec_transmit_fifo_empty(struct hdmi_core_data *core, u32 stat1) -{ - if (stat1 & 2) { - u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); - - cec_transmit_done(core->adap, - CEC_TX_STATUS_NACK | - CEC_TX_STATUS_MAX_RETRIES, - 0, (dbg3 >> 4) & 7, 0, 0); - } else if (stat1 & 1) { - cec_transmit_done(core->adap, - CEC_TX_STATUS_ARB_LOST | - CEC_TX_STATUS_MAX_RETRIES, - 0, 0, 0, 0); - } else if (stat1 == 0) { - cec_transmit_done(core->adap, CEC_TX_STATUS_OK, - 0, 0, 0, 0); - } -} - void hdmi4_cec_irq(struct hdmi_core_data *core) { u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0); @@ -132,27 +114,21 @@ void hdmi4_cec_irq(struct hdmi_core_data *core) hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_0, stat0); hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, stat1); - if (stat0 & 0x40) + if (stat0 & 0x20) { + cec_transmit_done(core->adap, CEC_TX_STATUS_OK, + 0, 0, 0, 0); REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); - else if (stat0 & 0x24) - hdmi_cec_transmit_fifo_empty(core, stat1); - if (stat1 & 2) { + } else if (stat1 & 0x02) { u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); cec_transmit_done(core->adap, CEC_TX_STATUS_NACK | CEC_TX_STATUS_MAX_RETRIES, 0, (dbg3 >> 4) & 7, 0, 0); - } else if (stat1 & 1) { - cec_transmit_done(core->adap, - CEC_TX_STATUS_ARB_LOST | - CEC_TX_STATUS_MAX_RETRIES, - 0, 0, 0, 0); + REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); } if (stat0 & 0x02) hdmi_cec_received_msg(core); - if (stat1 & 0x3) - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); } static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) @@ -231,18 +207,14 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) /* * Enable CEC interrupts: * Transmit Buffer Full/Empty Change event - * Transmitter FIFO Empty event * Receiver FIFO Not Empty event */ - hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x26); + hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x22); /* * Enable CEC interrupts: - * RX FIFO Overrun Error event - * Short Pulse Detected event * Frame Retransmit Count Exceeded event - * Start Bit Irregularity event */ - hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x0f); + hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x02); /* cec calibration enable (self clearing) */ hdmi_write_reg(core->base, HDMI_CEC_SETUP, 0x03); diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index b5ba6441489f..5d252fb27a82 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -1007,6 +1007,8 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) pr_info("Initializing pool allocator\n"); _manager = kzalloc(sizeof(*_manager), GFP_KERNEL); + if (!_manager) + return -ENOMEM; ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc", 0); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index f3fcb836a1f9..0c3f608131cf 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -551,7 +551,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item) ret = hid_add_field(parser, HID_FEATURE_REPORT, data); break; default: - hid_err(parser->device, "unknown main item tag 0x%x\n", item->tag); + hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag); ret = 0; } diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 68cdc962265b..271f31461da4 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -696,8 +696,16 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, (u8 *)&word, 2); break; case I2C_SMBUS_I2C_BLOCK_DATA: - size = I2C_SMBUS_BLOCK_DATA; - /* fallthrough */ + if (read_write == I2C_SMBUS_READ) { + read_length = data->block[0]; + count = cp2112_write_read_req(buf, addr, read_length, + command, NULL, 0); + } else { + count = cp2112_write_req(buf, addr, command, + data->block + 1, + data->block[0]); + } + break; case I2C_SMBUS_BLOCK_DATA: if (I2C_SMBUS_READ == read_write) { count = cp2112_write_read_req(buf, addr, @@ -785,6 +793,9 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, case I2C_SMBUS_WORD_DATA: data->word = le16_to_cpup((__le16 *)buf); break; + case I2C_SMBUS_I2C_BLOCK_DATA: + memcpy(data->block + 1, buf, read_length); + break; case I2C_SMBUS_BLOCK_DATA: if (read_length > I2C_SMBUS_BLOCK_MAX) { ret = -EPROTO; diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c index 9325545fc3ae..edc0f64bb584 100644 --- a/drivers/hid/hid-holtekff.c +++ b/drivers/hid/hid-holtekff.c @@ -32,10 +32,6 @@ #ifdef CONFIG_HOLTEK_FF -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>"); -MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices"); - /* * These commands and parameters are currently known: * @@ -223,3 +219,7 @@ static struct hid_driver holtek_driver = { .probe = holtek_probe, }; module_hid_driver(holtek_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>"); +MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices"); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 76ed9a216f10..610223f0e945 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1378,6 +1378,8 @@ void vmbus_device_unregister(struct hv_device *device_obj) pr_debug("child device %s unregistered\n", dev_name(&device_obj->device)); + kset_unregister(device_obj->channels_kset); + /* * Kick off the process of unregistering the device. * This will call vmbus_remove() and eventually vmbus_device_release() diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index a1d687a664f8..66f0268f37a6 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -314,7 +314,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, } #endif -struct ib_device *__ib_device_get_by_index(u32 ifindex); +struct ib_device *ib_device_get_by_index(u32 ifindex); /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 30914f3baa5f..465520627e4b 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -134,7 +134,7 @@ static int ib_device_check_mandatory(struct ib_device *device) return 0; } -struct ib_device *__ib_device_get_by_index(u32 index) +static struct ib_device *__ib_device_get_by_index(u32 index) { struct ib_device *device; @@ -145,6 +145,22 @@ struct ib_device *__ib_device_get_by_index(u32 index) return NULL; } +/* + * Caller is responsible to return refrerence count by calling put_device() + */ +struct ib_device *ib_device_get_by_index(u32 index) +{ + struct ib_device *device; + + down_read(&lists_rwsem); + device = __ib_device_get_by_index(index); + if (device) + get_device(&device->dev); + + up_read(&lists_rwsem); + return device; +} + static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 9a05245a1acf..0dcd1aa6f683 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -142,27 +142,34 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = __ib_device_get_by_index(index); + device = ib_device_get_by_index(index); if (!device) return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + if (!msg) { + err = -ENOMEM; + goto err; + } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); err = fill_dev_info(msg, device); - if (err) { - nlmsg_free(msg); - return err; - } + if (err) + goto err_free; nlmsg_end(msg, nlh); + put_device(&device->dev); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_free: + nlmsg_free(msg); +err: + put_device(&device->dev); + return err; } static int _nldev_get_dumpit(struct ib_device *device, @@ -220,31 +227,40 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = __ib_device_get_by_index(index); + device = ib_device_get_by_index(index); if (!device) return -EINVAL; port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); - if (!rdma_is_port_valid(device, port)) - return -EINVAL; + if (!rdma_is_port_valid(device, port)) { + err = -EINVAL; + goto err; + } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + if (!msg) { + err = -ENOMEM; + goto err; + } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); err = fill_port_info(msg, device, port); - if (err) { - nlmsg_free(msg); - return err; - } + if (err) + goto err_free; nlmsg_end(msg, nlh); + put_device(&device->dev); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_free: + nlmsg_free(msg); +err: + put_device(&device->dev); + return err; } static int nldev_port_get_dumpit(struct sk_buff *skb, @@ -265,7 +281,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, return -EINVAL; ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = __ib_device_get_by_index(ifindex); + device = ib_device_get_by_index(ifindex); if (!device) return -EINVAL; @@ -299,7 +315,9 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, nlmsg_end(skb, nlh); } -out: cb->args[0] = idx; +out: + put_device(&device->dev); + cb->args[0] = idx; return skb->len; } diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 313bfb9ccb71..4975f3e6596e 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -642,7 +642,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, goto err_free_mr; mr->max_pages = max_num_sg; - err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) goto err_free_pl; @@ -653,6 +652,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, return &mr->ibmr; err_free_pl: + mr->ibmr.device = pd->device; mlx4_free_priv_pages(mr); err_free_mr: (void) mlx4_mr_free(dev->dev, &mr->mmr); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 12b7f911f0e5..8880351df179 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -902,8 +902,8 @@ static int path_rec_start(struct net_device *dev, return 0; } -static void neigh_add_path(struct sk_buff *skb, u8 *daddr, - struct net_device *dev) +static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr, + struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct rdma_netdev *rn = netdev_priv(dev); @@ -917,7 +917,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, spin_unlock_irqrestore(&priv->lock, flags); ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); - return; + return NULL; + } + + /* To avoid race condition, make sure that the + * neigh will be added only once. + */ + if (unlikely(!list_empty(&neigh->list))) { + spin_unlock_irqrestore(&priv->lock, flags); + return neigh; } path = __path_find(dev, daddr + 4); @@ -956,7 +964,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, path->ah->last_send = rn->send(dev, skb, path->ah->ah, IPOIB_QPN(daddr)); ipoib_neigh_put(neigh); - return; + return NULL; } } else { neigh->ah = NULL; @@ -973,7 +981,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, spin_unlock_irqrestore(&priv->lock, flags); ipoib_neigh_put(neigh); - return; + return NULL; err_path: ipoib_neigh_free(neigh); @@ -983,6 +991,8 @@ err_drop: spin_unlock_irqrestore(&priv->lock, flags); ipoib_neigh_put(neigh); + + return NULL; } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, @@ -1091,8 +1101,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) case htons(ETH_P_TIPC): neigh = ipoib_neigh_get(dev, phdr->hwaddr); if (unlikely(!neigh)) { - neigh_add_path(skb, phdr->hwaddr, dev); - return NETDEV_TX_OK; + neigh = neigh_add_path(skb, phdr->hwaddr, dev); + if (likely(!neigh)) + return NETDEV_TX_OK; } break; case htons(ETH_P_ARP): diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 93e149efc1f5..9b3f47ae2016 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -816,7 +816,10 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) spin_lock_irqsave(&priv->lock, flags); if (!neigh) { neigh = ipoib_neigh_alloc(daddr, dev); - if (neigh) { + /* Make sure that the neigh will be added only + * once to mcast list. + */ + if (neigh && list_empty(&neigh->list)) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; list_add_tail(&neigh->list, &mcast->neigh_list); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 8a1bd354b1cc..bfa576aa9f03 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1013,8 +1013,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) return -ENOMEM; attr->qp_state = IB_QPS_INIT; - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; attr->port_num = ch->sport->port; attr->pkey_index = 0; @@ -2078,7 +2077,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto destroy_ib; } - guid = (__be16 *)¶m->primary_path->sgid.global.interface_id; + guid = (__be16 *)¶m->primary_path->dgid.global.interface_id; snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x", be16_to_cpu(guid[0]), be16_to_cpu(guid[1]), be16_to_cpu(guid[2]), be16_to_cpu(guid[3])); diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 3d8ff09eba57..c868a878c84f 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -163,7 +163,7 @@ static unsigned int get_time_pit(void) #define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "TSC" -#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE) +#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) || defined(CONFIG_TILE) #define GET_TIME(x) do { x = get_cycles(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "get_cycles" diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index ae473123583b..3d51175c4d72 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1651,7 +1651,7 @@ ims_pcu_get_cdc_union_desc(struct usb_interface *intf) return union_desc; dev_err(&intf->dev, - "Union descriptor to short (%d vs %zd\n)", + "Union descriptor too short (%d vs %zd)\n", union_desc->bLength, sizeof(*union_desc)); return NULL; } diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 6bf56bb5f8d9..d91f3b1c5375 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -326,8 +326,6 @@ static int xenkbd_probe(struct xenbus_device *dev, 0, width, 0, 0); input_set_abs_params(mtouch, ABS_MT_POSITION_Y, 0, height, 0, 0); - input_set_abs_params(mtouch, ABS_MT_PRESSURE, - 0, 255, 0, 0); ret = input_mt_init_slots(mtouch, num_cont, INPUT_MT_DIRECT); if (ret) { diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index b84cd978fce2..a4aaa748e987 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1613,7 +1613,7 @@ static int elantech_set_properties(struct elantech_data *etd) case 5: etd->hw_version = 3; break; - case 6 ... 14: + case 6 ... 15: etd->hw_version = 4; break; default: diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index e102d7764bc2..a458e5ec9e41 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -27,6 +27,7 @@ #include <linux/module.h> #include <linux/input.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/platform_device.h> #include <linux/async.h> #include <linux/i2c.h> @@ -1261,10 +1262,13 @@ static int elants_i2c_probe(struct i2c_client *client, } /* - * Systems using device tree should set up interrupt via DTS, - * the rest will use the default falling edge interrupts. + * Platform code (ACPI, DTS) should normally set up interrupt + * for us, but in case it did not let's fall back to using falling + * edge to be compatible with older Chromebooks. */ - irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING; + irqflags = irq_get_trigger_type(client->irq); + if (!irqflags) + irqflags = IRQF_TRIGGER_FALLING; error = devm_request_threaded_irq(&client->dev, client->irq, NULL, elants_i2c_irq, diff --git a/drivers/input/touchscreen/hideep.c b/drivers/input/touchscreen/hideep.c index fc080a7c2e1f..f1cd4dd9a4a3 100644 --- a/drivers/input/touchscreen/hideep.c +++ b/drivers/input/touchscreen/hideep.c @@ -10,8 +10,7 @@ #include <linux/of.h> #include <linux/firmware.h> #include <linux/delay.h> -#include <linux/gpio.h> -#include <linux/gpio/machine.h> +#include <linux/gpio/consumer.h> #include <linux/i2c.h> #include <linux/acpi.h> #include <linux/interrupt.h> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7d5eb004091d..97baf88d9505 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4184,7 +4184,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, struct irq_cfg *cfg); static int irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f122071688fd..744592d330ca 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1698,13 +1698,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; domain->geometry.aperture_end = (1UL << ias) - 1; domain->geometry.force_aperture = true; - smmu_domain->pgtbl_ops = pgtbl_ops; ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); - if (ret < 0) + if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); + return ret; + } - return ret; + smmu_domain->pgtbl_ops = pgtbl_ops; + return 0; } static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) @@ -1731,7 +1733,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) { - int i; + int i, j; struct arm_smmu_master_data *master = fwspec->iommu_priv; struct arm_smmu_device *smmu = master->smmu; @@ -1739,6 +1741,13 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) u32 sid = fwspec->ids[i]; __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); + /* Bridged PCI devices may end up with duplicated IDs */ + for (j = 0; j < i; j++) + if (fwspec->ids[j] == sid) + break; + if (j < i) + continue; + arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); } } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 76a193c7fcfc..66f69af2c219 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1397,7 +1397,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain, } static int intel_irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { intel_ir_reconfigure_irte(irq_data, true); return 0; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 4039e64cd342..06f025fd5726 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2303,7 +2303,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, } static int its_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d, bool early) + struct irq_data *d, bool reserve) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); @@ -2818,7 +2818,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq } static int its_vpe_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d, bool early) + struct irq_data *d, bool reserve) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); struct its_node *its; diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 06f29cf5018a..cee59fe1321c 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id) */ static struct lock_class_key intc_irqpin_irq_lock_class; +/* And this is for the request mutex */ +static struct lock_class_key intc_irqpin_irq_request_class; + static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { @@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, intc_irqpin_dbg(&p->irq[hw], "map"); irq_set_chip_data(virq, h->host_data); - irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); + irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class, + &intc_irqpin_irq_request_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); return 0; } diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index f3654fd2eaf3..ede4fa0ac2cc 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -186,8 +186,9 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - led_stop_software_blink(led_cdev); + del_timer_sync(&led_cdev->blink_timer); + clear_bit(LED_BLINK_SW, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index 09cf3699e354..a307832d7e45 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = { }; static struct lock_class_key arizona_irq_lock_class; +static struct lock_class_key arizona_irq_request_class; static int arizona_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) @@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq, struct arizona *data = h->host_data; irq_set_chip_data(virq, data); - irq_set_lockdep_class(virq, &arizona_irq_lock_class); + irq_set_lockdep_class(virq, &arizona_irq_lock_class, + &arizona_irq_request_class); irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq); irq_set_nested_thread(virq, 1); irq_set_noprobe(virq); diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 590fb9aad77d..c3ed885c155c 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -1543,6 +1543,9 @@ static void rtsx_pci_shutdown(struct pci_dev *pcidev) rtsx_pci_power_off(pcr, HOST_ENTER_S1); pci_disable_device(pcidev); + free_irq(pcr->irq, (void *)pcr); + if (pcr->msi_en) + pci_disable_msi(pcr->pci); } #else /* CONFIG_PM */ diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 90b9a9ccbe60..9285f60e5783 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -963,6 +963,7 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command) switch (command) { case NAND_CMD_READ0: + case NAND_CMD_READOOB: case NAND_CMD_PAGEPROG: info->use_ecc = 1; break; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 3cd371c94e83..634c51e6b8ae 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -543,7 +543,7 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev) data = be32_to_cpup((__be32 *)&cf->data[0]); priv->write(data, &priv->tx_mb->data[0]); } - if (cf->can_dlc > 3) { + if (cf->can_dlc > 4) { data = be32_to_cpup((__be32 *)&cf->data[4]); priv->write(data, &priv->tx_mb->data[1]); } diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index b00358297424..12ff0020ecd6 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -395,6 +395,7 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) if (dev->can.state == CAN_STATE_ERROR_WARNING || dev->can.state == CAN_STATE_ERROR_PASSIVE) { + cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (txerr > rxerr) ? CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE; } diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 68ac3e88a8ce..8bf80ad9dc44 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev) dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)", rc); - return rc; + return (rc > 0) ? 0 : rc; } static void gs_usb_xmit_callback(struct urb *urb) diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 5d1753cfacea..ed6828821fbd 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -194,7 +194,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev, tbp = peer_tb; } - if (tbp[IFLA_IFNAME]) { + if (ifmp && tbp[IFLA_IFNAME]) { nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 561b05089cb6..db830a1141d9 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1497,10 +1497,13 @@ enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port) { struct b53_device *dev = ds->priv; - /* Older models support a different tag format that we do not - * support in net/dsa/tag_brcm.c yet. + /* Older models (5325, 5365) support a different tag format that we do + * not support in net/dsa/tag_brcm.c yet. 539x and 531x5 require managed + * mode to be turned on which means we need to specifically manage ARL + * misses on multicast addresses (TBD). */ - if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port)) + if (is5325(dev) || is5365(dev) || is539x(dev) || is531x5(dev) || + !b53_can_enable_brcm_tags(ds, port)) return DSA_TAG_PROTO_NONE; /* Broadcom BCM58xx chips have a flow accelerator on Port 8 diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index f4e13a7014bd..36c8950dbd2d 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -602,7 +602,7 @@ struct vortex_private { struct sk_buff* rx_skbuff[RX_RING_SIZE]; struct sk_buff* tx_skbuff[TX_RING_SIZE]; unsigned int cur_rx, cur_tx; /* The next free ring entry */ - unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ + unsigned int dirty_tx; /* The ring entries to be free()ed. */ struct vortex_extra_stats xstats; /* NIC-specific extra stats */ struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl. */ dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */ @@ -618,7 +618,6 @@ struct vortex_private { /* The remainder are related to chip state, mostly media selection. */ struct timer_list timer; /* Media selection timer. */ - struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */ int options; /* User-settable misc. driver options. */ unsigned int media_override:4, /* Passed-in media type. */ default_media:4, /* Read from the EEPROM/Wn3_Config. */ @@ -760,7 +759,6 @@ static void mdio_sync(struct vortex_private *vp, int bits); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *vp, int phy_id, int location, int value); static void vortex_timer(struct timer_list *t); -static void rx_oom_timer(struct timer_list *t); static netdev_tx_t vortex_start_xmit(struct sk_buff *skb, struct net_device *dev); static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb, @@ -1601,7 +1599,6 @@ vortex_up(struct net_device *dev) timer_setup(&vp->timer, vortex_timer, 0); mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait)); - timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0); if (vortex_debug > 1) pr_debug("%s: Initial media type %s.\n", @@ -1676,7 +1673,7 @@ vortex_up(struct net_device *dev) window_write16(vp, 0x0040, 4, Wn4_NetDiag); if (vp->full_bus_master_rx) { /* Boomerang bus master. */ - vp->cur_rx = vp->dirty_rx = 0; + vp->cur_rx = 0; /* Initialize the RxEarly register as recommended. */ iowrite16(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD); iowrite32(0x0020, ioaddr + PktStatus); @@ -1729,6 +1726,7 @@ vortex_open(struct net_device *dev) struct vortex_private *vp = netdev_priv(dev); int i; int retval; + dma_addr_t dma; /* Use the now-standard shared IRQ implementation. */ if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ? @@ -1753,7 +1751,11 @@ vortex_open(struct net_device *dev) break; /* Bad news! */ skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */ - vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); + dma = pci_map_single(VORTEX_PCI(vp), skb->data, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma)) + break; + vp->rx_ring[i].addr = cpu_to_le32(dma); } if (i != RX_RING_SIZE) { pr_emerg("%s: no memory for rx ring\n", dev->name); @@ -2067,6 +2069,12 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev) int len = (skb->len + 3) & ~3; vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) { + dev_kfree_skb_any(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + spin_lock_irq(&vp->window_lock); window_set(vp, 7); iowrite32(vp->tx_skb_dma, ioaddr + Wn7_MasterAddr); @@ -2593,7 +2601,7 @@ boomerang_rx(struct net_device *dev) int entry = vp->cur_rx % RX_RING_SIZE; void __iomem *ioaddr = vp->ioaddr; int rx_status; - int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx; + int rx_work_limit = RX_RING_SIZE; if (vortex_debug > 5) pr_debug("boomerang_rx(): status %4.4x\n", ioread16(ioaddr+EL3_STATUS)); @@ -2614,7 +2622,8 @@ boomerang_rx(struct net_device *dev) } else { /* The packet length: up to 4.5K!. */ int pkt_len = rx_status & 0x1fff; - struct sk_buff *skb; + struct sk_buff *skb, *newskb; + dma_addr_t newdma; dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr); if (vortex_debug > 4) @@ -2633,9 +2642,27 @@ boomerang_rx(struct net_device *dev) pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); vp->rx_copy++; } else { + /* Pre-allocate the replacement skb. If it or its + * mapping fails then recycle the buffer thats already + * in place + */ + newskb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ); + if (!newskb) { + dev->stats.rx_dropped++; + goto clear_complete; + } + newdma = pci_map_single(VORTEX_PCI(vp), newskb->data, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) { + dev->stats.rx_dropped++; + consume_skb(newskb); + goto clear_complete; + } + /* Pass up the skbuff already on the Rx ring. */ skb = vp->rx_skbuff[entry]; - vp->rx_skbuff[entry] = NULL; + vp->rx_skbuff[entry] = newskb; + vp->rx_ring[entry].addr = cpu_to_le32(newdma); skb_put(skb, pkt_len); pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); vp->rx_nocopy++; @@ -2653,55 +2680,15 @@ boomerang_rx(struct net_device *dev) netif_rx(skb); dev->stats.rx_packets++; } - entry = (++vp->cur_rx) % RX_RING_SIZE; - } - /* Refill the Rx ring buffers. */ - for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) { - struct sk_buff *skb; - entry = vp->dirty_rx % RX_RING_SIZE; - if (vp->rx_skbuff[entry] == NULL) { - skb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ); - if (skb == NULL) { - static unsigned long last_jif; - if (time_after(jiffies, last_jif + 10 * HZ)) { - pr_warn("%s: memory shortage\n", - dev->name); - last_jif = jiffies; - } - if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) - mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1)); - break; /* Bad news! */ - } - vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); - vp->rx_skbuff[entry] = skb; - } +clear_complete: vp->rx_ring[entry].status = 0; /* Clear complete bit. */ iowrite16(UpUnstall, ioaddr + EL3_CMD); + entry = (++vp->cur_rx) % RX_RING_SIZE; } return 0; } -/* - * If we've hit a total OOM refilling the Rx ring we poll once a second - * for some memory. Otherwise there is no way to restart the rx process. - */ -static void -rx_oom_timer(struct timer_list *t) -{ - struct vortex_private *vp = from_timer(vp, t, rx_oom_timer); - struct net_device *dev = vp->mii.dev; - - spin_lock_irq(&vp->lock); - if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */ - boomerang_rx(dev); - if (vortex_debug > 1) { - pr_debug("%s: rx_oom_timer %s\n", dev->name, - ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying"); - } - spin_unlock_irq(&vp->lock); -} - static void vortex_down(struct net_device *dev, int final_down) { @@ -2711,7 +2698,6 @@ vortex_down(struct net_device *dev, int final_down) netdev_reset_queue(dev); netif_stop_queue(dev); - del_timer_sync(&vp->rx_oom_timer); del_timer_sync(&vp->timer); /* Turn off statistics ASAP. We update dev->stats below. */ diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a6f283232cb7..6975150d144e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -75,6 +75,9 @@ static struct workqueue_struct *ena_wq; MODULE_DEVICE_TABLE(pci, ena_pci_tbl); static int ena_rss_init_default(struct ena_adapter *adapter); +static void check_for_admin_com_state(struct ena_adapter *adapter); +static void ena_destroy_device(struct ena_adapter *adapter); +static int ena_restore_device(struct ena_adapter *adapter); static void ena_tx_timeout(struct net_device *dev) { @@ -1570,7 +1573,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) static int ena_up_complete(struct ena_adapter *adapter) { - int rc, i; + int rc; rc = ena_rss_configure(adapter); if (rc) @@ -1589,17 +1592,6 @@ static int ena_up_complete(struct ena_adapter *adapter) ena_napi_enable_all(adapter); - /* Enable completion queues interrupt */ - for (i = 0; i < adapter->num_queues; i++) - ena_unmask_interrupt(&adapter->tx_ring[i], - &adapter->rx_ring[i]); - - /* schedule napi in case we had pending packets - * from the last time we disable napi - */ - for (i = 0; i < adapter->num_queues; i++) - napi_schedule(&adapter->ena_napi[i].napi); - return 0; } @@ -1736,7 +1728,7 @@ create_err: static int ena_up(struct ena_adapter *adapter) { - int rc; + int rc, i; netdev_dbg(adapter->netdev, "%s\n", __func__); @@ -1779,6 +1771,17 @@ static int ena_up(struct ena_adapter *adapter) set_bit(ENA_FLAG_DEV_UP, &adapter->flags); + /* Enable completion queues interrupt */ + for (i = 0; i < adapter->num_queues; i++) + ena_unmask_interrupt(&adapter->tx_ring[i], + &adapter->rx_ring[i]); + + /* schedule napi in case we had pending packets + * from the last time we disable napi + */ + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); + return rc; err_up: @@ -1889,6 +1892,17 @@ static int ena_close(struct net_device *netdev) if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) ena_down(adapter); + /* Check for device status and issue reset if needed*/ + check_for_admin_com_state(adapter); + if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + netif_err(adapter, ifdown, adapter->netdev, + "Destroy failure, restarting device\n"); + ena_dump_stats_to_dmesg(adapter); + /* rtnl lock already obtained in dev_ioctl() layer */ + ena_destroy_device(adapter); + ena_restore_device(adapter); + } + return 0; } @@ -2549,11 +2563,12 @@ static void ena_destroy_device(struct ena_adapter *adapter) ena_com_set_admin_running_state(ena_dev, false); - ena_close(netdev); + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); /* Before releasing the ENA resources, a device reset is required. * (to prevent the device from accessing them). - * In case the reset flag is set and the device is up, ena_close + * In case the reset flag is set and the device is up, ena_down() * already perform the reset, so it can be skipped. */ if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 5ee18660bc33..c9617675f934 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -70,7 +70,7 @@ static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id) netdev_err(bp->dev, "vf ndo called though sriov is disabled\n"); return -EINVAL; } - if (vf_id >= bp->pf.max_vfs) { + if (vf_id >= bp->pf.active_vfs) { netdev_err(bp->dev, "Invalid VF id %d\n", vf_id); return -EINVAL; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 2ae5ed151369..1d9b08c20f95 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -416,7 +416,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, } /* If all IP and L4 fields are wildcarded then this is an L2 flow */ - if (is_wildcard(&l3_mask, sizeof(l3_mask)) && + if (is_wildcard(l3_mask, sizeof(*l3_mask)) && is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) { flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2; } else { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 69d0b64e6986..1ff71825868c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -345,7 +345,6 @@ struct adapter_params { unsigned int sf_size; /* serial flash size in bytes */ unsigned int sf_nsec; /* # of flash sectors */ - unsigned int sf_fw_start; /* start of FW image in flash */ unsigned int fw_vers; /* firmware version */ unsigned int bs_vers; /* bootstrap version */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index f2a60e01d563..0e9f64a46ac5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2847,8 +2847,6 @@ enum { SF_RD_DATA_FAST = 0xb, /* read flash */ SF_RD_ID = 0x9f, /* read ID */ SF_ERASE_SECTOR = 0xd8, /* erase sector */ - - FW_MAX_SIZE = 16 * SF_SEC_SIZE, }; /** @@ -3561,8 +3559,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) const __be32 *p = (const __be32 *)fw_data; const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data; unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; - unsigned int fw_img_start = adap->params.sf_fw_start; - unsigned int fw_start_sec = fw_img_start / sf_sec_size; + unsigned int fw_start_sec = FLASH_FW_START_SEC; + unsigned int fw_size = FLASH_FW_MAX_SIZE; + unsigned int fw_start = FLASH_FW_START; if (!size) { dev_err(adap->pdev_dev, "FW image has no data\n"); @@ -3578,9 +3577,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) "FW image size differs from size in FW header\n"); return -EINVAL; } - if (size > FW_MAX_SIZE) { + if (size > fw_size) { dev_err(adap->pdev_dev, "FW image too large, max is %u bytes\n", - FW_MAX_SIZE); + fw_size); return -EFBIG; } if (!t4_fw_matches_chip(adap, hdr)) @@ -3607,11 +3606,11 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) */ memcpy(first_page, fw_data, SF_PAGE_SIZE); ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); - ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page); + ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page); if (ret) goto out; - addr = fw_img_start; + addr = fw_start; for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; fw_data += SF_PAGE_SIZE; @@ -3621,7 +3620,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) } ret = t4_write_flash(adap, - fw_img_start + offsetof(struct fw_hdr, fw_ver), + fw_start + offsetof(struct fw_hdr, fw_ver), sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); out: if (ret) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 02dd5246dfae..1a49297224ed 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -103,7 +103,7 @@ static struct be_cmd_priv_map cmd_priv_map[] = { static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode, u8 subsystem) { int i; - int num_entries = sizeof(cmd_priv_map)/sizeof(struct be_cmd_priv_map); + int num_entries = ARRAY_SIZE(cmd_priv_map); u32 cmd_privileges = adapter->cmd_privileges; for (i = 0; i < num_entries; i++) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index e17d10b8b041..90aa69a08922 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3489,6 +3489,10 @@ fec_probe(struct platform_device *pdev) goto failed_regulator; } } else { + if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto failed_regulator; + } fep->reg_phy = NULL; } @@ -3572,8 +3576,9 @@ failed_clk_ipg: failed_clk: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); -failed_phy: of_node_put(phy_node); +failed_phy: + dev_id--; failed_ioremap: free_netdev(ndev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b23107d7821f..14c7625c6f19 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1126,8 +1126,8 @@ static int hns3_nic_set_features(struct net_device *netdev, return 0; } -static void -hns3_nic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) +static void hns3_nic_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct hns3_nic_priv *priv = netdev_priv(netdev); int queue_num = priv->ae_handle->kinfo.num_tqps; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 1e8fac3ae750..d3cb3ec2c62b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -59,41 +59,6 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_TQP_STATS_COUNT (HNS3_TXQ_STATS_COUNT + HNS3_RXQ_STATS_COUNT) -/* netdev stats */ -#define HNS3_NETDEV_STAT(_string, _member) { \ - .stats_string = _string, \ - .stats_offset = offsetof(struct rtnl_link_stats64, _member) \ -} - -static const struct hns3_stats hns3_netdev_stats[] = { - /* Rx per-queue statistics */ - HNS3_NETDEV_STAT("rx_packets", rx_packets), - HNS3_NETDEV_STAT("tx_packets", tx_packets), - HNS3_NETDEV_STAT("rx_bytes", rx_bytes), - HNS3_NETDEV_STAT("tx_bytes", tx_bytes), - HNS3_NETDEV_STAT("rx_errors", rx_errors), - HNS3_NETDEV_STAT("tx_errors", tx_errors), - HNS3_NETDEV_STAT("rx_dropped", rx_dropped), - HNS3_NETDEV_STAT("tx_dropped", tx_dropped), - HNS3_NETDEV_STAT("multicast", multicast), - HNS3_NETDEV_STAT("collisions", collisions), - HNS3_NETDEV_STAT("rx_length_errors", rx_length_errors), - HNS3_NETDEV_STAT("rx_over_errors", rx_over_errors), - HNS3_NETDEV_STAT("rx_crc_errors", rx_crc_errors), - HNS3_NETDEV_STAT("rx_frame_errors", rx_frame_errors), - HNS3_NETDEV_STAT("rx_fifo_errors", rx_fifo_errors), - HNS3_NETDEV_STAT("rx_missed_errors", rx_missed_errors), - HNS3_NETDEV_STAT("tx_aborted_errors", tx_aborted_errors), - HNS3_NETDEV_STAT("tx_carrier_errors", tx_carrier_errors), - HNS3_NETDEV_STAT("tx_fifo_errors", tx_fifo_errors), - HNS3_NETDEV_STAT("tx_heartbeat_errors", tx_heartbeat_errors), - HNS3_NETDEV_STAT("tx_window_errors", tx_window_errors), - HNS3_NETDEV_STAT("rx_compressed", rx_compressed), - HNS3_NETDEV_STAT("tx_compressed", tx_compressed), -}; - -#define HNS3_NETDEV_STATS_COUNT ARRAY_SIZE(hns3_netdev_stats) - #define HNS3_SELF_TEST_TPYE_NUM 1 #define HNS3_NIC_LB_TEST_PKT_NUM 1 #define HNS3_NIC_LB_TEST_RING_ID 0 @@ -466,27 +431,6 @@ static u8 *hns3_get_strings_tqps(struct hnae3_handle *handle, u8 *data) return data; } -static u8 *hns3_netdev_stats_get_strings(u8 *data) -{ - int i; - - /* get strings for netdev */ - for (i = 0; i < HNS3_NETDEV_STATS_COUNT; i++) { - snprintf(data, ETH_GSTRING_LEN, - hns3_netdev_stats[i].stats_string); - data += ETH_GSTRING_LEN; - } - - snprintf(data, ETH_GSTRING_LEN, "netdev_rx_dropped"); - data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, "netdev_tx_dropped"); - data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, "netdev_tx_timeout"); - data += ETH_GSTRING_LEN; - - return data; -} - static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct hnae3_handle *h = hns3_get_handle(netdev); @@ -498,7 +442,6 @@ static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_STATS: - buff = hns3_netdev_stats_get_strings(buff); buff = hns3_get_strings_tqps(h, buff); h->ae_algo->ops->get_strings(h, stringset, (u8 *)buff); break; @@ -537,27 +480,6 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data) return data; } -static u64 *hns3_get_netdev_stats(struct net_device *netdev, u64 *data) -{ - struct hns3_nic_priv *priv = netdev_priv(netdev); - const struct rtnl_link_stats64 *net_stats; - struct rtnl_link_stats64 temp; - u8 *stat; - int i; - - net_stats = dev_get_stats(netdev, &temp); - for (i = 0; i < HNS3_NETDEV_STATS_COUNT; i++) { - stat = (u8 *)net_stats + hns3_netdev_stats[i].stats_offset; - *data++ = *(u64 *)stat; - } - - *data++ = netdev->rx_dropped.counter; - *data++ = netdev->tx_dropped.counter; - *data++ = priv->tx_timeout_count; - - return data; -} - /* hns3_get_stats - get detail statistics. * @netdev: net device * @stats: statistics info. @@ -574,7 +496,7 @@ static void hns3_get_stats(struct net_device *netdev, return; } - p = hns3_get_netdev_stats(netdev, p); + h->ae_algo->ops->update_stats(h, &netdev->stats); /* get per-queue stats */ p = hns3_get_stats_tqps(h, p); diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index d7bdea79e9fa..8fd2458060a0 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -331,7 +331,8 @@ struct e1000_adapter { enum e1000_state_t { __E1000_TESTING, __E1000_RESETTING, - __E1000_DOWN + __E1000_DOWN, + __E1000_DISABLED }; #undef pr_fmt diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 1982f7917a8d..3dd4aeb2706d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -945,7 +945,7 @@ static int e1000_init_hw_struct(struct e1000_adapter *adapter, static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; - struct e1000_adapter *adapter; + struct e1000_adapter *adapter = NULL; struct e1000_hw *hw; static int cards_found; @@ -955,6 +955,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 tmp = 0; u16 eeprom_apme_mask = E1000_EEPROM_APME; int bars, need_ioport; + bool disable_dev = false; /* do not allocate ioport bars when not needed */ need_ioport = e1000_is_need_ioport(pdev); @@ -1259,11 +1260,13 @@ err_mdio_ioremap: iounmap(hw->ce4100_gbe_mdio_base_virt); iounmap(hw->hw_addr); err_ioremap: + disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags); free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, bars); err_pci_reg: - pci_disable_device(pdev); + if (!adapter || disable_dev) + pci_disable_device(pdev); return err; } @@ -1281,6 +1284,7 @@ static void e1000_remove(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + bool disable_dev; e1000_down_and_stop(adapter); e1000_release_manageability(adapter); @@ -1299,9 +1303,11 @@ static void e1000_remove(struct pci_dev *pdev) iounmap(hw->flash_address); pci_release_selected_regions(pdev, adapter->bars); + disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags); free_netdev(netdev); - pci_disable_device(pdev); + if (disable_dev) + pci_disable_device(pdev); } /** @@ -5156,7 +5162,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) if (netif_running(netdev)) e1000_free_irq(adapter); - pci_disable_device(pdev); + if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags)) + pci_disable_device(pdev); return 0; } @@ -5200,6 +5207,10 @@ static int e1000_resume(struct pci_dev *pdev) pr_err("Cannot enable PCI device from suspend\n"); return err; } + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__E1000_DISABLED, &adapter->flags); pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); @@ -5274,7 +5285,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, if (netif_running(netdev)) e1000_down(adapter); - pci_disable_device(pdev); + + if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags)) + pci_disable_device(pdev); /* Request a slot slot reset. */ return PCI_ERS_RESULT_NEED_RESET; @@ -5302,6 +5315,10 @@ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) pr_err("Cannot re-enable PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT; } + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__E1000_DISABLED, &adapter->flags); pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index d6d4ed7acf03..31277d3bb7dc 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1367,6 +1367,9 @@ out: * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1382,7 +1385,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1613,10 +1616,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 321d8be80871..42dcaefc4c19 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1573,11 +1573,18 @@ static int i40e_set_mac(struct net_device *netdev, void *p) else netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); + /* Copy the address first, so that we avoid a possible race with + * .set_rx_mode(). If we copy after changing the address in the filter + * list, we might open ourselves to a narrow race window where + * .set_rx_mode could delete our dev_addr filter and prevent traffic + * from passing. + */ + ether_addr_copy(netdev->dev_addr, addr->sa_data); + spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_del_mac_filter(vsi, netdev->dev_addr); i40e_add_mac_filter(vsi, addr->sa_data); spin_unlock_bh(&vsi->mac_filter_hash_lock); - ether_addr_copy(netdev->dev_addr, addr->sa_data); if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; @@ -1923,6 +1930,14 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. Because we store the + * device address in the VSI's MAC/VLAN filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + i40e_del_mac_filter(vsi, addr); return 0; @@ -6038,8 +6053,8 @@ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi) /* Set Bit 7 to be valid */ mode = I40E_AQ_SET_SWITCH_BIT7_VALID; - /* Set L4type to both TCP and UDP support */ - mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH; + /* Set L4type for TCP support */ + mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP; /* Set cloud filter mode */ mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL; @@ -6969,18 +6984,18 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, is_valid_ether_addr(filter->src_mac)) || (is_multicast_ether_addr(filter->dst_mac) && is_multicast_ether_addr(filter->src_mac))) - return -EINVAL; + return -EOPNOTSUPP; - /* Make sure port is specified, otherwise bail out, for channel - * specific cloud filter needs 'L4 port' to be non-zero + /* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP + * ports are not supported via big buffer now. */ - if (!filter->dst_port) - return -EINVAL; + if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP) + return -EOPNOTSUPP; /* adding filter using src_port/src_ip is not supported at this stage */ if (filter->src_port || filter->src_ipv4 || !ipv6_addr_any(&filter->ip.v6.src_ip6)) - return -EINVAL; + return -EOPNOTSUPP; /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter.element); @@ -6991,7 +7006,7 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, is_multicast_ether_addr(filter->src_mac)) { /* MAC + IP : unsupported mode */ if (filter->dst_ipv4) - return -EINVAL; + return -EOPNOTSUPP; /* since we validated that L4 port must be valid before * we get here, start with respective "flags" value @@ -7356,7 +7371,7 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, if (tc < 0) { dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n"); - return -EINVAL; + return -EOPNOTSUPP; } if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 2a8a85e3ae8f..40edb6e5e6f6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3061,10 +3061,30 @@ bool __i40e_chk_linearize(struct sk_buff *skb) /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ - stale = &skb_shinfo(skb)->frags[0]; - for (;;) { + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + sum += skb_frag_size(frag++); + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > I40E_MAX_DATA_PER_TXD) { + int align_pad = -(stale->page_offset) & + (I40E_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= I40E_MAX_DATA_PER_TXD_ALIGNED; + stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED; + } while (stale_size > I40E_MAX_DATA_PER_TXD); + } + /* if sum is negative we failed to make sufficient progress */ if (sum < 0) return true; @@ -3072,7 +3092,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) if (!nr_frags--) break; - sum -= skb_frag_size(stale++); + sum -= stale_size; } return false; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 50864f99446d..1ba29bb85b67 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -2012,10 +2012,30 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ - stale = &skb_shinfo(skb)->frags[0]; - for (;;) { + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + sum += skb_frag_size(frag++); + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > I40E_MAX_DATA_PER_TXD) { + int align_pad = -(stale->page_offset) & + (I40E_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= I40E_MAX_DATA_PER_TXD_ALIGNED; + stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED; + } while (stale_size > I40E_MAX_DATA_PER_TXD); + } + /* if sum is negative we failed to make sufficient progress */ if (sum < 0) return true; @@ -2023,7 +2043,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) if (!nr_frags--) break; - sum -= skb_frag_size(stale++); + sum -= stale_size; } return false; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 8611763d6129..03a4df0bed96 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -333,7 +333,6 @@ struct ixgbe_ring { struct net_device *netdev; /* netdev ring belongs to */ struct bpf_prog *xdp_prog; struct device *dev; /* device for DMA mapping */ - struct ixgbe_fwd_adapter *l2_accel_priv; void *desc; /* descriptor ring memory */ union { struct ixgbe_tx_buffer *tx_buffer_info; @@ -397,8 +396,7 @@ enum ixgbe_ring_f_enum { #define MAX_XDP_QUEUES (IXGBE_MAX_FDIR_INDICES + 1) #define IXGBE_MAX_L2A_QUEUES 4 #define IXGBE_BAD_L2A_QUEUE 3 -#define IXGBE_MAX_MACVLANS 31 -#define IXGBE_MAX_DCBMACVLANS 8 +#define IXGBE_MAX_MACVLANS 63 struct ixgbe_ring_feature { u16 limit; /* upper limit on feature indices */ @@ -723,8 +721,7 @@ struct ixgbe_adapter { u16 bridge_mode; - u16 eeprom_verh; - u16 eeprom_verl; + char eeprom_id[NVM_VER_SIZE]; u16 eeprom_cap; u32 interrupt_event; @@ -768,7 +765,8 @@ struct ixgbe_adapter { #endif /*CONFIG_DEBUG_FS*/ u8 default_up; - unsigned long fwd_bitmask; /* Bitmask indicating in use pools */ + /* Bitmask indicating in use pools */ + DECLARE_BITMAP(fwd_bitmask, IXGBE_MAX_MACVLANS + 1); #define IXGBE_MAX_LINK_HANDLE 10 struct ixgbe_jump_table *jump_tables[IXGBE_MAX_LINK_HANDLE]; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 9bef255f6a18..1948e4208fb4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -4028,6 +4028,118 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) return 0; } +/** + * ixgbe_get_orom_version - Return option ROM from EEPROM + * + * @hw: pointer to hardware structure + * @nvm_ver: pointer to output structure + * + * if valid option ROM version, nvm_ver->or_valid set to true + * else nvm_ver->or_valid is false. + **/ +void ixgbe_get_orom_version(struct ixgbe_hw *hw, + struct ixgbe_nvm_version *nvm_ver) +{ + u16 offset, eeprom_cfg_blkh, eeprom_cfg_blkl; + + nvm_ver->or_valid = false; + /* Option Rom may or may not be present. Start with pointer */ + hw->eeprom.ops.read(hw, NVM_OROM_OFFSET, &offset); + + /* make sure offset is valid */ + if (offset == 0x0 || offset == NVM_INVALID_PTR) + return; + + hw->eeprom.ops.read(hw, offset + NVM_OROM_BLK_HI, &eeprom_cfg_blkh); + hw->eeprom.ops.read(hw, offset + NVM_OROM_BLK_LOW, &eeprom_cfg_blkl); + + /* option rom exists and is valid */ + if ((eeprom_cfg_blkl | eeprom_cfg_blkh) == 0x0 || + eeprom_cfg_blkl == NVM_VER_INVALID || + eeprom_cfg_blkh == NVM_VER_INVALID) + return; + + nvm_ver->or_valid = true; + nvm_ver->or_major = eeprom_cfg_blkl >> NVM_OROM_SHIFT; + nvm_ver->or_build = (eeprom_cfg_blkl << NVM_OROM_SHIFT) | + (eeprom_cfg_blkh >> NVM_OROM_SHIFT); + nvm_ver->or_patch = eeprom_cfg_blkh & NVM_OROM_PATCH_MASK; +} + +/** + * ixgbe_get_oem_prod_version Etrack ID from EEPROM + * + * @hw: pointer to hardware structure + * @nvm_ver: pointer to output structure + * + * if valid OEM product version, nvm_ver->oem_valid set to true + * else nvm_ver->oem_valid is false. + **/ +void ixgbe_get_oem_prod_version(struct ixgbe_hw *hw, + struct ixgbe_nvm_version *nvm_ver) +{ + u16 rel_num, prod_ver, mod_len, cap, offset; + + nvm_ver->oem_valid = false; + hw->eeprom.ops.read(hw, NVM_OEM_PROD_VER_PTR, &offset); + + /* Return is offset to OEM Product Version block is invalid */ + if (offset == 0x0 && offset == NVM_INVALID_PTR) + return; + + /* Read product version block */ + hw->eeprom.ops.read(hw, offset, &mod_len); + hw->eeprom.ops.read(hw, offset + NVM_OEM_PROD_VER_CAP_OFF, &cap); + + /* Return if OEM product version block is invalid */ + if (mod_len != NVM_OEM_PROD_VER_MOD_LEN || + (cap & NVM_OEM_PROD_VER_CAP_MASK) != 0x0) + return; + + hw->eeprom.ops.read(hw, offset + NVM_OEM_PROD_VER_OFF_L, &prod_ver); + hw->eeprom.ops.read(hw, offset + NVM_OEM_PROD_VER_OFF_H, &rel_num); + + /* Return if version is invalid */ + if ((rel_num | prod_ver) == 0x0 || + rel_num == NVM_VER_INVALID || prod_ver == NVM_VER_INVALID) + return; + + nvm_ver->oem_major = prod_ver >> NVM_VER_SHIFT; + nvm_ver->oem_minor = prod_ver & NVM_VER_MASK; + nvm_ver->oem_release = rel_num; + nvm_ver->oem_valid = true; +} + +/** + * ixgbe_get_etk_id - Return Etrack ID from EEPROM + * + * @hw: pointer to hardware structure + * @nvm_ver: pointer to output structure + * + * word read errors will return 0xFFFF + **/ +void ixgbe_get_etk_id(struct ixgbe_hw *hw, + struct ixgbe_nvm_version *nvm_ver) +{ + u16 etk_id_l, etk_id_h; + + if (hw->eeprom.ops.read(hw, NVM_ETK_OFF_LOW, &etk_id_l)) + etk_id_l = NVM_VER_INVALID; + if (hw->eeprom.ops.read(hw, NVM_ETK_OFF_HI, &etk_id_h)) + etk_id_h = NVM_VER_INVALID; + + /* The word order for the version format is determined by high order + * word bit 15. + */ + if ((etk_id_h & NVM_ETK_VALID) == 0) { + nvm_ver->etk_id = etk_id_h; + nvm_ver->etk_id |= (etk_id_l << NVM_ETK_SHIFT); + } else { + nvm_ver->etk_id = etk_id_l; + nvm_ver->etk_id |= (etk_id_h << NVM_ETK_SHIFT); + } +} + void ixgbe_disable_rx_generic(struct ixgbe_hw *hw) { u32 rxctrl; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index a01409e2e06c..4d4c02366cb3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -139,6 +139,12 @@ extern const u32 ixgbe_mvals_8259X[IXGBE_MVALS_IDX_LIMIT]; s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw); s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw); +void ixgbe_get_etk_id(struct ixgbe_hw *hw, + struct ixgbe_nvm_version *nvm_ver); +void ixgbe_get_oem_prod_version(struct ixgbe_hw *hw, + struct ixgbe_nvm_version *nvm_ver); +void ixgbe_get_orom_version(struct ixgbe_hw *hw, + struct ixgbe_nvm_version *nvm_ver); void ixgbe_disable_rx_generic(struct ixgbe_hw *hw); void ixgbe_enable_rx_generic(struct ixgbe_hw *hw); s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 0aaf70b3cfcd..3bcf58b27d8b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1014,16 +1014,13 @@ static void ixgbe_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - u32 nvm_track_id; strlcpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, ixgbe_driver_version, sizeof(drvinfo->version)); - nvm_track_id = (adapter->eeprom_verh << 16) | - adapter->eeprom_verl; - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "0x%08x", - nvm_track_id); + strlcpy(drvinfo->fw_version, adapter->eeprom_id, + sizeof(drvinfo->fw_version)); strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index a23c2b5411a0..6e6b3c175267 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -1034,11 +1034,8 @@ int ixgbe_fcoe_get_hbainfo(struct net_device *netdev, ixgbe_driver_name, ixgbe_driver_version); /* Firmware Version */ - snprintf(info->firmware_version, - sizeof(info->firmware_version), - "0x%08x", - (adapter->eeprom_verh << 16) | - adapter->eeprom_verl); + strlcpy(info->firmware_version, adapter->eeprom_id, + sizeof(info->firmware_version)); /* Model */ if (hw->mac.type == ixgbe_mac_82599EB) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 8e2a957aca18..cceafbc3f1db 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -350,6 +350,9 @@ static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter) if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) return false; + /* limit VMDq instances on the PF by number of Tx queues */ + vmdq_i = min_t(u16, vmdq_i, MAX_TX_QUEUES / tcs); + /* Add starting offset to total pool count */ vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset; @@ -512,12 +515,14 @@ static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter) #ifdef IXGBE_FCOE u16 fcoe_i = 0; #endif - bool pools = (find_first_zero_bit(&adapter->fwd_bitmask, 32) > 1); /* only proceed if SR-IOV is enabled */ if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) return false; + /* limit l2fwd RSS based on total Tx queue limit */ + rss_i = min_t(u16, rss_i, MAX_TX_QUEUES / vmdq_i); + /* Add starting offset to total pool count */ vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset; @@ -525,7 +530,7 @@ static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter) vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i); /* 64 pool mode with 2 queues per pool */ - if ((vmdq_i > 32) || (vmdq_i > 16 && pools)) { + if (vmdq_i > 32) { vmdq_m = IXGBE_82599_VMDQ_2Q_MASK; rss_m = IXGBE_RSS_2Q_MASK; rss_i = min_t(u16, rss_i, 2); @@ -701,7 +706,7 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) adapter->num_rx_queues = 1; adapter->num_tx_queues = 1; adapter->num_xdp_queues = 0; - adapter->num_rx_pools = adapter->num_rx_queues; + adapter->num_rx_pools = 1; adapter->num_rx_queues_per_pool = 1; #ifdef CONFIG_IXGBE_DCB diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 95aba975b391..e47e0c470508 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -192,6 +192,13 @@ static struct workqueue_struct *ixgbe_wq; static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev); static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *); +static const struct net_device_ops ixgbe_netdev_ops; + +static bool netif_is_ixgbe(struct net_device *dev) +{ + return dev && (dev->netdev_ops == &ixgbe_netdev_ops); +} + static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter, u32 reg, u16 *value) { @@ -1064,24 +1071,12 @@ static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring) static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring) { - struct ixgbe_adapter *adapter; - struct ixgbe_hw *hw; - u32 head, tail; - - if (ring->l2_accel_priv) - adapter = ring->l2_accel_priv->real_adapter; - else - adapter = netdev_priv(ring->netdev); - - hw = &adapter->hw; - head = IXGBE_READ_REG(hw, IXGBE_TDH(ring->reg_idx)); - tail = IXGBE_READ_REG(hw, IXGBE_TDT(ring->reg_idx)); + unsigned int head, tail; - if (head != tail) - return (head < tail) ? - tail - head : (tail + ring->count - head); + head = ring->next_to_clean; + tail = ring->next_to_use; - return 0; + return ((head <= tail) ? tail : tail + ring->count) - head; } static inline bool ixgbe_check_tx_hang(struct ixgbe_ring *tx_ring) @@ -2517,13 +2512,6 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask); } -enum latency_range { - lowest_latency = 0, - low_latency = 1, - bulk_latency = 2, - latency_invalid = 255 -}; - /** * ixgbe_update_itr - update the dynamic ITR value based on statistics * @q_vector: structure containing interrupt and ring information @@ -2536,8 +2524,6 @@ enum latency_range { * based on theoretical maximum wire speed and thresholds were set based * on testing data as well as attempting to minimize response time * while increasing bulk throughput. - * this functionality is controlled by the InterruptThrottleRate module - * parameter (see ixgbe_param.c) **/ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, struct ixgbe_ring_container *ring_container) @@ -3855,16 +3841,20 @@ static void ixgbe_store_vfreta(struct ixgbe_adapter *adapter) u32 i, reta_entries = ixgbe_rss_indir_tbl_entries(adapter); struct ixgbe_hw *hw = &adapter->hw; u32 vfreta = 0; - unsigned int pf_pool = adapter->num_vfs; /* Write redirection table to HW */ for (i = 0; i < reta_entries; i++) { + u16 pool = adapter->num_rx_pools; + vfreta |= (u32)adapter->rss_indir_tbl[i] << (i & 0x3) * 8; - if ((i & 3) == 3) { - IXGBE_WRITE_REG(hw, IXGBE_PFVFRETA(i >> 2, pf_pool), + if ((i & 3) != 3) + continue; + + while (pool--) + IXGBE_WRITE_REG(hw, + IXGBE_PFVFRETA(i >> 2, VMDQ_P(pool)), vfreta); - vfreta = 0; - } + vfreta = 0; } } @@ -3901,13 +3891,17 @@ static void ixgbe_setup_vfreta(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; - unsigned int pf_pool = adapter->num_vfs; int i, j; /* Fill out hash function seeds */ - for (i = 0; i < 10; i++) - IXGBE_WRITE_REG(hw, IXGBE_PFVFRSSRK(i, pf_pool), - *(adapter->rss_key + i)); + for (i = 0; i < 10; i++) { + u16 pool = adapter->num_rx_pools; + + while (pool--) + IXGBE_WRITE_REG(hw, + IXGBE_PFVFRSSRK(i, VMDQ_P(pool)), + *(adapter->rss_key + i)); + } /* Fill out the redirection table */ for (i = 0, j = 0; i < 64; i++, j++) { @@ -3973,7 +3967,7 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) if ((hw->mac.type >= ixgbe_mac_X550) && (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) { - unsigned int pf_pool = adapter->num_vfs; + u16 pool = adapter->num_rx_pools; /* Enable VF RSS mode */ mrqc |= IXGBE_MRQC_MULTIPLE_RSS; @@ -3983,7 +3977,11 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) ixgbe_setup_vfreta(adapter); vfmrqc = IXGBE_MRQC_RSSEN; vfmrqc |= rss_field; - IXGBE_WRITE_REG(hw, IXGBE_PFVFMRQC(pf_pool), vfmrqc); + + while (pool--) + IXGBE_WRITE_REG(hw, + IXGBE_PFVFMRQC(VMDQ_P(pool)), + vfmrqc); } else { ixgbe_setup_reta(adapter); mrqc |= rss_field; @@ -4146,7 +4144,7 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int rss_i = adapter->ring_feature[RING_F_RSS].indices; - u16 pool; + u16 pool = adapter->num_rx_pools; /* PSRTYPE must be initialized in non 82598 adapters */ u32 psrtype = IXGBE_PSRTYPE_TCPHDR | @@ -4163,7 +4161,7 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter) else if (rss_i > 1) psrtype |= 1u << 29; - for_each_set_bit(pool, &adapter->fwd_bitmask, 32) + while (pool--) IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype); } @@ -4490,8 +4488,9 @@ static void ixgbe_vlan_strip_disable(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *ring = adapter->rx_ring[i]; - if (ring->l2_accel_priv) + if (!netif_is_ixgbe(ring->netdev)) continue; + j = ring->reg_idx; vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j)); vlnctrl &= ~IXGBE_RXDCTL_VME; @@ -4527,8 +4526,9 @@ static void ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *ring = adapter->rx_ring[i]; - if (ring->l2_accel_priv) + if (!netif_is_ixgbe(ring->netdev)) continue; + j = ring->reg_idx; vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j)); vlnctrl |= IXGBE_RXDCTL_VME; @@ -5279,29 +5279,6 @@ static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool, IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr); } -static void ixgbe_fwd_psrtype(struct ixgbe_fwd_adapter *vadapter) -{ - struct ixgbe_adapter *adapter = vadapter->real_adapter; - int rss_i = adapter->num_rx_queues_per_pool; - struct ixgbe_hw *hw = &adapter->hw; - u16 pool = vadapter->pool; - u32 psrtype = IXGBE_PSRTYPE_TCPHDR | - IXGBE_PSRTYPE_UDPHDR | - IXGBE_PSRTYPE_IPV4HDR | - IXGBE_PSRTYPE_L2HDR | - IXGBE_PSRTYPE_IPV6HDR; - - if (hw->mac.type == ixgbe_mac_82598EB) - return; - - if (rss_i > 3) - psrtype |= 2u << 29; - else if (rss_i > 1) - psrtype |= 1u << 29; - - IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype); -} - /** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from @@ -5365,7 +5342,6 @@ static void ixgbe_disable_fwd_ring(struct ixgbe_fwd_adapter *vadapter, usleep_range(10000, 20000); ixgbe_irq_disable_queues(adapter, BIT_ULL(index)); ixgbe_clean_rx_ring(rx_ring); - rx_ring->l2_accel_priv = NULL; } static int ixgbe_fwd_ring_down(struct net_device *vdev, @@ -5383,10 +5359,8 @@ static int ixgbe_fwd_ring_down(struct net_device *vdev, adapter->rx_ring[rxbase + i]->netdev = adapter->netdev; } - for (i = 0; i < adapter->num_rx_queues_per_pool; i++) { - adapter->tx_ring[txbase + i]->l2_accel_priv = NULL; + for (i = 0; i < adapter->num_rx_queues_per_pool; i++) adapter->tx_ring[txbase + i]->netdev = adapter->netdev; - } return 0; @@ -5399,14 +5373,13 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev, unsigned int rxbase, txbase, queues; int i, baseq, err = 0; - if (!test_bit(accel->pool, &adapter->fwd_bitmask)) + if (!test_bit(accel->pool, adapter->fwd_bitmask)) return 0; baseq = accel->pool * adapter->num_rx_queues_per_pool; - netdev_dbg(vdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n", + netdev_dbg(vdev, "pool %i:%i queues %i:%i\n", accel->pool, adapter->num_rx_pools, - baseq, baseq + adapter->num_rx_queues_per_pool, - adapter->fwd_bitmask); + baseq, baseq + adapter->num_rx_queues_per_pool); accel->netdev = vdev; accel->rx_base_queue = rxbase = baseq; @@ -5417,14 +5390,11 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev, for (i = 0; i < adapter->num_rx_queues_per_pool; i++) { adapter->rx_ring[rxbase + i]->netdev = vdev; - adapter->rx_ring[rxbase + i]->l2_accel_priv = accel; ixgbe_configure_rx_ring(adapter, adapter->rx_ring[rxbase + i]); } - for (i = 0; i < adapter->num_rx_queues_per_pool; i++) { + for (i = 0; i < adapter->num_rx_queues_per_pool; i++) adapter->tx_ring[txbase + i]->netdev = vdev; - adapter->tx_ring[txbase + i]->l2_accel_priv = accel; - } queues = min_t(unsigned int, adapter->num_rx_queues_per_pool, vdev->num_tx_queues); @@ -5437,10 +5407,10 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev, goto fwd_queue_err; if (is_valid_ether_addr(vdev->dev_addr)) - ixgbe_add_mac_filter(adapter, vdev->dev_addr, accel->pool); + ixgbe_add_mac_filter(adapter, vdev->dev_addr, + VMDQ_P(accel->pool)); - ixgbe_fwd_psrtype(accel); - ixgbe_macvlan_set_rx_mode(vdev, accel->pool, adapter); + ixgbe_macvlan_set_rx_mode(vdev, VMDQ_P(accel->pool), adapter); return err; fwd_queue_err: ixgbe_fwd_ring_down(vdev, accel); @@ -6304,7 +6274,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, } /* PF holds first pool slot */ - set_bit(0, &adapter->fwd_bitmask); + set_bit(0, adapter->fwd_bitmask); set_bit(__IXGBE_DOWN, &adapter->state); return 0; @@ -6791,7 +6761,7 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; struct ixgbe_hw *hw = &adapter->hw; - u32 ctrl, fctrl; + u32 ctrl; u32 wufc = adapter->wol; #ifdef CONFIG_PM int retval = 0; @@ -6816,18 +6786,18 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) hw->mac.ops.stop_link_on_d3(hw); if (wufc) { + u32 fctrl; + ixgbe_set_rx_mode(netdev); /* enable the optics for 82599 SFP+ fiber as we can WoL */ if (hw->mac.ops.enable_tx_laser) hw->mac.ops.enable_tx_laser(hw); - /* turn on all-multi mode if wake on multicast is enabled */ - if (wufc & IXGBE_WUFC_MC) { - fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); - fctrl |= IXGBE_FCTRL_MPE; - IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - } + /* enable the reception of multicast packets */ + fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + fctrl |= IXGBE_FCTRL_MPE; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); ctrl |= IXGBE_CTRL_GIO_DIS; @@ -7663,6 +7633,7 @@ sfp_out: static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + u32 cap_speed; u32 speed; bool autoneg = false; @@ -7675,16 +7646,14 @@ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter) adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG; - speed = hw->phy.autoneg_advertised; - if ((!speed) && (hw->mac.ops.get_link_capabilities)) { - hw->mac.ops.get_link_capabilities(hw, &speed, &autoneg); + hw->mac.ops.get_link_capabilities(hw, &cap_speed, &autoneg); - /* setup the highest link when no autoneg */ - if (!autoneg) { - if (speed & IXGBE_LINK_SPEED_10GB_FULL) - speed = IXGBE_LINK_SPEED_10GB_FULL; - } - } + /* advertise highest capable link speed */ + if (!autoneg && (cap_speed & IXGBE_LINK_SPEED_10GB_FULL)) + speed = IXGBE_LINK_SPEED_10GB_FULL; + else + speed = cap_speed & (IXGBE_LINK_SPEED_10GB_FULL | + IXGBE_LINK_SPEED_1GB_FULL); if (hw->mac.ops.setup_link) hw->mac.ops.setup_link(hw, speed, true); @@ -8877,7 +8846,6 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - bool pools; /* Hardware supports up to 8 traffic classes */ if (tc > adapter->dcb_cfg.num_tcs.pg_tcs) @@ -8886,10 +8854,6 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) if (hw->mac.type == ixgbe_mac_82598EB && tc && tc < MAX_TRAFFIC_CLASS) return -EINVAL; - pools = (find_first_zero_bit(&adapter->fwd_bitmask, 32) > 1); - if (tc && pools && adapter->num_rx_pools > IXGBE_MAX_DCBMACVLANS) - return -EBUSY; - /* Hardware has to reinitialize queues and interrupts to * match packet buffer alignment. Unfortunately, the * hardware is not flexible enough to do this dynamically. @@ -9052,6 +9016,7 @@ static int get_macvlan_queue(struct net_device *upper, void *_data) static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, u8 *queue, u64 *action) { + struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; unsigned int num_vfs = adapter->num_vfs, vf; struct upper_walk_data data; struct net_device *upper; @@ -9060,11 +9025,7 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, for (vf = 0; vf < num_vfs; ++vf) { upper = pci_get_drvdata(adapter->vfinfo[vf].vfdev); if (upper->ifindex == ifindex) { - if (adapter->num_rx_pools > 1) - *queue = vf * 2; - else - *queue = vf * adapter->num_rx_queues_per_pool; - + *queue = vf * __ALIGN_MASK(1, ~vmdq->mask); *action = vf + 1; *action <<= ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF; return 0; @@ -9831,6 +9792,7 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) struct ixgbe_fwd_adapter *fwd_adapter = NULL; struct ixgbe_adapter *adapter = netdev_priv(pdev); int used_pools = adapter->num_vfs + adapter->num_rx_pools; + int tcs = netdev_get_num_tc(pdev) ? : 1; unsigned int limit; int pool, err; @@ -9858,7 +9820,7 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) } if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && - adapter->num_rx_pools > IXGBE_MAX_DCBMACVLANS - 1) || + adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) || (adapter->num_rx_pools > IXGBE_MAX_MACVLANS)) return ERR_PTR(-EBUSY); @@ -9866,10 +9828,9 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) if (!fwd_adapter) return ERR_PTR(-ENOMEM); - pool = find_first_zero_bit(&adapter->fwd_bitmask, 32); - adapter->num_rx_pools++; - set_bit(pool, &adapter->fwd_bitmask); - limit = find_last_bit(&adapter->fwd_bitmask, 32); + pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools); + set_bit(pool, adapter->fwd_bitmask); + limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools + 1); /* Enable VMDq flag so device will be set in VM mode */ adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | IXGBE_FLAG_SRIOV_ENABLED; @@ -9895,8 +9856,7 @@ fwd_add_err: /* unwind counter and free adapter struct */ netdev_info(pdev, "%s: dfwd hardware acceleration failed\n", vdev->name); - clear_bit(pool, &adapter->fwd_bitmask); - adapter->num_rx_pools--; + clear_bit(pool, adapter->fwd_bitmask); kfree(fwd_adapter); return ERR_PTR(err); } @@ -9907,10 +9867,9 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv) struct ixgbe_adapter *adapter = fwd_adapter->real_adapter; unsigned int limit; - clear_bit(fwd_adapter->pool, &adapter->fwd_bitmask); - adapter->num_rx_pools--; + clear_bit(fwd_adapter->pool, adapter->fwd_bitmask); - limit = find_last_bit(&adapter->fwd_bitmask, 32); + limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools); adapter->ring_feature[RING_F_VMDQ].limit = limit + 1; ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter); @@ -9925,11 +9884,11 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv) } ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev)); - netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n", + netdev_dbg(pdev, "pool %i:%i queues %i:%i\n", fwd_adapter->pool, adapter->num_rx_pools, fwd_adapter->rx_base_queue, - fwd_adapter->rx_base_queue + adapter->num_rx_queues_per_pool, - adapter->fwd_bitmask); + fwd_adapter->rx_base_queue + + adapter->num_rx_queues_per_pool); kfree(fwd_adapter); } @@ -10243,6 +10202,41 @@ bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, } /** + * ixgbe_set_fw_version - Set FW version + * @adapter: the adapter private structure + * + * This function is used by probe and ethtool to determine the FW version to + * format to display. The FW version is taken from the EEPROM/NVM. + */ +static void ixgbe_set_fw_version(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_nvm_version nvm_ver; + + ixgbe_get_oem_prod_version(hw, &nvm_ver); + if (nvm_ver.oem_valid) { + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "%x.%x.%x", nvm_ver.oem_major, nvm_ver.oem_minor, + nvm_ver.oem_release); + return; + } + + ixgbe_get_etk_id(hw, &nvm_ver); + ixgbe_get_orom_version(hw, &nvm_ver); + + if (nvm_ver.or_valid) { + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "0x%08x, %d.%d.%d", nvm_ver.etk_id, nvm_ver.or_major, + nvm_ver.or_build, nvm_ver.or_patch); + return; + } + + /* Set ETrack ID format */ + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "0x%08x", nvm_ver.etk_id); +} + +/** * ixgbe_probe - Device Initialization Routine * @pdev: PCI device information struct * @ent: entry in ixgbe_pci_tbl @@ -10578,8 +10572,7 @@ skip_sriov: device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); /* save off EEPROM version number */ - hw->eeprom.ops.read(hw, 0x2e, &adapter->eeprom_verh); - hw->eeprom.ops.read(hw, 0x2d, &adapter->eeprom_verl); + ixgbe_set_fw_version(adapter); /* pick up the PCI bus settings for reporting later */ if (ixgbe_pcie_from_parent(hw)) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 112d24c6c9ce..0085f4632966 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -227,9 +227,6 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs) int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { unsigned int num_vfs = adapter->num_vfs, vf; - struct ixgbe_hw *hw = &adapter->hw; - u32 gpie; - u32 vmdctl; int rss; /* set num VFs to 0 to prevent access to vfinfo */ @@ -271,18 +268,6 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) pci_disable_sriov(adapter->pdev); #endif - /* turn off device IOV mode */ - IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, 0); - gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); - gpie &= ~IXGBE_GPIE_VTMODE_MASK; - IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); - - /* set default pool back to 0 */ - vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL); - vmdctl &= ~IXGBE_VT_CTL_POOL_MASK; - IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl); - IXGBE_WRITE_FLUSH(hw); - /* Disable VMDq flag so device will be set in VM mode */ if (adapter->ring_feature[RING_F_VMDQ].limit == 1) { adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED; @@ -305,10 +290,9 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) { #ifdef CONFIG_PCI_IOV struct ixgbe_adapter *adapter = pci_get_drvdata(dev); - int err = 0; - u8 num_tc; - int i; int pre_existing_vfs = pci_num_vf(dev); + int err = 0, num_rx_pools, i, limit; + u8 num_tc; if (pre_existing_vfs && pre_existing_vfs != num_vfs) err = ixgbe_disable_sriov(adapter); @@ -331,22 +315,14 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) * other values out of range. */ num_tc = netdev_get_num_tc(adapter->netdev); + num_rx_pools = adapter->num_rx_pools; + limit = (num_tc > 4) ? IXGBE_MAX_VFS_8TC : + (num_tc > 1) ? IXGBE_MAX_VFS_4TC : IXGBE_MAX_VFS_1TC; - if (num_tc > 4) { - if ((num_vfs + adapter->num_rx_pools) > IXGBE_MAX_VFS_8TC) { - e_dev_err("Currently the device is configured with %d TCs, Creating more than %d VFs is not allowed\n", num_tc, IXGBE_MAX_VFS_8TC); - return -EPERM; - } - } else if ((num_tc > 1) && (num_tc <= 4)) { - if ((num_vfs + adapter->num_rx_pools) > IXGBE_MAX_VFS_4TC) { - e_dev_err("Currently the device is configured with %d TCs, Creating more than %d VFs is not allowed\n", num_tc, IXGBE_MAX_VFS_4TC); - return -EPERM; - } - } else { - if ((num_vfs + adapter->num_rx_pools) > IXGBE_MAX_VFS_1TC) { - e_dev_err("Currently the device is configured with %d TCs, Creating more than %d VFs is not allowed\n", num_tc, IXGBE_MAX_VFS_1TC); - return -EPERM; - } + if (num_vfs > (limit - num_rx_pools)) { + e_dev_err("Currently configured with %d TCs, and %d offloaded macvlans. Creating more than %d VFs is not allowed\n", + num_tc, num_rx_pools - 1, limit - num_rx_pools); + return -EPERM; } err = __ixgbe_enable_sriov(adapter, num_vfs); @@ -378,13 +354,15 @@ static int ixgbe_pci_sriov_disable(struct pci_dev *dev) int err; #ifdef CONFIG_PCI_IOV u32 current_flags = adapter->flags; + int prev_num_vf = pci_num_vf(dev); #endif err = ixgbe_disable_sriov(adapter); /* Only reinit if no error and state changed */ #ifdef CONFIG_PCI_IOV - if (!err && current_flags != adapter->flags) + if (!err && (current_flags != adapter->flags || + prev_num_vf != pci_num_vf(dev))) ixgbe_sriov_reinit(adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index ffa0ee5cd0f5..21eb79ae3c30 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -235,6 +235,45 @@ struct ixgbe_thermal_sensor_data { struct ixgbe_thermal_diode_data sensor[IXGBE_MAX_SENSORS]; }; +#define NVM_OROM_OFFSET 0x17 +#define NVM_OROM_BLK_LOW 0x83 +#define NVM_OROM_BLK_HI 0x84 +#define NVM_OROM_PATCH_MASK 0xFF +#define NVM_OROM_SHIFT 8 + +#define NVM_VER_MASK 0x00FF /* version mask */ +#define NVM_VER_SHIFT 8 /* version bit shift */ +#define NVM_OEM_PROD_VER_PTR 0x1B /* OEM Product version block pointer */ +#define NVM_OEM_PROD_VER_CAP_OFF 0x1 /* OEM Product version format offset */ +#define NVM_OEM_PROD_VER_OFF_L 0x2 /* OEM Product version offset low */ +#define NVM_OEM_PROD_VER_OFF_H 0x3 /* OEM Product version offset high */ +#define NVM_OEM_PROD_VER_CAP_MASK 0xF /* OEM Product version cap mask */ +#define NVM_OEM_PROD_VER_MOD_LEN 0x3 /* OEM Product version module length */ +#define NVM_ETK_OFF_LOW 0x2D /* version low order word */ +#define NVM_ETK_OFF_HI 0x2E /* version high order word */ +#define NVM_ETK_SHIFT 16 /* high version word shift */ +#define NVM_VER_INVALID 0xFFFF +#define NVM_ETK_VALID 0x8000 +#define NVM_INVALID_PTR 0xFFFF +#define NVM_VER_SIZE 32 /* version sting size */ + +struct ixgbe_nvm_version { + u32 etk_id; + u8 nvm_major; + u16 nvm_minor; + u8 nvm_id; + + bool oem_valid; + u8 oem_major; + u8 oem_minor; + u16 oem_release; + + bool or_valid; + u8 or_major; + u16 or_build; + u8 or_patch; +}; + /* Interrupt Registers */ #define IXGBE_EICR 0x00800 #define IXGBE_EICS 0x00808 diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 1f4a69134ade..573f743b556a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1896,10 +1896,6 @@ static void ixgbevf_set_rx_mode(struct net_device *netdev) unsigned int flags = netdev->flags; int xcast_mode; - xcast_mode = (flags & IFF_ALLMULTI) ? IXGBEVF_XCAST_MODE_ALLMULTI : - (flags & (IFF_BROADCAST | IFF_MULTICAST)) ? - IXGBEVF_XCAST_MODE_MULTI : IXGBEVF_XCAST_MODE_NONE; - /* request the most inclusive mode we need */ if (flags & IFF_PROMISC) xcast_mode = IXGBEVF_XCAST_MODE_PROMISC; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index bf1f04164885..ebc1f566a4d9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1094,12 +1094,21 @@ static int mlx4_en_set_ringparam(struct net_device *dev, if (param->rx_jumbo_pending || param->rx_mini_pending) return -EINVAL; + if (param->rx_pending < MLX4_EN_MIN_RX_SIZE) { + en_warn(priv, "%s: rx_pending (%d) < min (%d)\n", + __func__, param->rx_pending, + MLX4_EN_MIN_RX_SIZE); + return -EINVAL; + } + if (param->tx_pending < MLX4_EN_MIN_TX_SIZE) { + en_warn(priv, "%s: tx_pending (%d) < min (%lu)\n", + __func__, param->tx_pending, + MLX4_EN_MIN_TX_SIZE); + return -EINVAL; + } + rx_size = roundup_pow_of_two(param->rx_pending); - rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); - rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); tx_size = roundup_pow_of_two(param->tx_pending); - tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); - tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 8f05efa5c829..1554780d1810 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -296,7 +296,6 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, struct mlx5e_channels new_channels = {}; u32 rx_pending_wqes; u32 min_rq_size; - u32 max_rq_size; u8 log_rq_size; u8 log_sq_size; u32 num_mtts; @@ -315,8 +314,6 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, 1 << mlx5_min_log_rq_size(rq_wq_type)); - max_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, - 1 << mlx5_max_log_rq_size(rq_wq_type)); rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type, param->rx_pending); @@ -326,12 +323,6 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, min_rq_size); return -EINVAL; } - if (param->rx_pending > max_rq_size) { - netdev_info(priv->netdev, "%s: rx_pending (%d) > max (%d)\n", - __func__, param->rx_pending, - max_rq_size); - return -EINVAL; - } num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes); if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && @@ -347,12 +338,6 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); return -EINVAL; } - if (param->tx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE)) { - netdev_info(priv->netdev, "%s: tx_pending (%d) > max (%d)\n", - __func__, param->tx_pending, - 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE); - return -EINVAL; - } log_rq_size = order_base_2(rx_pending_wqes); log_sq_size = order_base_2(param->tx_pending); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d373df7b11bd..aa96cc6a0fa6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4422,7 +4422,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) { + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; @@ -4550,6 +4553,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct netdev_notifier_changeupper_info *info = ptr; struct netlink_ext_ack *extack; struct net_device *upper_dev; @@ -4566,7 +4570,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) { + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a0adcd886589..346b8b688b6f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -366,6 +366,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, struct net_device *br_dev); +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev); /* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index be657b8533f0..434b3922b34f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3228,7 +3228,7 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, { if (!removing) nh->should_offload = 1; - else if (nh->offloaded) + else nh->should_offload = 0; nh->update = 1; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7b8548e25ae7..593ad31be749 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -152,6 +152,12 @@ mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, return NULL; } +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, struct net_device *br_dev) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 857f67bebc6a..272c5962e4f7 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1675,33 +1675,24 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp) } } -static void __rtl8169_check_link_status(struct net_device *dev, - struct rtl8169_private *tp, - void __iomem *ioaddr, bool pm) +static void rtl8169_check_link_status(struct net_device *dev, + struct rtl8169_private *tp, + void __iomem *ioaddr) { if (tp->link_ok(ioaddr)) { rtl_link_chg_patch(tp); /* This is to cancel a scheduled suspend if there's one. */ - if (pm) - pm_request_resume(&tp->pci_dev->dev); + pm_request_resume(&tp->pci_dev->dev); netif_carrier_on(dev); if (net_ratelimit()) netif_info(tp, ifup, dev, "link up\n"); } else { netif_carrier_off(dev); netif_info(tp, ifdown, dev, "link down\n"); - if (pm) - pm_schedule_suspend(&tp->pci_dev->dev, 5000); + pm_runtime_idle(&tp->pci_dev->dev); } } -static void rtl8169_check_link_status(struct net_device *dev, - struct rtl8169_private *tp, - void __iomem *ioaddr) -{ - __rtl8169_check_link_status(dev, tp, ioaddr, false); -} - #define WAKE_ANY (WAKE_PHY | WAKE_MAGIC | WAKE_UCAST | WAKE_BCAST | WAKE_MCAST) static u32 __rtl8169_get_wol(struct rtl8169_private *tp) @@ -7746,7 +7737,7 @@ static void rtl_slow_event_work(struct rtl8169_private *tp) rtl8169_pcierr_interrupt(dev); if (status & LinkChg) - __rtl8169_check_link_status(dev, tp, tp->mmio_addr, true); + rtl8169_check_link_status(dev, tp, tp->mmio_addr); rtl_irq_enable_all(tp); } @@ -7959,7 +7950,7 @@ static int rtl_open(struct net_device *dev) rtl_unlock_work(tp); tp->saved_wolopts = 0; - pm_runtime_put_noidle(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); rtl8169_check_link_status(dev, tp, ioaddr); out: @@ -8103,8 +8094,10 @@ static int rtl8169_runtime_suspend(struct device *device) struct net_device *dev = pci_get_drvdata(pdev); struct rtl8169_private *tp = netdev_priv(dev); - if (!tp->TxDescArray) + if (!tp->TxDescArray) { + rtl_pll_power_down(tp); return 0; + } rtl_lock_work(tp); tp->saved_wolopts = __rtl8169_get_wol(tp); @@ -8146,9 +8139,11 @@ static int rtl8169_runtime_idle(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); - struct rtl8169_private *tp = netdev_priv(dev); - return tp->TxDescArray ? -EBUSY : 0; + if (!netif_running(dev) || !netif_carrier_ok(dev)) + pm_schedule_suspend(device, 10000); + + return -EBUSY; } static const struct dev_pm_ops rtl8169_pm_ops = { @@ -8195,9 +8190,6 @@ static void rtl_shutdown(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct rtl8169_private *tp = netdev_priv(dev); - struct device *d = &pdev->dev; - - pm_runtime_get_sync(d); rtl8169_net_suspend(dev); @@ -8215,8 +8207,6 @@ static void rtl_shutdown(struct pci_dev *pdev) pci_wake_from_d3(pdev, true); pci_set_power_state(pdev, PCI_D3hot); } - - pm_runtime_put_noidle(d); } static void rtl_remove_one(struct pci_dev *pdev) @@ -8701,11 +8691,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl8168_driver_start(tp); } - if (pci_dev_run_wake(pdev)) - pm_runtime_put_noidle(&pdev->dev); - netif_carrier_off(dev); + if (pci_dev_run_wake(pdev)) + pm_runtime_put_sync(&pdev->dev); + return 0; } diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index d47bbbb22e7c..7aa1c12750b3 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -147,7 +147,7 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { [FWNLCR0] = 0x0090, [FWALCR0] = 0x0094, [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, + [TXALCR1] = 0x00a4, [RXNLCR1] = 0x00a8, [RXALCR1] = 0x00ac, [FWNLCR1] = 0x00b0, @@ -399,7 +399,7 @@ static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = { [FWNLCR0] = 0x0090, [FWALCR0] = 0x0094, [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, + [TXALCR1] = 0x00a4, [RXNLCR1] = 0x00a8, [RXALCR1] = 0x00ac, [FWNLCR1] = 0x00b0, @@ -3225,18 +3225,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* ioremap the TSU registers */ if (mdp->cd->tsu) { struct resource *rtsu; + rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1); - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu); - if (IS_ERR(mdp->tsu_addr)) { - ret = PTR_ERR(mdp->tsu_addr); + if (!rtsu) { + dev_err(&pdev->dev, "no TSU resource\n"); + ret = -ENODEV; + goto out_release; + } + /* We can only request the TSU region for the first port + * of the two sharing this TSU for the probe to succeed... + */ + if (devno % 2 == 0 && + !devm_request_mem_region(&pdev->dev, rtsu->start, + resource_size(rtsu), + dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "can't request TSU resource.\n"); + ret = -EBUSY; + goto out_release; + } + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start, + resource_size(rtsu)); + if (!mdp->tsu_addr) { + dev_err(&pdev->dev, "TSU region ioremap() failed.\n"); + ret = -ENOMEM; goto out_release; } mdp->port = devno % 2; ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } - /* initialize first or needed device */ - if (!devno || pd->needs_init) { + /* Need to init only the first port of the two sharing a TSU */ + if (devno % 2 == 0) { if (mdp->cd->chip_reset) mdp->cd->chip_reset(ndev); diff --git a/drivers/net/ethernet/socionext/Kconfig b/drivers/net/ethernet/socionext/Kconfig index 3a1829e59e24..6bcfe27fc560 100644 --- a/drivers/net/ethernet/socionext/Kconfig +++ b/drivers/net/ethernet/socionext/Kconfig @@ -19,4 +19,16 @@ config SNI_AVE Driver for gigabit ethernet MACs, called AVE, in the Socionext UniPhier family. +config SNI_NETSEC + tristate "Socionext NETSEC ethernet support" + depends on (ARCH_SYNQUACER || COMPILE_TEST) && OF + select PHYLIB + select MII + ---help--- + Enable to add support for the SocioNext NetSec Gigabit Ethernet + controller + PHY, as found on the Synquacer SC2A11 SoC + + To compile this driver as a module, choose M here: the module will be + called netsec. If unsure, say N. + endif #NET_VENDOR_SOCIONEXT diff --git a/drivers/net/ethernet/socionext/Makefile b/drivers/net/ethernet/socionext/Makefile index ab83df63b670..7fd837a999fd 100644 --- a/drivers/net/ethernet/socionext/Makefile +++ b/drivers/net/ethernet/socionext/Makefile @@ -3,3 +3,4 @@ # Makefile for all ethernet ip drivers on Socionext platforms # obj-$(CONFIG_SNI_AVE) += sni_ave.o +obj-$(CONFIG_SNI_NETSEC) += netsec.o diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c new file mode 100644 index 000000000000..a8edcf387bba --- /dev/null +++ b/drivers/net/ethernet/socionext/netsec.c @@ -0,0 +1,1775 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/types.h> +#include <linux/clk.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/acpi.h> +#include <linux/of_mdio.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> + +#include <net/tcp.h> +#include <net/ip6_checksum.h> + +#define NETSEC_REG_SOFT_RST 0x104 +#define NETSEC_REG_COM_INIT 0x120 + +#define NETSEC_REG_TOP_STATUS 0x200 +#define NETSEC_IRQ_RX BIT(1) +#define NETSEC_IRQ_TX BIT(0) + +#define NETSEC_REG_TOP_INTEN 0x204 +#define NETSEC_REG_INTEN_SET 0x234 +#define NETSEC_REG_INTEN_CLR 0x238 + +#define NETSEC_REG_NRM_TX_STATUS 0x400 +#define NETSEC_REG_NRM_TX_INTEN 0x404 +#define NETSEC_REG_NRM_TX_INTEN_SET 0x428 +#define NETSEC_REG_NRM_TX_INTEN_CLR 0x42c +#define NRM_TX_ST_NTOWNR BIT(17) +#define NRM_TX_ST_TR_ERR BIT(16) +#define NRM_TX_ST_TXDONE BIT(15) +#define NRM_TX_ST_TMREXP BIT(14) + +#define NETSEC_REG_NRM_RX_STATUS 0x440 +#define NETSEC_REG_NRM_RX_INTEN 0x444 +#define NETSEC_REG_NRM_RX_INTEN_SET 0x468 +#define NETSEC_REG_NRM_RX_INTEN_CLR 0x46c +#define NRM_RX_ST_RC_ERR BIT(16) +#define NRM_RX_ST_PKTCNT BIT(15) +#define NRM_RX_ST_TMREXP BIT(14) + +#define NETSEC_REG_PKT_CMD_BUF 0xd0 + +#define NETSEC_REG_CLK_EN 0x100 + +#define NETSEC_REG_PKT_CTRL 0x140 + +#define NETSEC_REG_DMA_TMR_CTRL 0x20c +#define NETSEC_REG_F_TAIKI_MC_VER 0x22c +#define NETSEC_REG_F_TAIKI_VER 0x230 +#define NETSEC_REG_DMA_HM_CTRL 0x214 +#define NETSEC_REG_DMA_MH_CTRL 0x220 +#define NETSEC_REG_ADDR_DIS_CORE 0x218 +#define NETSEC_REG_DMAC_HM_CMD_BUF 0x210 +#define NETSEC_REG_DMAC_MH_CMD_BUF 0x21c + +#define NETSEC_REG_NRM_TX_PKTCNT 0x410 + +#define NETSEC_REG_NRM_TX_DONE_PKTCNT 0x414 +#define NETSEC_REG_NRM_TX_DONE_TXINT_PKTCNT 0x418 + +#define NETSEC_REG_NRM_TX_TMR 0x41c + +#define NETSEC_REG_NRM_RX_PKTCNT 0x454 +#define NETSEC_REG_NRM_RX_RXINT_PKTCNT 0x458 +#define NETSEC_REG_NRM_TX_TXINT_TMR 0x420 +#define NETSEC_REG_NRM_RX_RXINT_TMR 0x460 + +#define NETSEC_REG_NRM_RX_TMR 0x45c + +#define NETSEC_REG_NRM_TX_DESC_START_UP 0x434 +#define NETSEC_REG_NRM_TX_DESC_START_LW 0x408 +#define NETSEC_REG_NRM_RX_DESC_START_UP 0x474 +#define NETSEC_REG_NRM_RX_DESC_START_LW 0x448 + +#define NETSEC_REG_NRM_TX_CONFIG 0x430 +#define NETSEC_REG_NRM_RX_CONFIG 0x470 + +#define MAC_REG_STATUS 0x1024 +#define MAC_REG_DATA 0x11c0 +#define MAC_REG_CMD 0x11c4 +#define MAC_REG_FLOW_TH 0x11cc +#define MAC_REG_INTF_SEL 0x11d4 +#define MAC_REG_DESC_INIT 0x11fc +#define MAC_REG_DESC_SOFT_RST 0x1204 +#define NETSEC_REG_MODE_TRANS_COMP_STATUS 0x500 + +#define GMAC_REG_MCR 0x0000 +#define GMAC_REG_MFFR 0x0004 +#define GMAC_REG_GAR 0x0010 +#define GMAC_REG_GDR 0x0014 +#define GMAC_REG_FCR 0x0018 +#define GMAC_REG_BMR 0x1000 +#define GMAC_REG_RDLAR 0x100c +#define GMAC_REG_TDLAR 0x1010 +#define GMAC_REG_OMR 0x1018 + +#define MHZ(n) ((n) * 1000 * 1000) + +#define NETSEC_TX_SHIFT_OWN_FIELD 31 +#define NETSEC_TX_SHIFT_LD_FIELD 30 +#define NETSEC_TX_SHIFT_DRID_FIELD 24 +#define NETSEC_TX_SHIFT_PT_FIELD 21 +#define NETSEC_TX_SHIFT_TDRID_FIELD 16 +#define NETSEC_TX_SHIFT_CC_FIELD 15 +#define NETSEC_TX_SHIFT_FS_FIELD 9 +#define NETSEC_TX_LAST 8 +#define NETSEC_TX_SHIFT_CO 7 +#define NETSEC_TX_SHIFT_SO 6 +#define NETSEC_TX_SHIFT_TRS_FIELD 4 + +#define NETSEC_RX_PKT_OWN_FIELD 31 +#define NETSEC_RX_PKT_LD_FIELD 30 +#define NETSEC_RX_PKT_SDRID_FIELD 24 +#define NETSEC_RX_PKT_FR_FIELD 23 +#define NETSEC_RX_PKT_ER_FIELD 21 +#define NETSEC_RX_PKT_ERR_FIELD 16 +#define NETSEC_RX_PKT_TDRID_FIELD 12 +#define NETSEC_RX_PKT_FS_FIELD 9 +#define NETSEC_RX_PKT_LS_FIELD 8 +#define NETSEC_RX_PKT_CO_FIELD 6 + +#define NETSEC_RX_PKT_ERR_MASK 3 + +#define NETSEC_MAX_TX_PKT_LEN 1518 +#define NETSEC_MAX_TX_JUMBO_PKT_LEN 9018 + +#define NETSEC_RING_GMAC 15 +#define NETSEC_RING_MAX 2 + +#define NETSEC_TCP_SEG_LEN_MAX 1460 +#define NETSEC_TCP_JUMBO_SEG_LEN_MAX 8960 + +#define NETSEC_RX_CKSUM_NOTAVAIL 0 +#define NETSEC_RX_CKSUM_OK 1 +#define NETSEC_RX_CKSUM_NG 2 + +#define NETSEC_TOP_IRQ_REG_CODE_LOAD_END BIT(20) +#define NETSEC_IRQ_TRANSITION_COMPLETE BIT(4) + +#define NETSEC_MODE_TRANS_COMP_IRQ_N2T BIT(20) +#define NETSEC_MODE_TRANS_COMP_IRQ_T2N BIT(19) + +#define NETSEC_INT_PKTCNT_MAX 2047 + +#define NETSEC_FLOW_START_TH_MAX 95 +#define NETSEC_FLOW_STOP_TH_MAX 95 +#define NETSEC_FLOW_PAUSE_TIME_MIN 5 + +#define NETSEC_CLK_EN_REG_DOM_ALL 0x3f + +#define NETSEC_PKT_CTRL_REG_MODE_NRM BIT(28) +#define NETSEC_PKT_CTRL_REG_EN_JUMBO BIT(27) +#define NETSEC_PKT_CTRL_REG_LOG_CHKSUM_ER BIT(3) +#define NETSEC_PKT_CTRL_REG_LOG_HD_INCOMPLETE BIT(2) +#define NETSEC_PKT_CTRL_REG_LOG_HD_ER BIT(1) +#define NETSEC_PKT_CTRL_REG_DRP_NO_MATCH BIT(0) + +#define NETSEC_CLK_EN_REG_DOM_G BIT(5) +#define NETSEC_CLK_EN_REG_DOM_C BIT(1) +#define NETSEC_CLK_EN_REG_DOM_D BIT(0) + +#define NETSEC_COM_INIT_REG_DB BIT(2) +#define NETSEC_COM_INIT_REG_CLS BIT(1) +#define NETSEC_COM_INIT_REG_ALL (NETSEC_COM_INIT_REG_CLS | \ + NETSEC_COM_INIT_REG_DB) + +#define NETSEC_SOFT_RST_REG_RESET 0 +#define NETSEC_SOFT_RST_REG_RUN BIT(31) + +#define NETSEC_DMA_CTRL_REG_STOP 1 +#define MH_CTRL__MODE_TRANS BIT(20) + +#define NETSEC_GMAC_CMD_ST_READ 0 +#define NETSEC_GMAC_CMD_ST_WRITE BIT(28) +#define NETSEC_GMAC_CMD_ST_BUSY BIT(31) + +#define NETSEC_GMAC_BMR_REG_COMMON 0x00412080 +#define NETSEC_GMAC_BMR_REG_RESET 0x00020181 +#define NETSEC_GMAC_BMR_REG_SWR 0x00000001 + +#define NETSEC_GMAC_OMR_REG_ST BIT(13) +#define NETSEC_GMAC_OMR_REG_SR BIT(1) + +#define NETSEC_GMAC_MCR_REG_IBN BIT(30) +#define NETSEC_GMAC_MCR_REG_CST BIT(25) +#define NETSEC_GMAC_MCR_REG_JE BIT(20) +#define NETSEC_MCR_PS BIT(15) +#define NETSEC_GMAC_MCR_REG_FES BIT(14) +#define NETSEC_GMAC_MCR_REG_FULL_DUPLEX_COMMON 0x0000280c +#define NETSEC_GMAC_MCR_REG_HALF_DUPLEX_COMMON 0x0001a00c + +#define NETSEC_FCR_RFE BIT(2) +#define NETSEC_FCR_TFE BIT(1) + +#define NETSEC_GMAC_GAR_REG_GW BIT(1) +#define NETSEC_GMAC_GAR_REG_GB BIT(0) + +#define NETSEC_GMAC_GAR_REG_SHIFT_PA 11 +#define NETSEC_GMAC_GAR_REG_SHIFT_GR 6 +#define GMAC_REG_SHIFT_CR_GAR 2 + +#define NETSEC_GMAC_GAR_REG_CR_25_35_MHZ 2 +#define NETSEC_GMAC_GAR_REG_CR_35_60_MHZ 3 +#define NETSEC_GMAC_GAR_REG_CR_60_100_MHZ 0 +#define NETSEC_GMAC_GAR_REG_CR_100_150_MHZ 1 +#define NETSEC_GMAC_GAR_REG_CR_150_250_MHZ 4 +#define NETSEC_GMAC_GAR_REG_CR_250_300_MHZ 5 + +#define NETSEC_GMAC_RDLAR_REG_COMMON 0x18000 +#define NETSEC_GMAC_TDLAR_REG_COMMON 0x1c000 + +#define NETSEC_REG_NETSEC_VER_F_TAIKI 0x50000 + +#define NETSEC_REG_DESC_RING_CONFIG_CFG_UP BIT(31) +#define NETSEC_REG_DESC_RING_CONFIG_CH_RST BIT(30) +#define NETSEC_REG_DESC_TMR_MODE 4 +#define NETSEC_REG_DESC_ENDIAN 0 + +#define NETSEC_MAC_DESC_SOFT_RST_SOFT_RST 1 +#define NETSEC_MAC_DESC_INIT_REG_INIT 1 + +#define NETSEC_EEPROM_MAC_ADDRESS 0x00 +#define NETSEC_EEPROM_HM_ME_ADDRESS_H 0x08 +#define NETSEC_EEPROM_HM_ME_ADDRESS_L 0x0C +#define NETSEC_EEPROM_HM_ME_SIZE 0x10 +#define NETSEC_EEPROM_MH_ME_ADDRESS_H 0x14 +#define NETSEC_EEPROM_MH_ME_ADDRESS_L 0x18 +#define NETSEC_EEPROM_MH_ME_SIZE 0x1C +#define NETSEC_EEPROM_PKT_ME_ADDRESS 0x20 +#define NETSEC_EEPROM_PKT_ME_SIZE 0x24 + +#define DESC_NUM 128 +#define NAPI_BUDGET (DESC_NUM / 2) + +#define DESC_SZ sizeof(struct netsec_de) + +#define NETSEC_F_NETSEC_VER_MAJOR_NUM(x) ((x) & 0xffff0000) + +enum ring_id { + NETSEC_RING_TX = 0, + NETSEC_RING_RX +}; + +struct netsec_desc { + struct sk_buff *skb; + dma_addr_t dma_addr; + void *addr; + u16 len; +}; + +struct netsec_desc_ring { + phys_addr_t desc_phys; + struct netsec_desc *desc; + void *vaddr; + u16 pkt_cnt; + u16 head, tail; +}; + +struct netsec_priv { + struct netsec_desc_ring desc_ring[NETSEC_RING_MAX]; + struct ethtool_coalesce et_coalesce; + spinlock_t reglock; /* protect reg access */ + struct napi_struct napi; + phy_interface_t phy_interface; + struct net_device *ndev; + struct device_node *phy_np; + struct phy_device *phydev; + struct mii_bus *mii_bus; + void __iomem *ioaddr; + void __iomem *eeprom_base; + struct device *dev; + struct clk *clk; + u32 msg_enable; + u32 freq; + bool rx_cksum_offload_flag; +}; + +struct netsec_de { /* Netsec Descriptor layout */ + u32 attr; + u32 data_buf_addr_up; + u32 data_buf_addr_lw; + u32 buf_len_info; +}; + +struct netsec_tx_pkt_ctrl { + u16 tcp_seg_len; + bool tcp_seg_offload_flag; + bool cksum_offload_flag; +}; + +struct netsec_rx_pkt_info { + int rx_cksum_result; + int err_code; + bool err_flag; +}; + +static void netsec_write(struct netsec_priv *priv, u32 reg_addr, u32 val) +{ + writel(val, priv->ioaddr + reg_addr); +} + +static u32 netsec_read(struct netsec_priv *priv, u32 reg_addr) +{ + return readl(priv->ioaddr + reg_addr); +} + +/************* MDIO BUS OPS FOLLOW *************/ + +#define TIMEOUT_SPINS_MAC 1000 +#define TIMEOUT_SECONDARY_MS_MAC 100 + +static u32 netsec_clk_type(u32 freq) +{ + if (freq < MHZ(35)) + return NETSEC_GMAC_GAR_REG_CR_25_35_MHZ; + if (freq < MHZ(60)) + return NETSEC_GMAC_GAR_REG_CR_35_60_MHZ; + if (freq < MHZ(100)) + return NETSEC_GMAC_GAR_REG_CR_60_100_MHZ; + if (freq < MHZ(150)) + return NETSEC_GMAC_GAR_REG_CR_100_150_MHZ; + if (freq < MHZ(250)) + return NETSEC_GMAC_GAR_REG_CR_150_250_MHZ; + + return NETSEC_GMAC_GAR_REG_CR_250_300_MHZ; +} + +static int netsec_wait_while_busy(struct netsec_priv *priv, u32 addr, u32 mask) +{ + u32 timeout = TIMEOUT_SPINS_MAC; + + while (--timeout && netsec_read(priv, addr) & mask) + cpu_relax(); + if (timeout) + return 0; + + timeout = TIMEOUT_SECONDARY_MS_MAC; + while (--timeout && netsec_read(priv, addr) & mask) + usleep_range(1000, 2000); + + if (timeout) + return 0; + + netdev_WARN(priv->ndev, "%s: timeout\n", __func__); + + return -ETIMEDOUT; +} + +static int netsec_mac_write(struct netsec_priv *priv, u32 addr, u32 value) +{ + netsec_write(priv, MAC_REG_DATA, value); + netsec_write(priv, MAC_REG_CMD, addr | NETSEC_GMAC_CMD_ST_WRITE); + return netsec_wait_while_busy(priv, + MAC_REG_CMD, NETSEC_GMAC_CMD_ST_BUSY); +} + +static int netsec_mac_read(struct netsec_priv *priv, u32 addr, u32 *read) +{ + int ret; + + netsec_write(priv, MAC_REG_CMD, addr | NETSEC_GMAC_CMD_ST_READ); + ret = netsec_wait_while_busy(priv, + MAC_REG_CMD, NETSEC_GMAC_CMD_ST_BUSY); + if (ret) + return ret; + + *read = netsec_read(priv, MAC_REG_DATA); + + return 0; +} + +static int netsec_mac_wait_while_busy(struct netsec_priv *priv, + u32 addr, u32 mask) +{ + u32 timeout = TIMEOUT_SPINS_MAC; + int ret, data; + + do { + ret = netsec_mac_read(priv, addr, &data); + if (ret) + break; + cpu_relax(); + } while (--timeout && (data & mask)); + + if (timeout) + return 0; + + timeout = TIMEOUT_SECONDARY_MS_MAC; + do { + usleep_range(1000, 2000); + + ret = netsec_mac_read(priv, addr, &data); + if (ret) + break; + cpu_relax(); + } while (--timeout && (data & mask)); + + if (timeout && !ret) + return 0; + + netdev_WARN(priv->ndev, "%s: timeout\n", __func__); + + return -ETIMEDOUT; +} + +static int netsec_mac_update_to_phy_state(struct netsec_priv *priv) +{ + struct phy_device *phydev = priv->ndev->phydev; + u32 value = 0; + + value = phydev->duplex ? NETSEC_GMAC_MCR_REG_FULL_DUPLEX_COMMON : + NETSEC_GMAC_MCR_REG_HALF_DUPLEX_COMMON; + + if (phydev->speed != SPEED_1000) + value |= NETSEC_MCR_PS; + + if (priv->phy_interface != PHY_INTERFACE_MODE_GMII && + phydev->speed == SPEED_100) + value |= NETSEC_GMAC_MCR_REG_FES; + + value |= NETSEC_GMAC_MCR_REG_CST | NETSEC_GMAC_MCR_REG_JE; + + if (phy_interface_mode_is_rgmii(priv->phy_interface)) + value |= NETSEC_GMAC_MCR_REG_IBN; + + if (netsec_mac_write(priv, GMAC_REG_MCR, value)) + return -ETIMEDOUT; + + return 0; +} + +static int netsec_phy_write(struct mii_bus *bus, + int phy_addr, int reg, u16 val) +{ + struct netsec_priv *priv = bus->priv; + + if (netsec_mac_write(priv, GMAC_REG_GDR, val)) + return -ETIMEDOUT; + if (netsec_mac_write(priv, GMAC_REG_GAR, + phy_addr << NETSEC_GMAC_GAR_REG_SHIFT_PA | + reg << NETSEC_GMAC_GAR_REG_SHIFT_GR | + NETSEC_GMAC_GAR_REG_GW | NETSEC_GMAC_GAR_REG_GB | + (netsec_clk_type(priv->freq) << + GMAC_REG_SHIFT_CR_GAR))) + return -ETIMEDOUT; + + return netsec_mac_wait_while_busy(priv, GMAC_REG_GAR, + NETSEC_GMAC_GAR_REG_GB); +} + +static int netsec_phy_read(struct mii_bus *bus, int phy_addr, int reg_addr) +{ + struct netsec_priv *priv = bus->priv; + u32 data; + int ret; + + if (netsec_mac_write(priv, GMAC_REG_GAR, NETSEC_GMAC_GAR_REG_GB | + phy_addr << NETSEC_GMAC_GAR_REG_SHIFT_PA | + reg_addr << NETSEC_GMAC_GAR_REG_SHIFT_GR | + (netsec_clk_type(priv->freq) << + GMAC_REG_SHIFT_CR_GAR))) + return -ETIMEDOUT; + + ret = netsec_mac_wait_while_busy(priv, GMAC_REG_GAR, + NETSEC_GMAC_GAR_REG_GB); + if (ret) + return ret; + + ret = netsec_mac_read(priv, GMAC_REG_GDR, &data); + if (ret) + return ret; + + return data; +} + +/************* ETHTOOL_OPS FOLLOW *************/ + +static void netsec_et_get_drvinfo(struct net_device *net_device, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, "netsec", sizeof(info->driver)); + strlcpy(info->bus_info, dev_name(net_device->dev.parent), + sizeof(info->bus_info)); +} + +static int netsec_et_get_coalesce(struct net_device *net_device, + struct ethtool_coalesce *et_coalesce) +{ + struct netsec_priv *priv = netdev_priv(net_device); + + *et_coalesce = priv->et_coalesce; + + return 0; +} + +static int netsec_et_set_coalesce(struct net_device *net_device, + struct ethtool_coalesce *et_coalesce) +{ + struct netsec_priv *priv = netdev_priv(net_device); + + priv->et_coalesce = *et_coalesce; + + if (priv->et_coalesce.tx_coalesce_usecs < 50) + priv->et_coalesce.tx_coalesce_usecs = 50; + if (priv->et_coalesce.tx_max_coalesced_frames < 1) + priv->et_coalesce.tx_max_coalesced_frames = 1; + + netsec_write(priv, NETSEC_REG_NRM_TX_DONE_TXINT_PKTCNT, + priv->et_coalesce.tx_max_coalesced_frames); + netsec_write(priv, NETSEC_REG_NRM_TX_TXINT_TMR, + priv->et_coalesce.tx_coalesce_usecs); + netsec_write(priv, NETSEC_REG_NRM_TX_INTEN_SET, NRM_TX_ST_TXDONE); + netsec_write(priv, NETSEC_REG_NRM_TX_INTEN_SET, NRM_TX_ST_TMREXP); + + if (priv->et_coalesce.rx_coalesce_usecs < 50) + priv->et_coalesce.rx_coalesce_usecs = 50; + if (priv->et_coalesce.rx_max_coalesced_frames < 1) + priv->et_coalesce.rx_max_coalesced_frames = 1; + + netsec_write(priv, NETSEC_REG_NRM_RX_RXINT_PKTCNT, + priv->et_coalesce.rx_max_coalesced_frames); + netsec_write(priv, NETSEC_REG_NRM_RX_RXINT_TMR, + priv->et_coalesce.rx_coalesce_usecs); + netsec_write(priv, NETSEC_REG_NRM_RX_INTEN_SET, NRM_RX_ST_PKTCNT); + netsec_write(priv, NETSEC_REG_NRM_RX_INTEN_SET, NRM_RX_ST_TMREXP); + + return 0; +} + +static u32 netsec_et_get_msglevel(struct net_device *dev) +{ + struct netsec_priv *priv = netdev_priv(dev); + + return priv->msg_enable; +} + +static void netsec_et_set_msglevel(struct net_device *dev, u32 datum) +{ + struct netsec_priv *priv = netdev_priv(dev); + + priv->msg_enable = datum; +} + +static const struct ethtool_ops netsec_ethtool_ops = { + .get_drvinfo = netsec_et_get_drvinfo, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_link = ethtool_op_get_link, + .get_coalesce = netsec_et_get_coalesce, + .set_coalesce = netsec_et_set_coalesce, + .get_msglevel = netsec_et_get_msglevel, + .set_msglevel = netsec_et_set_msglevel, +}; + +/************* NETDEV_OPS FOLLOW *************/ + +static struct sk_buff *netsec_alloc_skb(struct netsec_priv *priv, + struct netsec_desc *desc) +{ + struct sk_buff *skb; + + if (device_get_dma_attr(priv->dev) == DEV_DMA_COHERENT) { + skb = netdev_alloc_skb_ip_align(priv->ndev, desc->len); + } else { + desc->len = L1_CACHE_ALIGN(desc->len); + skb = netdev_alloc_skb(priv->ndev, desc->len); + } + if (!skb) + return NULL; + + desc->addr = skb->data; + desc->dma_addr = dma_map_single(priv->dev, desc->addr, desc->len, + DMA_FROM_DEVICE); + if (dma_mapping_error(priv->dev, desc->dma_addr)) { + dev_kfree_skb_any(skb); + return NULL; + } + return skb; +} + +static void netsec_set_rx_de(struct netsec_priv *priv, + struct netsec_desc_ring *dring, u16 idx, + const struct netsec_desc *desc, + struct sk_buff *skb) +{ + struct netsec_de *de = dring->vaddr + DESC_SZ * idx; + u32 attr = (1 << NETSEC_RX_PKT_OWN_FIELD) | + (1 << NETSEC_RX_PKT_FS_FIELD) | + (1 << NETSEC_RX_PKT_LS_FIELD); + + if (idx == DESC_NUM - 1) + attr |= (1 << NETSEC_RX_PKT_LD_FIELD); + + de->data_buf_addr_up = upper_32_bits(desc->dma_addr); + de->data_buf_addr_lw = lower_32_bits(desc->dma_addr); + de->buf_len_info = desc->len; + de->attr = attr; + dma_wmb(); + + dring->desc[idx].dma_addr = desc->dma_addr; + dring->desc[idx].addr = desc->addr; + dring->desc[idx].len = desc->len; + dring->desc[idx].skb = skb; +} + +static struct sk_buff *netsec_get_rx_de(struct netsec_priv *priv, + struct netsec_desc_ring *dring, + u16 idx, + struct netsec_rx_pkt_info *rxpi, + struct netsec_desc *desc, u16 *len) +{ + struct netsec_de de = {}; + + memcpy(&de, dring->vaddr + DESC_SZ * idx, DESC_SZ); + + *len = de.buf_len_info >> 16; + + rxpi->err_flag = (de.attr >> NETSEC_RX_PKT_ER_FIELD) & 1; + rxpi->rx_cksum_result = (de.attr >> NETSEC_RX_PKT_CO_FIELD) & 3; + rxpi->err_code = (de.attr >> NETSEC_RX_PKT_ERR_FIELD) & + NETSEC_RX_PKT_ERR_MASK; + *desc = dring->desc[idx]; + return desc->skb; +} + +static struct sk_buff *netsec_get_rx_pkt_data(struct netsec_priv *priv, + struct netsec_rx_pkt_info *rxpi, + struct netsec_desc *desc, + u16 *len) +{ + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; + struct sk_buff *tmp_skb, *skb = NULL; + struct netsec_desc td; + int tail; + + *rxpi = (struct netsec_rx_pkt_info){}; + + td.len = priv->ndev->mtu + 22; + + tmp_skb = netsec_alloc_skb(priv, &td); + + dma_rmb(); + + tail = dring->tail; + + if (!tmp_skb) { + netsec_set_rx_de(priv, dring, tail, &dring->desc[tail], + dring->desc[tail].skb); + } else { + skb = netsec_get_rx_de(priv, dring, tail, rxpi, desc, len); + netsec_set_rx_de(priv, dring, tail, &td, tmp_skb); + } + + /* move tail ahead */ + dring->tail = (dring->tail + 1) % DESC_NUM; + + dring->pkt_cnt--; + + return skb; +} + +static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget) +{ + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; + unsigned int pkts, bytes; + + dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT); + + if (dring->pkt_cnt < budget) + budget = dring->pkt_cnt; + + pkts = 0; + bytes = 0; + + while (pkts < budget) { + struct netsec_desc *desc; + struct netsec_de *entry; + int tail, eop; + + tail = dring->tail; + + /* move tail ahead */ + dring->tail = (tail + 1) % DESC_NUM; + + desc = &dring->desc[tail]; + entry = dring->vaddr + DESC_SZ * tail; + + eop = (entry->attr >> NETSEC_TX_LAST) & 1; + + dma_unmap_single(priv->dev, desc->dma_addr, desc->len, + DMA_TO_DEVICE); + if (eop) { + pkts++; + bytes += desc->skb->len; + dev_kfree_skb(desc->skb); + } + *desc = (struct netsec_desc){}; + } + dring->pkt_cnt -= budget; + + priv->ndev->stats.tx_packets += budget; + priv->ndev->stats.tx_bytes += bytes; + + netdev_completed_queue(priv->ndev, budget, bytes); + + return budget; +} + +static int netsec_process_tx(struct netsec_priv *priv, int budget) +{ + struct net_device *ndev = priv->ndev; + int new, done = 0; + + do { + new = netsec_clean_tx_dring(priv, budget); + done += new; + budget -= new; + } while (new); + + if (done && netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + + return done; +} + +static int netsec_process_rx(struct netsec_priv *priv, int budget) +{ + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; + struct net_device *ndev = priv->ndev; + struct netsec_rx_pkt_info rx_info; + int done = 0, rx_num = 0; + struct netsec_desc desc; + struct sk_buff *skb; + u16 len; + + while (done < budget) { + if (!rx_num) { + rx_num = netsec_read(priv, NETSEC_REG_NRM_RX_PKTCNT); + dring->pkt_cnt += rx_num; + + /* move head 'rx_num' */ + dring->head = (dring->head + rx_num) % DESC_NUM; + + rx_num = dring->pkt_cnt; + if (!rx_num) + break; + } + done++; + rx_num--; + skb = netsec_get_rx_pkt_data(priv, &rx_info, &desc, &len); + if (unlikely(!skb) || rx_info.err_flag) { + netif_err(priv, drv, priv->ndev, + "%s: rx fail err(%d)\n", + __func__, rx_info.err_code); + ndev->stats.rx_dropped++; + continue; + } + + dma_unmap_single(priv->dev, desc.dma_addr, desc.len, + DMA_FROM_DEVICE); + skb_put(skb, len); + skb->protocol = eth_type_trans(skb, priv->ndev); + + if (priv->rx_cksum_offload_flag && + rx_info.rx_cksum_result == NETSEC_RX_CKSUM_OK) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (napi_gro_receive(&priv->napi, skb) != GRO_DROP) { + ndev->stats.rx_packets++; + ndev->stats.rx_bytes += len; + } + } + + return done; +} + +static int netsec_napi_poll(struct napi_struct *napi, int budget) +{ + struct netsec_priv *priv; + struct net_device *ndev; + int tx, rx, done, todo; + + priv = container_of(napi, struct netsec_priv, napi); + ndev = priv->ndev; + + todo = budget; + do { + if (!todo) + break; + + tx = netsec_process_tx(priv, todo); + todo -= tx; + + if (!todo) + break; + + rx = netsec_process_rx(priv, todo); + todo -= rx; + } while (rx || tx); + + done = budget - todo; + + if (done < budget && napi_complete_done(napi, done)) { + unsigned long flags; + + spin_lock_irqsave(&priv->reglock, flags); + netsec_write(priv, NETSEC_REG_INTEN_SET, + NETSEC_IRQ_RX | NETSEC_IRQ_TX); + spin_unlock_irqrestore(&priv->reglock, flags); + } + + return done; +} + +static void netsec_set_tx_de(struct netsec_priv *priv, + struct netsec_desc_ring *dring, + const struct netsec_tx_pkt_ctrl *tx_ctrl, + const struct netsec_desc *desc, + struct sk_buff *skb) +{ + int idx = dring->head; + struct netsec_de *de; + u32 attr; + + de = dring->vaddr + (DESC_SZ * idx); + + attr = (1 << NETSEC_TX_SHIFT_OWN_FIELD) | + (1 << NETSEC_TX_SHIFT_PT_FIELD) | + (NETSEC_RING_GMAC << NETSEC_TX_SHIFT_TDRID_FIELD) | + (1 << NETSEC_TX_SHIFT_FS_FIELD) | + (1 << NETSEC_TX_LAST) | + (tx_ctrl->cksum_offload_flag << NETSEC_TX_SHIFT_CO) | + (tx_ctrl->tcp_seg_offload_flag << NETSEC_TX_SHIFT_SO) | + (1 << NETSEC_TX_SHIFT_TRS_FIELD); + if (idx == DESC_NUM - 1) + attr |= (1 << NETSEC_TX_SHIFT_LD_FIELD); + + de->data_buf_addr_up = upper_32_bits(desc->dma_addr); + de->data_buf_addr_lw = lower_32_bits(desc->dma_addr); + de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len; + de->attr = attr; + dma_wmb(); + + dring->desc[idx] = *desc; + dring->desc[idx].skb = skb; + + /* move head ahead */ + dring->head = (dring->head + 1) % DESC_NUM; +} + +static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + struct netsec_priv *priv = netdev_priv(ndev); + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; + struct netsec_tx_pkt_ctrl tx_ctrl = {}; + struct netsec_desc tx_desc; + u16 tso_seg_len = 0; + int filled; + + /* differentiate between full/emtpy ring */ + if (dring->head >= dring->tail) + filled = dring->head - dring->tail; + else + filled = dring->head + DESC_NUM - dring->tail; + + if (DESC_NUM - filled < 2) { /* if less than 2 available */ + netif_err(priv, drv, priv->ndev, "%s: TxQFull!\n", __func__); + netif_stop_queue(priv->ndev); + dma_wmb(); + return NETDEV_TX_BUSY; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL) + tx_ctrl.cksum_offload_flag = true; + + if (skb_is_gso(skb)) + tso_seg_len = skb_shinfo(skb)->gso_size; + + if (tso_seg_len > 0) { + if (skb->protocol == htons(ETH_P_IP)) { + ip_hdr(skb)->tot_len = 0; + tcp_hdr(skb)->check = + ~tcp_v4_check(0, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0); + } else { + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + } + + tx_ctrl.tcp_seg_offload_flag = true; + tx_ctrl.tcp_seg_len = tso_seg_len; + } + + tx_desc.dma_addr = dma_map_single(priv->dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) { + netif_err(priv, drv, priv->ndev, + "%s: DMA mapping failed\n", __func__); + ndev->stats.tx_dropped++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + tx_desc.addr = skb->data; + tx_desc.len = skb_headlen(skb); + + skb_tx_timestamp(skb); + netdev_sent_queue(priv->ndev, skb->len); + + netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb); + netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */ + + return NETDEV_TX_OK; +} + +static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id) +{ + struct netsec_desc_ring *dring = &priv->desc_ring[id]; + struct netsec_desc *desc; + u16 idx; + + if (!dring->vaddr || !dring->desc) + return; + + for (idx = 0; idx < DESC_NUM; idx++) { + desc = &dring->desc[idx]; + if (!desc->addr) + continue; + + dma_unmap_single(priv->dev, desc->dma_addr, desc->len, + id == NETSEC_RING_RX ? DMA_FROM_DEVICE : + DMA_TO_DEVICE); + dev_kfree_skb(desc->skb); + } + + memset(dring->desc, 0, sizeof(struct netsec_desc) * DESC_NUM); + memset(dring->vaddr, 0, DESC_SZ * DESC_NUM); + + dring->head = 0; + dring->tail = 0; + dring->pkt_cnt = 0; +} + +static void netsec_free_dring(struct netsec_priv *priv, int id) +{ + struct netsec_desc_ring *dring = &priv->desc_ring[id]; + + if (dring->vaddr) { + dma_free_coherent(priv->dev, DESC_SZ * DESC_NUM, + dring->vaddr, dring->desc_phys); + dring->vaddr = NULL; + } + + kfree(dring->desc); + dring->desc = NULL; +} + +static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) +{ + struct netsec_desc_ring *dring = &priv->desc_ring[id]; + int ret = 0; + + dring->vaddr = dma_zalloc_coherent(priv->dev, DESC_SZ * DESC_NUM, + &dring->desc_phys, GFP_KERNEL); + if (!dring->vaddr) { + ret = -ENOMEM; + goto err; + } + + dring->desc = kzalloc(DESC_NUM * sizeof(*dring->desc), GFP_KERNEL); + if (!dring->desc) { + ret = -ENOMEM; + goto err; + } + + return 0; +err: + netsec_free_dring(priv, id); + + return ret; +} + +static int netsec_setup_rx_dring(struct netsec_priv *priv) +{ + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; + struct netsec_desc desc; + struct sk_buff *skb; + int n; + + desc.len = priv->ndev->mtu + 22; + + for (n = 0; n < DESC_NUM; n++) { + skb = netsec_alloc_skb(priv, &desc); + if (!skb) { + netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); + return -ENOMEM; + } + netsec_set_rx_de(priv, dring, n, &desc, skb); + } + + return 0; +} + +static int netsec_netdev_load_ucode_region(struct netsec_priv *priv, u32 reg, + u32 addr_h, u32 addr_l, u32 size) +{ + u64 base = (u64)addr_h << 32 | addr_l; + void __iomem *ucode; + u32 i; + + ucode = ioremap(base, size * sizeof(u32)); + if (!ucode) + return -ENOMEM; + + for (i = 0; i < size; i++) + netsec_write(priv, reg, readl(ucode + i * 4)); + + iounmap(ucode); + return 0; +} + +static int netsec_netdev_load_microcode(struct netsec_priv *priv) +{ + u32 addr_h, addr_l, size; + int err; + + addr_h = readl(priv->eeprom_base + NETSEC_EEPROM_HM_ME_ADDRESS_H); + addr_l = readl(priv->eeprom_base + NETSEC_EEPROM_HM_ME_ADDRESS_L); + size = readl(priv->eeprom_base + NETSEC_EEPROM_HM_ME_SIZE); + err = netsec_netdev_load_ucode_region(priv, NETSEC_REG_DMAC_HM_CMD_BUF, + addr_h, addr_l, size); + if (err) + return err; + + addr_h = readl(priv->eeprom_base + NETSEC_EEPROM_MH_ME_ADDRESS_H); + addr_l = readl(priv->eeprom_base + NETSEC_EEPROM_MH_ME_ADDRESS_L); + size = readl(priv->eeprom_base + NETSEC_EEPROM_MH_ME_SIZE); + err = netsec_netdev_load_ucode_region(priv, NETSEC_REG_DMAC_MH_CMD_BUF, + addr_h, addr_l, size); + if (err) + return err; + + addr_h = 0; + addr_l = readl(priv->eeprom_base + NETSEC_EEPROM_PKT_ME_ADDRESS); + size = readl(priv->eeprom_base + NETSEC_EEPROM_PKT_ME_SIZE); + err = netsec_netdev_load_ucode_region(priv, NETSEC_REG_PKT_CMD_BUF, + addr_h, addr_l, size); + if (err) + return err; + + return 0; +} + +static int netsec_reset_hardware(struct netsec_priv *priv) +{ + u32 value; + int err; + + /* stop DMA engines */ + if (!netsec_read(priv, NETSEC_REG_ADDR_DIS_CORE)) { + netsec_write(priv, NETSEC_REG_DMA_HM_CTRL, + NETSEC_DMA_CTRL_REG_STOP); + netsec_write(priv, NETSEC_REG_DMA_MH_CTRL, + NETSEC_DMA_CTRL_REG_STOP); + + while (netsec_read(priv, NETSEC_REG_DMA_HM_CTRL) & + NETSEC_DMA_CTRL_REG_STOP) + cpu_relax(); + + while (netsec_read(priv, NETSEC_REG_DMA_MH_CTRL) & + NETSEC_DMA_CTRL_REG_STOP) + cpu_relax(); + } + + netsec_write(priv, NETSEC_REG_SOFT_RST, NETSEC_SOFT_RST_REG_RESET); + netsec_write(priv, NETSEC_REG_SOFT_RST, NETSEC_SOFT_RST_REG_RUN); + netsec_write(priv, NETSEC_REG_COM_INIT, NETSEC_COM_INIT_REG_ALL); + + while (netsec_read(priv, NETSEC_REG_COM_INIT) != 0) + cpu_relax(); + + /* set desc_start addr */ + netsec_write(priv, NETSEC_REG_NRM_RX_DESC_START_UP, + upper_32_bits(priv->desc_ring[NETSEC_RING_RX].desc_phys)); + netsec_write(priv, NETSEC_REG_NRM_RX_DESC_START_LW, + lower_32_bits(priv->desc_ring[NETSEC_RING_RX].desc_phys)); + + netsec_write(priv, NETSEC_REG_NRM_TX_DESC_START_UP, + upper_32_bits(priv->desc_ring[NETSEC_RING_TX].desc_phys)); + netsec_write(priv, NETSEC_REG_NRM_TX_DESC_START_LW, + lower_32_bits(priv->desc_ring[NETSEC_RING_TX].desc_phys)); + + /* set normal tx dring ring config */ + netsec_write(priv, NETSEC_REG_NRM_TX_CONFIG, + 1 << NETSEC_REG_DESC_ENDIAN); + netsec_write(priv, NETSEC_REG_NRM_RX_CONFIG, + 1 << NETSEC_REG_DESC_ENDIAN); + + err = netsec_netdev_load_microcode(priv); + if (err) { + netif_err(priv, probe, priv->ndev, + "%s: failed to load microcode (%d)\n", __func__, err); + return err; + } + + /* start DMA engines */ + netsec_write(priv, NETSEC_REG_DMA_TMR_CTRL, priv->freq / 1000000 - 1); + netsec_write(priv, NETSEC_REG_ADDR_DIS_CORE, 0); + + usleep_range(1000, 2000); + + if (!(netsec_read(priv, NETSEC_REG_TOP_STATUS) & + NETSEC_TOP_IRQ_REG_CODE_LOAD_END)) { + netif_err(priv, probe, priv->ndev, + "microengine start failed\n"); + return -ENXIO; + } + netsec_write(priv, NETSEC_REG_TOP_STATUS, + NETSEC_TOP_IRQ_REG_CODE_LOAD_END); + + value = NETSEC_PKT_CTRL_REG_MODE_NRM; + if (priv->ndev->mtu > ETH_DATA_LEN) + value |= NETSEC_PKT_CTRL_REG_EN_JUMBO; + + /* change to normal mode */ + netsec_write(priv, NETSEC_REG_DMA_MH_CTRL, MH_CTRL__MODE_TRANS); + netsec_write(priv, NETSEC_REG_PKT_CTRL, value); + + while ((netsec_read(priv, NETSEC_REG_MODE_TRANS_COMP_STATUS) & + NETSEC_MODE_TRANS_COMP_IRQ_T2N) == 0) + cpu_relax(); + + /* clear any pending EMPTY/ERR irq status */ + netsec_write(priv, NETSEC_REG_NRM_TX_STATUS, ~0); + + /* Disable TX & RX intr */ + netsec_write(priv, NETSEC_REG_INTEN_CLR, ~0); + + return 0; +} + +static int netsec_start_gmac(struct netsec_priv *priv) +{ + struct phy_device *phydev = priv->ndev->phydev; + u32 value = 0; + int ret; + + if (phydev->speed != SPEED_1000) + value = (NETSEC_GMAC_MCR_REG_CST | + NETSEC_GMAC_MCR_REG_HALF_DUPLEX_COMMON); + + if (netsec_mac_write(priv, GMAC_REG_MCR, value)) + return -ETIMEDOUT; + if (netsec_mac_write(priv, GMAC_REG_BMR, + NETSEC_GMAC_BMR_REG_RESET)) + return -ETIMEDOUT; + + /* Wait soft reset */ + usleep_range(1000, 5000); + + ret = netsec_mac_read(priv, GMAC_REG_BMR, &value); + if (ret) + return ret; + if (value & NETSEC_GMAC_BMR_REG_SWR) + return -EAGAIN; + + netsec_write(priv, MAC_REG_DESC_SOFT_RST, 1); + if (netsec_wait_while_busy(priv, MAC_REG_DESC_SOFT_RST, 1)) + return -ETIMEDOUT; + + netsec_write(priv, MAC_REG_DESC_INIT, 1); + if (netsec_wait_while_busy(priv, MAC_REG_DESC_INIT, 1)) + return -ETIMEDOUT; + + if (netsec_mac_write(priv, GMAC_REG_BMR, + NETSEC_GMAC_BMR_REG_COMMON)) + return -ETIMEDOUT; + if (netsec_mac_write(priv, GMAC_REG_RDLAR, + NETSEC_GMAC_RDLAR_REG_COMMON)) + return -ETIMEDOUT; + if (netsec_mac_write(priv, GMAC_REG_TDLAR, + NETSEC_GMAC_TDLAR_REG_COMMON)) + return -ETIMEDOUT; + if (netsec_mac_write(priv, GMAC_REG_MFFR, 0x80000001)) + return -ETIMEDOUT; + + ret = netsec_mac_update_to_phy_state(priv); + if (ret) + return ret; + + ret = netsec_mac_read(priv, GMAC_REG_OMR, &value); + if (ret) + return ret; + + value |= NETSEC_GMAC_OMR_REG_SR; + value |= NETSEC_GMAC_OMR_REG_ST; + + netsec_write(priv, NETSEC_REG_NRM_RX_INTEN_CLR, ~0); + netsec_write(priv, NETSEC_REG_NRM_TX_INTEN_CLR, ~0); + + netsec_et_set_coalesce(priv->ndev, &priv->et_coalesce); + + if (netsec_mac_write(priv, GMAC_REG_OMR, value)) + return -ETIMEDOUT; + + return 0; +} + +static int netsec_stop_gmac(struct netsec_priv *priv) +{ + u32 value; + int ret; + + ret = netsec_mac_read(priv, GMAC_REG_OMR, &value); + if (ret) + return ret; + value &= ~NETSEC_GMAC_OMR_REG_SR; + value &= ~NETSEC_GMAC_OMR_REG_ST; + + /* disable all interrupts */ + netsec_write(priv, NETSEC_REG_NRM_RX_INTEN_CLR, ~0); + netsec_write(priv, NETSEC_REG_NRM_TX_INTEN_CLR, ~0); + + return netsec_mac_write(priv, GMAC_REG_OMR, value); +} + +static void netsec_phy_adjust_link(struct net_device *ndev) +{ + struct netsec_priv *priv = netdev_priv(ndev); + + if (ndev->phydev->link) + netsec_start_gmac(priv); + else + netsec_stop_gmac(priv); + + phy_print_status(ndev->phydev); +} + +static irqreturn_t netsec_irq_handler(int irq, void *dev_id) +{ + struct netsec_priv *priv = dev_id; + u32 val, status = netsec_read(priv, NETSEC_REG_TOP_STATUS); + unsigned long flags; + + /* Disable interrupts */ + if (status & NETSEC_IRQ_TX) { + val = netsec_read(priv, NETSEC_REG_NRM_TX_STATUS); + netsec_write(priv, NETSEC_REG_NRM_TX_STATUS, val); + } + if (status & NETSEC_IRQ_RX) { + val = netsec_read(priv, NETSEC_REG_NRM_RX_STATUS); + netsec_write(priv, NETSEC_REG_NRM_RX_STATUS, val); + } + + spin_lock_irqsave(&priv->reglock, flags); + netsec_write(priv, NETSEC_REG_INTEN_CLR, NETSEC_IRQ_RX | NETSEC_IRQ_TX); + spin_unlock_irqrestore(&priv->reglock, flags); + + napi_schedule(&priv->napi); + + return IRQ_HANDLED; +} + +static int netsec_netdev_open(struct net_device *ndev) +{ + struct netsec_priv *priv = netdev_priv(ndev); + int ret; + + pm_runtime_get_sync(priv->dev); + + ret = netsec_setup_rx_dring(priv); + if (ret) { + netif_err(priv, probe, priv->ndev, + "%s: fail setup ring\n", __func__); + goto err1; + } + + ret = request_irq(priv->ndev->irq, netsec_irq_handler, + IRQF_SHARED, "netsec", priv); + if (ret) { + netif_err(priv, drv, priv->ndev, "request_irq failed\n"); + goto err2; + } + + if (dev_of_node(priv->dev)) { + if (!of_phy_connect(priv->ndev, priv->phy_np, + netsec_phy_adjust_link, 0, + priv->phy_interface)) { + netif_err(priv, link, priv->ndev, "missing PHY\n"); + goto err3; + } + } else { + ret = phy_connect_direct(priv->ndev, priv->phydev, + netsec_phy_adjust_link, + priv->phy_interface); + if (ret) { + netif_err(priv, link, priv->ndev, + "phy_connect_direct() failed (%d)\n", ret); + goto err3; + } + } + + phy_start(ndev->phydev); + + netsec_start_gmac(priv); + napi_enable(&priv->napi); + netif_start_queue(ndev); + + /* Enable RX intr. */ + netsec_write(priv, NETSEC_REG_INTEN_SET, NETSEC_IRQ_RX); + + return 0; +err3: + free_irq(priv->ndev->irq, priv); +err2: + netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); +err1: + pm_runtime_put_sync(priv->dev); + return ret; +} + +static int netsec_netdev_stop(struct net_device *ndev) +{ + struct netsec_priv *priv = netdev_priv(ndev); + + netif_stop_queue(priv->ndev); + dma_wmb(); + + napi_disable(&priv->napi); + + netsec_write(priv, NETSEC_REG_INTEN_CLR, ~0); + netsec_stop_gmac(priv); + + free_irq(priv->ndev->irq, priv); + + netsec_uninit_pkt_dring(priv, NETSEC_RING_TX); + netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); + + phy_stop(ndev->phydev); + phy_disconnect(ndev->phydev); + + pm_runtime_put_sync(priv->dev); + + return 0; +} + +static int netsec_netdev_init(struct net_device *ndev) +{ + struct netsec_priv *priv = netdev_priv(ndev); + int ret; + + ret = netsec_alloc_dring(priv, NETSEC_RING_TX); + if (ret) + return ret; + + ret = netsec_alloc_dring(priv, NETSEC_RING_RX); + if (ret) + goto err1; + + ret = netsec_reset_hardware(priv); + if (ret) + goto err2; + + return 0; +err2: + netsec_free_dring(priv, NETSEC_RING_RX); +err1: + netsec_free_dring(priv, NETSEC_RING_TX); + return ret; +} + +static void netsec_netdev_uninit(struct net_device *ndev) +{ + struct netsec_priv *priv = netdev_priv(ndev); + + netsec_free_dring(priv, NETSEC_RING_RX); + netsec_free_dring(priv, NETSEC_RING_TX); +} + +static int netsec_netdev_set_features(struct net_device *ndev, + netdev_features_t features) +{ + struct netsec_priv *priv = netdev_priv(ndev); + + priv->rx_cksum_offload_flag = !!(features & NETIF_F_RXCSUM); + + return 0; +} + +static int netsec_netdev_ioctl(struct net_device *ndev, struct ifreq *ifr, + int cmd) +{ + return phy_mii_ioctl(ndev->phydev, ifr, cmd); +} + +static const struct net_device_ops netsec_netdev_ops = { + .ndo_init = netsec_netdev_init, + .ndo_uninit = netsec_netdev_uninit, + .ndo_open = netsec_netdev_open, + .ndo_stop = netsec_netdev_stop, + .ndo_start_xmit = netsec_netdev_start_xmit, + .ndo_set_features = netsec_netdev_set_features, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_do_ioctl = netsec_netdev_ioctl, +}; + +static int netsec_of_probe(struct platform_device *pdev, + struct netsec_priv *priv) +{ + priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (!priv->phy_np) { + dev_err(&pdev->dev, "missing required property 'phy-handle'\n"); + return -EINVAL; + } + + priv->clk = devm_clk_get(&pdev->dev, NULL); /* get by 'phy_ref_clk' */ + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "phy_ref_clk not found\n"); + return PTR_ERR(priv->clk); + } + priv->freq = clk_get_rate(priv->clk); + + return 0; +} + +static int netsec_acpi_probe(struct platform_device *pdev, + struct netsec_priv *priv, u32 *phy_addr) +{ + int ret; + + if (!IS_ENABLED(CONFIG_ACPI)) + return -ENODEV; + + ret = device_property_read_u32(&pdev->dev, "phy-channel", phy_addr); + if (ret) { + dev_err(&pdev->dev, + "missing required property 'phy-channel'\n"); + return ret; + } + + ret = device_property_read_u32(&pdev->dev, + "socionext,phy-clock-frequency", + &priv->freq); + if (ret) + dev_err(&pdev->dev, + "missing required property 'socionext,phy-clock-frequency'\n"); + return ret; +} + +static void netsec_unregister_mdio(struct netsec_priv *priv) +{ + struct phy_device *phydev = priv->phydev; + + if (!dev_of_node(priv->dev) && phydev) { + phy_device_remove(phydev); + phy_device_free(phydev); + } + + mdiobus_unregister(priv->mii_bus); +} + +static int netsec_register_mdio(struct netsec_priv *priv, u32 phy_addr) +{ + struct mii_bus *bus; + int ret; + + bus = devm_mdiobus_alloc(priv->dev); + if (!bus) + return -ENOMEM; + + snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(priv->dev)); + bus->priv = priv; + bus->name = "SNI NETSEC MDIO"; + bus->read = netsec_phy_read; + bus->write = netsec_phy_write; + bus->parent = priv->dev; + priv->mii_bus = bus; + + if (dev_of_node(priv->dev)) { + struct device_node *mdio_node, *parent = dev_of_node(priv->dev); + + mdio_node = of_get_child_by_name(parent, "mdio"); + if (mdio_node) { + parent = mdio_node; + } else { + /* older f/w doesn't populate the mdio subnode, + * allow relaxed upgrade of f/w in due time. + */ + dev_info(priv->dev, "Upgrade f/w for mdio subnode!\n"); + } + + ret = of_mdiobus_register(bus, parent); + of_node_put(mdio_node); + + if (ret) { + dev_err(priv->dev, "mdiobus register err(%d)\n", ret); + return ret; + } + } else { + /* Mask out all PHYs from auto probing. */ + bus->phy_mask = ~0; + ret = mdiobus_register(bus); + if (ret) { + dev_err(priv->dev, "mdiobus register err(%d)\n", ret); + return ret; + } + + priv->phydev = get_phy_device(bus, phy_addr, false); + if (IS_ERR(priv->phydev)) { + ret = PTR_ERR(priv->phydev); + dev_err(priv->dev, "get_phy_device err(%d)\n", ret); + priv->phydev = NULL; + return -ENODEV; + } + + ret = phy_device_register(priv->phydev); + if (ret) { + mdiobus_unregister(bus); + dev_err(priv->dev, + "phy_device_register err(%d)\n", ret); + } + } + + return ret; +} + +static int netsec_probe(struct platform_device *pdev) +{ + struct resource *mmio_res, *eeprom_res, *irq_res; + u8 *mac, macbuf[ETH_ALEN]; + struct netsec_priv *priv; + u32 hw_ver, phy_addr = 0; + struct net_device *ndev; + int ret; + + mmio_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mmio_res) { + dev_err(&pdev->dev, "No MMIO resource found.\n"); + return -ENODEV; + } + + eeprom_res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!eeprom_res) { + dev_info(&pdev->dev, "No EEPROM resource found.\n"); + return -ENODEV; + } + + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!irq_res) { + dev_err(&pdev->dev, "No IRQ resource found.\n"); + return -ENODEV; + } + + ndev = alloc_etherdev(sizeof(*priv)); + if (!ndev) + return -ENOMEM; + + priv = netdev_priv(ndev); + + spin_lock_init(&priv->reglock); + SET_NETDEV_DEV(ndev, &pdev->dev); + platform_set_drvdata(pdev, priv); + ndev->irq = irq_res->start; + priv->dev = &pdev->dev; + priv->ndev = ndev; + + priv->msg_enable = NETIF_MSG_TX_ERR | NETIF_MSG_HW | NETIF_MSG_DRV | + NETIF_MSG_LINK | NETIF_MSG_PROBE; + + priv->phy_interface = device_get_phy_mode(&pdev->dev); + if (priv->phy_interface < 0) { + dev_err(&pdev->dev, "missing required property 'phy-mode'\n"); + ret = -ENODEV; + goto free_ndev; + } + + priv->ioaddr = devm_ioremap(&pdev->dev, mmio_res->start, + resource_size(mmio_res)); + if (!priv->ioaddr) { + dev_err(&pdev->dev, "devm_ioremap() failed\n"); + ret = -ENXIO; + goto free_ndev; + } + + priv->eeprom_base = devm_ioremap(&pdev->dev, eeprom_res->start, + resource_size(eeprom_res)); + if (!priv->eeprom_base) { + dev_err(&pdev->dev, "devm_ioremap() failed for EEPROM\n"); + ret = -ENXIO; + goto free_ndev; + } + + mac = device_get_mac_address(&pdev->dev, macbuf, sizeof(macbuf)); + if (mac) + ether_addr_copy(ndev->dev_addr, mac); + + if (priv->eeprom_base && + (!mac || !is_valid_ether_addr(ndev->dev_addr))) { + void __iomem *macp = priv->eeprom_base + + NETSEC_EEPROM_MAC_ADDRESS; + + ndev->dev_addr[0] = readb(macp + 3); + ndev->dev_addr[1] = readb(macp + 2); + ndev->dev_addr[2] = readb(macp + 1); + ndev->dev_addr[3] = readb(macp + 0); + ndev->dev_addr[4] = readb(macp + 7); + ndev->dev_addr[5] = readb(macp + 6); + } + + if (!is_valid_ether_addr(ndev->dev_addr)) { + dev_warn(&pdev->dev, "No MAC address found, using random\n"); + eth_hw_addr_random(ndev); + } + + if (dev_of_node(&pdev->dev)) + ret = netsec_of_probe(pdev, priv); + else + ret = netsec_acpi_probe(pdev, priv, &phy_addr); + if (ret) + goto free_ndev; + + if (!priv->freq) { + dev_err(&pdev->dev, "missing PHY reference clock frequency\n"); + ret = -ENODEV; + goto free_ndev; + } + + /* default for throughput */ + priv->et_coalesce.rx_coalesce_usecs = 500; + priv->et_coalesce.rx_max_coalesced_frames = 8; + priv->et_coalesce.tx_coalesce_usecs = 500; + priv->et_coalesce.tx_max_coalesced_frames = 8; + + ret = device_property_read_u32(&pdev->dev, "max-frame-size", + &ndev->max_mtu); + if (ret < 0) + ndev->max_mtu = ETH_DATA_LEN; + + /* runtime_pm coverage just for probe, open/close also cover it */ + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + hw_ver = netsec_read(priv, NETSEC_REG_F_TAIKI_VER); + /* this driver only supports F_TAIKI style NETSEC */ + if (NETSEC_F_NETSEC_VER_MAJOR_NUM(hw_ver) != + NETSEC_F_NETSEC_VER_MAJOR_NUM(NETSEC_REG_NETSEC_VER_F_TAIKI)) { + ret = -ENODEV; + goto pm_disable; + } + + dev_info(&pdev->dev, "hardware revision %d.%d\n", + hw_ver >> 16, hw_ver & 0xffff); + + netif_napi_add(ndev, &priv->napi, netsec_napi_poll, NAPI_BUDGET); + + ndev->netdev_ops = &netsec_netdev_ops; + ndev->ethtool_ops = &netsec_ethtool_ops; + + ndev->features |= NETIF_F_HIGHDMA | NETIF_F_RXCSUM | NETIF_F_GSO | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + ndev->hw_features = ndev->features; + + priv->rx_cksum_offload_flag = true; + + ret = netsec_register_mdio(priv, phy_addr); + if (ret) + goto unreg_napi; + + if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) + dev_warn(&pdev->dev, "Failed to enable 64-bit DMA\n"); + + ret = register_netdev(ndev); + if (ret) { + netif_err(priv, probe, ndev, "register_netdev() failed\n"); + goto unreg_mii; + } + + pm_runtime_put_sync(&pdev->dev); + return 0; + +unreg_mii: + netsec_unregister_mdio(priv); +unreg_napi: + netif_napi_del(&priv->napi); +pm_disable: + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); +free_ndev: + free_netdev(ndev); + dev_err(&pdev->dev, "init failed\n"); + + return ret; +} + +static int netsec_remove(struct platform_device *pdev) +{ + struct netsec_priv *priv = platform_get_drvdata(pdev); + + unregister_netdev(priv->ndev); + + netsec_unregister_mdio(priv); + + netif_napi_del(&priv->napi); + + pm_runtime_disable(&pdev->dev); + free_netdev(priv->ndev); + + return 0; +} + +#ifdef CONFIG_PM +static int netsec_runtime_suspend(struct device *dev) +{ + struct netsec_priv *priv = dev_get_drvdata(dev); + + netsec_write(priv, NETSEC_REG_CLK_EN, 0); + + clk_disable_unprepare(priv->clk); + + return 0; +} + +static int netsec_runtime_resume(struct device *dev) +{ + struct netsec_priv *priv = dev_get_drvdata(dev); + + clk_prepare_enable(priv->clk); + + netsec_write(priv, NETSEC_REG_CLK_EN, NETSEC_CLK_EN_REG_DOM_D | + NETSEC_CLK_EN_REG_DOM_C | + NETSEC_CLK_EN_REG_DOM_G); + return 0; +} +#endif + +static const struct dev_pm_ops netsec_pm_ops = { + SET_RUNTIME_PM_OPS(netsec_runtime_suspend, netsec_runtime_resume, NULL) +}; + +static const struct of_device_id netsec_dt_ids[] = { + { .compatible = "socionext,synquacer-netsec" }, + { } +}; +MODULE_DEVICE_TABLE(of, netsec_dt_ids); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id netsec_acpi_ids[] = { + { "SCX0001" }, + { } +}; +MODULE_DEVICE_TABLE(acpi, netsec_acpi_ids); +#endif + +static struct platform_driver netsec_driver = { + .probe = netsec_probe, + .remove = netsec_remove, + .driver = { + .name = "netsec", + .pm = &netsec_pm_ops, + .of_match_table = netsec_dt_ids, + .acpi_match_table = ACPI_PTR(netsec_acpi_ids), + }, +}; +module_platform_driver(netsec_driver); + +MODULE_AUTHOR("Jassi Brar <jaswinder.singh@linaro.org>"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_DESCRIPTION("NETSEC Ethernet driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d9c98fd810bb..f99f14c35063 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -364,9 +364,15 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) bool stmmac_eee_init(struct stmmac_priv *priv) { struct net_device *ndev = priv->dev; + int interface = priv->plat->interface; unsigned long flags; bool ret = false; + if ((interface != PHY_INTERFACE_MODE_MII) && + (interface != PHY_INTERFACE_MODE_GMII) && + !phy_interface_mode_is_rgmii(interface)) + goto out; + /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. */ diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 667c44f68dbc..195e0d0add8d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -825,6 +825,13 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(rt)) return PTR_ERR(rt); + if (skb_dst(skb)) { + int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - + GENEVE_BASE_HLEN - info->options_len - 14; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + } + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); @@ -864,6 +871,13 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(dst)) return PTR_ERR(dst); + if (skb_dst(skb)) { + int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - + GENEVE_BASE_HLEN - info->options_len - 14; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + } + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 1d025ab9568f..f522715c6595 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -393,7 +393,12 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) #define MACSEC_PORT_SCB (0x0000) #define MACSEC_UNDEF_SCI ((__force sci_t)0xffffffffffffffffULL) -#define DEFAULT_SAK_LEN 16 +#define MACSEC_GCM_AES_128_SAK_LEN 16 +#define MACSEC_GCM_AES_256_SAK_LEN 32 + +#define MAX_SAK_LEN MACSEC_GCM_AES_256_SAK_LEN + +#define DEFAULT_SAK_LEN MACSEC_GCM_AES_128_SAK_LEN #define DEFAULT_SEND_SCI true #define DEFAULT_ENCRYPT false #define DEFAULT_ENCODING_SA 0 @@ -1600,7 +1605,7 @@ static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = { [MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY, .len = MACSEC_KEYID_LEN, }, [MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY, - .len = MACSEC_MAX_KEY_LEN, }, + .len = MAX_SAK_LEN, }, }; static int parse_sa_config(struct nlattr **attrs, struct nlattr **tb_sa) @@ -2362,15 +2367,26 @@ static int nla_put_secy(struct macsec_secy *secy, struct sk_buff *skb) { struct macsec_tx_sc *tx_sc = &secy->tx_sc; struct nlattr *secy_nest = nla_nest_start(skb, MACSEC_ATTR_SECY); + u64 csid; if (!secy_nest) return 1; + switch (secy->key_len) { + case MACSEC_GCM_AES_128_SAK_LEN: + csid = MACSEC_CIPHER_ID_GCM_AES_128; + break; + case MACSEC_GCM_AES_256_SAK_LEN: + csid = MACSEC_CIPHER_ID_GCM_AES_256; + break; + default: + goto cancel; + } + if (nla_put_sci(skb, MACSEC_SECY_ATTR_SCI, secy->sci, MACSEC_SECY_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_SECY_ATTR_CIPHER_SUITE, - MACSEC_DEFAULT_CIPHER_ID, - MACSEC_SECY_ATTR_PAD) || + csid, MACSEC_SECY_ATTR_PAD) || nla_put_u8(skb, MACSEC_SECY_ATTR_ICV_LEN, secy->icv_len) || nla_put_u8(skb, MACSEC_SECY_ATTR_OPER, secy->operational) || nla_put_u8(skb, MACSEC_SECY_ATTR_PROTECT, secy->protect_frames) || @@ -3015,8 +3031,8 @@ static void macsec_setup(struct net_device *dev) eth_zero_addr(dev->broadcast); } -static void macsec_changelink_common(struct net_device *dev, - struct nlattr *data[]) +static int macsec_changelink_common(struct net_device *dev, + struct nlattr *data[]) { struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; @@ -3056,6 +3072,22 @@ static void macsec_changelink_common(struct net_device *dev, if (data[IFLA_MACSEC_VALIDATION]) secy->validate_frames = nla_get_u8(data[IFLA_MACSEC_VALIDATION]); + + if (data[IFLA_MACSEC_CIPHER_SUITE]) { + switch (nla_get_u64(data[IFLA_MACSEC_CIPHER_SUITE])) { + case MACSEC_CIPHER_ID_GCM_AES_128: + case MACSEC_DEFAULT_CIPHER_ALT: + secy->key_len = MACSEC_GCM_AES_128_SAK_LEN; + break; + case MACSEC_CIPHER_ID_GCM_AES_256: + secy->key_len = MACSEC_GCM_AES_256_SAK_LEN; + break; + default: + return -EINVAL; + } + } + + return 0; } static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], @@ -3071,9 +3103,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], data[IFLA_MACSEC_PORT]) return -EINVAL; - macsec_changelink_common(dev, data); - - return 0; + return macsec_changelink_common(dev, data); } static void macsec_del_dev(struct macsec_dev *macsec) @@ -3270,8 +3300,11 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err) goto unlink; - if (data) - macsec_changelink_common(dev, data); + if (data) { + err = macsec_changelink_common(dev, data); + if (err) + goto del_dev; + } err = register_macsec_dev(real_dev, dev); if (err < 0) @@ -3320,7 +3353,8 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], } switch (csid) { - case MACSEC_DEFAULT_CIPHER_ID: + case MACSEC_CIPHER_ID_GCM_AES_128: + case MACSEC_CIPHER_ID_GCM_AES_256: case MACSEC_DEFAULT_CIPHER_ALT: if (icv_len < MACSEC_MIN_ICV_LEN || icv_len > MACSEC_STD_ICV_LEN) @@ -3390,12 +3424,24 @@ static int macsec_fill_info(struct sk_buff *skb, { struct macsec_secy *secy = &macsec_priv(dev)->secy; struct macsec_tx_sc *tx_sc = &secy->tx_sc; + u64 csid; + + switch (secy->key_len) { + case MACSEC_GCM_AES_128_SAK_LEN: + csid = MACSEC_CIPHER_ID_GCM_AES_128; + break; + case MACSEC_GCM_AES_256_SAK_LEN: + csid = MACSEC_CIPHER_ID_GCM_AES_256; + break; + default: + goto nla_put_failure; + } if (nla_put_sci(skb, IFLA_MACSEC_SCI, secy->sci, IFLA_MACSEC_PAD) || nla_put_u8(skb, IFLA_MACSEC_ICV_LEN, secy->icv_len) || nla_put_u64_64bit(skb, IFLA_MACSEC_CIPHER_SUITE, - MACSEC_DEFAULT_CIPHER_ID, IFLA_MACSEC_PAD) || + csid, IFLA_MACSEC_PAD) || nla_put_u8(skb, IFLA_MACSEC_ENCODING_SA, tx_sc->encoding_sa) || nla_put_u8(skb, IFLA_MACSEC_ENCRYPT, tx_sc->encrypt) || nla_put_u8(skb, IFLA_MACSEC_PROTECT, secy->protect_frames) || diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a178c5efd33e..a0f2be81d52e 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1444,9 +1444,14 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, return 0; unregister_netdev: + /* macvlan_uninit would free the macvlan port */ unregister_netdevice(dev); + return err; destroy_macvlan_port: - if (create) + /* the macvlan port may be freed by macvlan_uninit when fail to register. + * so we destroy the macvlan port only when it's valid. + */ + if (create && macvlan_port_get_rtnl(dev)) macvlan_port_destroy(port->dev); return err; } diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c index 135296508a7e..6425ce04d3f9 100644 --- a/drivers/net/phy/mdio-sun4i.c +++ b/drivers/net/phy/mdio-sun4i.c @@ -118,8 +118,10 @@ static int sun4i_mdio_probe(struct platform_device *pdev) data->regulator = devm_regulator_get(&pdev->dev, "phy"); if (IS_ERR(data->regulator)) { - if (PTR_ERR(data->regulator) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(data->regulator) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err_out_free_mdiobus; + } dev_info(&pdev->dev, "no regulator found\n"); data->regulator = NULL; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index d1f9466f2fbf..6ac8b29b2dc3 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1525,6 +1525,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: mii->phy_id = pl->phydev->mdio.addr; + /* fall through */ case SIOCGMIIREG: ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num); @@ -1547,6 +1548,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: mii->phy_id = 0; + /* fall through */ case SIOCGMIIREG: ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num); @@ -1659,9 +1661,8 @@ static void phylink_sfp_link_down(void *upstream) ASSERT_RTNL(); set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state); + queue_work(system_power_efficient_wq, &pl->resolve); flush_work(&pl->resolve); - - netif_carrier_off(pl->netdev); } static void phylink_sfp_link_up(void *upstream) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 3ecc378e0716..bdc4bb3c8288 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -489,7 +489,8 @@ EXPORT_SYMBOL_GPL(sfp_register_upstream); void sfp_unregister_upstream(struct sfp_bus *bus) { rtnl_lock(); - sfp_unregister_bus(bus); + if (bus->sfp) + sfp_unregister_bus(bus); bus->upstream = NULL; bus->netdev = NULL; rtnl_unlock(); @@ -592,7 +593,8 @@ EXPORT_SYMBOL_GPL(sfp_register_socket); void sfp_unregister_socket(struct sfp_bus *bus) { rtnl_lock(); - sfp_unregister_bus(bus); + if (bus->netdev) + sfp_unregister_bus(bus); bus->sfp_dev = NULL; bus->sfp = NULL; bus->socket_ops = NULL; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 0a886fda0129..7c38659b2a76 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -330,7 +330,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) if (!q) return RX_HANDLER_PASS; - if (__skb_array_full(&q->skb_array)) + if (__ptr_ring_full(&q->ring)) goto drop; skb_push(skb, ETH_HLEN); @@ -348,7 +348,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) goto drop; if (!segs) { - if (skb_array_produce(&q->skb_array, skb)) + if (ptr_ring_produce(&q->ring, skb)) goto drop; goto wake_up; } @@ -358,7 +358,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) struct sk_buff *nskb = segs->next; segs->next = NULL; - if (skb_array_produce(&q->skb_array, segs)) { + if (ptr_ring_produce(&q->ring, segs)) { kfree_skb(segs); kfree_skb_list(nskb); break; @@ -375,7 +375,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) !(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto drop; - if (skb_array_produce(&q->skb_array, skb)) + if (ptr_ring_produce(&q->ring, skb)) goto drop; } @@ -497,7 +497,7 @@ static void tap_sock_destruct(struct sock *sk) { struct tap_queue *q = container_of(sk, struct tap_queue, sk); - skb_array_cleanup(&q->skb_array); + ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb); } static int tap_open(struct inode *inode, struct file *file) @@ -517,7 +517,7 @@ static int tap_open(struct inode *inode, struct file *file) &tap_proto, 0); if (!q) goto err; - if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) { + if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) { sk_free(&q->sk); goto err; } @@ -546,7 +546,7 @@ static int tap_open(struct inode *inode, struct file *file) err = tap_set_queue(tap, file, q); if (err) { - /* tap_sock_destruct() will take care of freeing skb_array */ + /* tap_sock_destruct() will take care of freeing ptr_ring */ goto err_put; } @@ -583,7 +583,7 @@ static unsigned int tap_poll(struct file *file, poll_table *wait) mask = 0; poll_wait(file, &q->wq.wait, wait); - if (!skb_array_empty(&q->skb_array)) + if (!ptr_ring_empty(&q->ring)) mask |= POLLIN | POLLRDNORM; if (sock_writeable(&q->sk) || @@ -844,7 +844,7 @@ static ssize_t tap_do_read(struct tap_queue *q, TASK_INTERRUPTIBLE); /* Read frames from the queue */ - skb = skb_array_consume(&q->skb_array); + skb = ptr_ring_consume(&q->ring); if (skb) break; if (noblock) { @@ -1176,7 +1176,7 @@ static int tap_peek_len(struct socket *sock) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); - return skb_array_peek_len(&q->skb_array); + return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag); } /* Ops structure to mimic raw sockets with tun */ @@ -1202,7 +1202,7 @@ struct socket *tap_get_socket(struct file *file) } EXPORT_SYMBOL_GPL(tap_get_socket); -struct skb_array *tap_get_skb_array(struct file *file) +struct ptr_ring *tap_get_ptr_ring(struct file *file) { struct tap_queue *q; @@ -1211,29 +1211,30 @@ struct skb_array *tap_get_skb_array(struct file *file) q = file->private_data; if (!q) return ERR_PTR(-EBADFD); - return &q->skb_array; + return &q->ring; } -EXPORT_SYMBOL_GPL(tap_get_skb_array); +EXPORT_SYMBOL_GPL(tap_get_ptr_ring); int tap_queue_resize(struct tap_dev *tap) { struct net_device *dev = tap->dev; struct tap_queue *q; - struct skb_array **arrays; + struct ptr_ring **rings; int n = tap->numqueues; int ret, i = 0; - arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL); - if (!arrays) + rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); + if (!rings) return -ENOMEM; list_for_each_entry(q, &tap->queue_list, next) - arrays[i++] = &q->skb_array; + rings[i++] = &q->ring; - ret = skb_array_resize_multiple(arrays, n, - dev->tx_queue_len, GFP_KERNEL); + ret = ptr_ring_resize_multiple(rings, n, + dev->tx_queue_len, GFP_KERNEL, + __skb_array_destroy_skb); - kfree(arrays); + kfree(rings); return ret; } EXPORT_SYMBOL_GPL(tap_queue_resize); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e7c5f4b2a9a6..2fba3be5719e 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -179,7 +179,7 @@ struct tun_file { struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; - struct skb_array tx_array; + struct ptr_ring tx_ring; struct xdp_rxq_info xdp_rxq; }; @@ -241,6 +241,24 @@ struct tun_struct { struct tun_steering_prog __rcu *steering_prog; }; +bool tun_is_xdp_buff(void *ptr) +{ + return (unsigned long)ptr & TUN_XDP_FLAG; +} +EXPORT_SYMBOL(tun_is_xdp_buff); + +void *tun_xdp_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr | TUN_XDP_FLAG); +} +EXPORT_SYMBOL(tun_xdp_to_ptr); + +void *tun_ptr_to_xdp(void *ptr) +{ + return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); +} +EXPORT_SYMBOL(tun_ptr_to_xdp); + static int tun_napi_receive(struct napi_struct *napi, int budget) { struct tun_file *tfile = container_of(napi, struct tun_file, napi); @@ -631,12 +649,25 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile) return tun; } +static void tun_ptr_free(void *ptr) +{ + if (!ptr) + return; + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + put_page(virt_to_head_page(xdp->data)); + } else { + __skb_array_destroy_skb(ptr); + } +} + static void tun_queue_purge(struct tun_file *tfile) { - struct sk_buff *skb; + void *ptr; - while ((skb = skb_array_consume(&tfile->tx_array)) != NULL) - kfree_skb(skb); + while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL) + tun_ptr_free(ptr); skb_queue_purge(&tfile->sk.sk_write_queue); skb_queue_purge(&tfile->sk.sk_error_queue); @@ -689,7 +720,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) unregister_netdevice(tun->dev); } if (tun) { - skb_array_cleanup(&tfile->tx_array); + ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); xdp_rxq_info_unreg(&tfile->xdp_rxq); } sock_put(&tfile->sk); @@ -782,7 +813,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, } if (!tfile->detached && - skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) { + ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) { err = -ENOMEM; goto out; } @@ -1048,7 +1079,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset(skb); - if (skb_array_produce(&tfile->tx_array, skb)) + if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop; /* Notify and wake up reader process */ @@ -1221,6 +1252,67 @@ static const struct net_device_ops tun_netdev_ops = { .ndo_get_stats64 = tun_net_get_stats64, }; +static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +{ + struct tun_struct *tun = netdev_priv(dev); + struct xdp_buff *buff = xdp->data_hard_start; + int headroom = xdp->data - xdp->data_hard_start; + struct tun_file *tfile; + u32 numqueues; + int ret = 0; + + /* Assure headroom is available and buff is properly aligned */ + if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp))) + return -ENOSPC; + + *buff = *xdp; + + rcu_read_lock(); + + numqueues = READ_ONCE(tun->numqueues); + if (!numqueues) { + ret = -ENOSPC; + goto out; + } + + tfile = rcu_dereference(tun->tfiles[smp_processor_id() % + numqueues]); + /* Encode the XDP flag into lowest bit for consumer to differ + * XDP buffer from sk_buff. + */ + if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) { + this_cpu_inc(tun->pcpu_stats->tx_dropped); + ret = -ENOSPC; + } + +out: + rcu_read_unlock(); + return ret; +} + +static void tun_xdp_flush(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + struct tun_file *tfile; + u32 numqueues; + + rcu_read_lock(); + + numqueues = READ_ONCE(tun->numqueues); + if (!numqueues) + goto out; + + tfile = rcu_dereference(tun->tfiles[smp_processor_id() % + numqueues]); + /* Notify and wake up reader process */ + if (tfile->flags & TUN_FASYNC) + kill_fasync(&tfile->fasync, SIGIO, POLL_IN); + tfile->socket.sk->sk_data_ready(tfile->socket.sk); + +out: + rcu_read_unlock(); +} + static const struct net_device_ops tap_netdev_ops = { .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, @@ -1238,6 +1330,8 @@ static const struct net_device_ops tap_netdev_ops = { .ndo_set_rx_headroom = tun_set_headroom, .ndo_get_stats64 = tun_net_get_stats64, .ndo_bpf = tun_xdp, + .ndo_xdp_xmit = tun_xdp_xmit, + .ndo_xdp_flush = tun_xdp_flush, }; static void tun_flow_init(struct tun_struct *tun) @@ -1316,7 +1410,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) poll_wait(file, sk_sleep(sk), wait); - if (!skb_array_empty(&tfile->tx_array)) + if (!ptr_ring_empty(&tfile->tx_ring)) mask |= POLLIN | POLLRDNORM; if (tun->dev->flags & IFF_UP && @@ -1862,6 +1956,40 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) return result; } +static ssize_t tun_put_user_xdp(struct tun_struct *tun, + struct tun_file *tfile, + struct xdp_buff *xdp, + struct iov_iter *iter) +{ + int vnet_hdr_sz = 0; + size_t size = xdp->data_end - xdp->data; + struct tun_pcpu_stats *stats; + size_t ret; + + if (tun->flags & IFF_VNET_HDR) { + struct virtio_net_hdr gso = { 0 }; + + vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); + if (unlikely(iov_iter_count(iter) < vnet_hdr_sz)) + return -EINVAL; + if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) != + sizeof(gso))) + return -EFAULT; + iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + } + + ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz; + + stats = get_cpu_ptr(tun->pcpu_stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += ret; + u64_stats_update_end(&stats->syncp); + put_cpu_ptr(tun->pcpu_stats); + + return ret; +} + /* Put packet to the user space buffer */ static ssize_t tun_put_user(struct tun_struct *tun, struct tun_file *tfile, @@ -1959,15 +2087,14 @@ done: return total; } -static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, - int *err) +static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) { DECLARE_WAITQUEUE(wait, current); - struct sk_buff *skb = NULL; + void *ptr = NULL; int error = 0; - skb = skb_array_consume(&tfile->tx_array); - if (skb) + ptr = ptr_ring_consume(&tfile->tx_ring); + if (ptr) goto out; if (noblock) { error = -EAGAIN; @@ -1978,8 +2105,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, current->state = TASK_INTERRUPTIBLE; while (1) { - skb = skb_array_consume(&tfile->tx_array); - if (skb) + ptr = ptr_ring_consume(&tfile->tx_ring); + if (ptr) break; if (signal_pending(current)) { error = -ERESTARTSYS; @@ -1998,12 +2125,12 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, out: *err = error; - return skb; + return ptr; } static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, struct iov_iter *to, - int noblock, struct sk_buff *skb) + int noblock, void *ptr) { ssize_t ret; int err; @@ -2011,23 +2138,31 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, tun_debug(KERN_INFO, tun, "tun_do_read\n"); if (!iov_iter_count(to)) { - if (skb) - kfree_skb(skb); + tun_ptr_free(ptr); return 0; } - if (!skb) { + if (!ptr) { /* Read frames from ring */ - skb = tun_ring_recv(tfile, noblock, &err); - if (!skb) + ptr = tun_ring_recv(tfile, noblock, &err); + if (!ptr) return err; } - ret = tun_put_user(tun, tfile, skb, to); - if (unlikely(ret < 0)) - kfree_skb(skb); - else - consume_skb(skb); + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + ret = tun_put_user_xdp(tun, tfile, xdp, to); + put_page(virt_to_head_page(xdp->data)); + } else { + struct sk_buff *skb = ptr; + + ret = tun_put_user(tun, tfile, skb, to); + if (unlikely(ret < 0)) + kfree_skb(skb); + else + consume_skb(skb); + } return ret; } @@ -2164,12 +2299,12 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, { struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_struct *tun = tun_get(tfile); - struct sk_buff *skb = m->msg_control; + void *ptr = m->msg_control; int ret; if (!tun) { ret = -EBADFD; - goto out_free_skb; + goto out_free; } if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { @@ -2181,7 +2316,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, SOL_PACKET, TUN_TX_TIMESTAMP); goto out; } - ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb); + ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr); if (ret > (ssize_t)total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; @@ -2192,12 +2327,25 @@ out: out_put_tun: tun_put(tun); -out_free_skb: - if (skb) - kfree_skb(skb); +out_free: + tun_ptr_free(ptr); return ret; } +static int tun_ptr_peek_len(void *ptr) +{ + if (likely(ptr)) { + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + return xdp->data_end - xdp->data; + } + return __skb_array_len_with_tag(ptr); + } else { + return 0; + } +} + static int tun_peek_len(struct socket *sock) { struct tun_file *tfile = container_of(sock, struct tun_file, socket); @@ -2208,7 +2356,7 @@ static int tun_peek_len(struct socket *sock) if (!tun) return 0; - ret = skb_array_peek_len(&tfile->tx_array); + ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len); tun_put(tun); return ret; @@ -3114,25 +3262,26 @@ static int tun_queue_resize(struct tun_struct *tun) { struct net_device *dev = tun->dev; struct tun_file *tfile; - struct skb_array **arrays; + struct ptr_ring **rings; int n = tun->numqueues + tun->numdisabled; int ret, i; - arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL); - if (!arrays) + rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); + if (!rings) return -ENOMEM; for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - arrays[i] = &tfile->tx_array; + rings[i] = &tfile->tx_ring; } list_for_each_entry(tfile, &tun->disabled, next) - arrays[i++] = &tfile->tx_array; + rings[i++] = &tfile->tx_ring; - ret = skb_array_resize_multiple(arrays, n, - dev->tx_queue_len, GFP_KERNEL); + ret = ptr_ring_resize_multiple(rings, n, + dev->tx_queue_len, GFP_KERNEL, + tun_ptr_free); - kfree(arrays); + kfree(rings); return ret; } @@ -3218,7 +3367,7 @@ struct socket *tun_get_socket(struct file *file) } EXPORT_SYMBOL_GPL(tun_get_socket); -struct skb_array *tun_get_skb_array(struct file *file) +struct ptr_ring *tun_get_tx_ring(struct file *file) { struct tun_file *tfile; @@ -3227,9 +3376,9 @@ struct skb_array *tun_get_skb_array(struct file *file) tfile = file->private_data; if (!tfile) return ERR_PTR(-EBADFD); - return &tfile->tx_array; + return &tfile->tx_ring; } -EXPORT_SYMBOL_GPL(tun_get_skb_array); +EXPORT_SYMBOL_GPL(tun_get_tx_ring); module_init(tun_init); module_exit(tun_cleanup); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index cfaa07f230e5..ae0580b577b8 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1100,6 +1100,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, + {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ed8299343728..12dfc5fee58e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1906,6 +1906,24 @@ static void virtnet_init_settings(struct net_device *dev) vi->duplex = DUPLEX_UNKNOWN; } +static void virtnet_update_settings(struct virtnet_info *vi) +{ + u32 speed; + u8 duplex; + + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) + return; + + speed = virtio_cread32(vi->vdev, offsetof(struct virtio_net_config, + speed)); + if (ethtool_validate_speed(speed)) + vi->speed = speed; + duplex = virtio_cread8(vi->vdev, offsetof(struct virtio_net_config, + duplex)); + if (ethtool_validate_duplex(duplex)) + vi->duplex = duplex; +} + static const struct ethtool_ops virtnet_ethtool_ops = { .get_drvinfo = virtnet_get_drvinfo, .get_link = ethtool_op_get_link, @@ -2159,6 +2177,7 @@ static void virtnet_config_changed_work(struct work_struct *work) vi->status = v; if (vi->status & VIRTIO_NET_S_LINK_UP) { + virtnet_update_settings(vi); netif_carrier_on(vi->dev); netif_tx_wake_all_queues(vi->dev); } else { @@ -2707,6 +2726,7 @@ static int virtnet_probe(struct virtio_device *vdev) schedule_work(&vi->config_work); } else { vi->status = VIRTIO_NET_S_LINK_UP; + virtnet_update_settings(vi); netif_carrier_on(dev); } @@ -2808,7 +2828,8 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ VIRTIO_NET_F_CTRL_MAC_ADDR, \ - VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS + VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ + VIRTIO_NET_F_SPEED_DUPLEX static unsigned int features[] = { VIRTNET_FEATURES, diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index c5a34671abda..9bd7ddeeb6a5 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1326,6 +1326,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); + xenbus_switch_state(dev, XenbusStateInitialising); return netdev; exit: diff --git a/drivers/nvmem/meson-mx-efuse.c b/drivers/nvmem/meson-mx-efuse.c index a346b4923550..41d3a3c1104e 100644 --- a/drivers/nvmem/meson-mx-efuse.c +++ b/drivers/nvmem/meson-mx-efuse.c @@ -156,8 +156,8 @@ static int meson_mx_efuse_read(void *context, unsigned int offset, MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE, MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE); - for (i = offset; i < offset + bytes; i += efuse->config.word_size) { - addr = i / efuse->config.word_size; + for (i = 0; i < bytes; i += efuse->config.word_size) { + addr = (offset + i) / efuse->config.word_size; err = meson_mx_efuse_read_addr(efuse, addr, &tmp); if (err) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 0b3fb99d9b89..7390fb8ca9d1 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -303,7 +303,7 @@ static void dino_mask_irq(struct irq_data *d) struct dino_device *dino_dev = irq_data_get_irq_chip_data(d); int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS); - DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq); + DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq); /* Clear the matching bit in the IMR register */ dino_dev->imr &= ~(DINO_MASK_IRQ(local_irq)); @@ -316,7 +316,7 @@ static void dino_unmask_irq(struct irq_data *d) int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS); u32 tmp; - DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq); + DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq); /* ** clear pending IRQ bits @@ -396,7 +396,7 @@ ilr_again: if (mask) { if (--ilr_loop > 0) goto ilr_again; - printk(KERN_ERR "Dino 0x%p: stuck interrupt %d\n", + printk(KERN_ERR "Dino 0x%px: stuck interrupt %d\n", dino_dev->hba.base_addr, mask); return IRQ_NONE; } @@ -553,7 +553,7 @@ dino_fixup_bus(struct pci_bus *bus) struct pci_dev *dev; struct dino_device *dino_dev = DINO_DEV(parisc_walk_tree(bus->bridge)); - DBG(KERN_WARNING "%s(0x%p) bus %d platform_data 0x%p\n", + DBG(KERN_WARNING "%s(0x%px) bus %d platform_data 0x%px\n", __func__, bus, bus->busn_res.start, bus->bridge->platform_data); @@ -854,7 +854,7 @@ static int __init dino_common_init(struct parisc_device *dev, res->flags = IORESOURCE_IO; /* do not mark it busy ! */ if (request_resource(&ioport_resource, res) < 0) { printk(KERN_ERR "%s: request I/O Port region failed " - "0x%lx/%lx (hpa 0x%p)\n", + "0x%lx/%lx (hpa 0x%px)\n", name, (unsigned long)res->start, (unsigned long)res->end, dino_dev->hba.base_addr); return 1; diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 4dd9b1308128..99a80da6fd2e 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c @@ -106,7 +106,7 @@ static int __init eisa_eeprom_init(void) return retval; } - printk(KERN_INFO "EISA EEPROM at 0x%p\n", eisa_eeprom_addr); + printk(KERN_INFO "EISA EEPROM at 0x%px\n", eisa_eeprom_addr); return 0; } diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 04dac6a42c9f..6b8d060d07de 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -985,9 +985,7 @@ static u32 hv_compose_msi_req_v1( int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = 1; - int_pkt->int_desc.delivery_mode = - (apic->irq_delivery_mode == dest_LowestPrio) ? - dest_LowestPrio : dest_Fixed; + int_pkt->int_desc.delivery_mode = dest_Fixed; /* * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in @@ -1008,9 +1006,7 @@ static u32 hv_compose_msi_req_v2( int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = 1; - int_pkt->int_desc.delivery_mode = - (apic->irq_delivery_mode == dest_LowestPrio) ? - dest_LowestPrio : dest_Fixed; + int_pkt->int_desc.delivery_mode = dest_Fixed; /* * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index accaaaccb662..6601ad0dfb3a 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -310,7 +310,7 @@ static int cpcap_usb_init_irq(struct platform_device *pdev, int irq, error; irq = platform_get_irq_byname(pdev, name); - if (!irq) + if (irq < 0) return -ENODEV; error = devm_request_threaded_irq(ddata->dev, irq, NULL, diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig index cb09245e9b4c..c845facacb06 100644 --- a/drivers/phy/renesas/Kconfig +++ b/drivers/phy/renesas/Kconfig @@ -12,7 +12,9 @@ config PHY_RCAR_GEN3_USB2 tristate "Renesas R-Car generation 3 USB 2.0 PHY driver" depends on ARCH_RENESAS depends on EXTCON + depends on USB_SUPPORT select GENERIC_PHY + select USB_COMMON help Support for USB 2.0 PHY found on Renesas R-Car generation 3 SoCs. diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index ee85fa0ca4b0..7492c8978217 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -1137,6 +1137,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev) if (IS_ERR(phy)) { dev_err(dev, "failed to create phy: %s\n", child_np->name); + pm_runtime_disable(dev); return PTR_ERR(phy); } @@ -1146,6 +1147,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev) phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); if (IS_ERR(phy_provider)) { dev_err(dev, "Failed to register phy provider\n"); + pm_runtime_disable(dev); return PTR_ERR(phy_provider); } diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 4307bf0013e1..63e916d4d069 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -75,14 +75,14 @@ MODULE_DEVICE_TABLE(of, tegra_xusb_padctl_of_match); static struct device_node * tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name) { - /* - * of_find_node_by_name() drops a reference, so make sure to grab one. - */ - struct device_node *np = of_node_get(padctl->dev->of_node); + struct device_node *pads, *np; + + pads = of_get_child_by_name(padctl->dev->of_node, "pads"); + if (!pads) + return NULL; - np = of_find_node_by_name(np, "pads"); - if (np) - np = of_find_node_by_name(np, name); + np = of_get_child_by_name(pads, name); + of_node_put(pads); return np; } @@ -90,16 +90,16 @@ tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name) static struct device_node * tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index) { - /* - * of_find_node_by_name() drops a reference, so make sure to grab one. - */ - struct device_node *np = of_node_get(pad->dev.of_node); + struct device_node *np, *lanes; - np = of_find_node_by_name(np, "lanes"); - if (!np) + lanes = of_get_child_by_name(pad->dev.of_node, "lanes"); + if (!lanes) return NULL; - return of_find_node_by_name(np, pad->soc->lanes[index].name); + np = of_get_child_by_name(lanes, pad->soc->lanes[index].name); + of_node_put(lanes); + + return np; } static int @@ -195,7 +195,7 @@ int tegra_xusb_pad_register(struct tegra_xusb_pad *pad, unsigned int i; int err; - children = of_find_node_by_name(pad->dev.of_node, "lanes"); + children = of_get_child_by_name(pad->dev.of_node, "lanes"); if (!children) return -ENODEV; @@ -444,21 +444,21 @@ static struct device_node * tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type, unsigned int index) { - /* - * of_find_node_by_name() drops a reference, so make sure to grab one. - */ - struct device_node *np = of_node_get(padctl->dev->of_node); + struct device_node *ports, *np; + char *name; - np = of_find_node_by_name(np, "ports"); - if (np) { - char *name; + ports = of_get_child_by_name(padctl->dev->of_node, "ports"); + if (!ports) + return NULL; - name = kasprintf(GFP_KERNEL, "%s-%u", type, index); - if (!name) - return ERR_PTR(-ENOMEM); - np = of_find_node_by_name(np, name); - kfree(name); + name = kasprintf(GFP_KERNEL, "%s-%u", type, index); + if (!name) { + of_node_put(ports); + return ERR_PTR(-ENOMEM); } + np = of_get_child_by_name(ports, name); + kfree(name); + of_node_put(ports); return np; } @@ -847,7 +847,7 @@ static void tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl) static int tegra_xusb_padctl_probe(struct platform_device *pdev) { - struct device_node *np = of_node_get(pdev->dev.of_node); + struct device_node *np = pdev->dev.of_node; const struct tegra_xusb_padctl_soc *soc; struct tegra_xusb_padctl *padctl; const struct of_device_id *match; @@ -855,7 +855,7 @@ static int tegra_xusb_padctl_probe(struct platform_device *pdev) int err; /* for backwards compatibility with old device trees */ - np = of_find_node_by_name(np, "pads"); + np = of_get_child_by_name(np, "pads"); if (!np) { dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n"); return tegra_xusb_padctl_legacy_probe(pdev); diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index e6cd8de793e2..3501491e5bfc 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = { */ static struct lock_class_key pcs_lock_class; +/* Class for the IRQ request mutex */ +static struct lock_class_key pcs_request_class; + /* * REVISIT: Reads and writes could eventually use regmap or something * generic. But at least on omaps, some mux registers are performance @@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_set_chip_data(irq, pcs_soc); irq_set_chip_and_handler(irq, &pcs->chip, handle_level_irq); - irq_set_lockdep_class(irq, &pcs_lock_class); + irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class); irq_set_noprobe(irq); return 0; diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index a276c61be217..e62ab087bfd8 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -290,7 +290,7 @@ static int stm32_gpio_domain_translate(struct irq_domain *d, } static int stm32_gpio_domain_activate(struct irq_domain *d, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { struct stm32_gpio_bank *bank = d->host_data; struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent); diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 791449a2370f..daa68acbc900 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -1458,5 +1458,5 @@ static void __exit acpi_wmi_exit(void) class_unregister(&wmi_bus_class); } -subsys_initcall(acpi_wmi_init); +subsys_initcall_sync(acpi_wmi_init); module_exit(acpi_wmi_exit); diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index c94b606e0df8..ee14d8e45c97 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -2803,6 +2803,16 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr) erp = dasd_3990_erp_handle_match_erp(cqr, erp); } + + /* + * For path verification work we need to stick with the path that was + * originally chosen so that the per path configuration data is + * assigned correctly. + */ + if (test_bit(DASD_CQR_VERIFY_PATH, &erp->flags) && cqr->lpm) { + erp->lpm = cqr->lpm; + } + if (device->features & DASD_FEATURE_ERPLOG) { /* print current erp_chain */ dev_err(&device->cdev->dev, diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 05ac6ba15a53..614b44e70a28 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -17,6 +17,8 @@ CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) CFLAGS_sclp_early_core.o += -march=z900 endif +CFLAGS_sclp_early_core.o += -D__NO_FORTIFY + obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \ sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o sclp_ctl.o \ sclp_early.o sclp_early_core.o diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index a9996c16f4ae..26ce17178401 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1415,7 +1415,10 @@ static void __scsi_remove_target(struct scsi_target *starget) * check. */ if (sdev->channel != starget->channel || - sdev->id != starget->id || + sdev->id != starget->id) + continue; + if (sdev->sdev_state == SDEV_DEL || + sdev->sdev_state == SDEV_CANCEL || !get_device(&sdev->sdev_gendev)) continue; spin_unlock_irqrestore(shost->host_lock, flags); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 1b06cf0375dc..3b3d1d050cac 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -953,10 +953,11 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, case TEST_UNIT_READY: break; default: - set_host_byte(scmnd, DID_TARGET_FAILURE); + set_host_byte(scmnd, DID_ERROR); } break; case SRB_STATUS_INVALID_LUN: + set_host_byte(scmnd, DID_NO_CONNECT); do_work = true; process_err_fn = storvsc_remove_lun; break; diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig index a517b2d29f1b..8f6494158d3d 100644 --- a/drivers/staging/android/ion/Kconfig +++ b/drivers/staging/android/ion/Kconfig @@ -37,7 +37,7 @@ config ION_CHUNK_HEAP config ION_CMA_HEAP bool "Ion CMA heap support" - depends on ION && CMA + depends on ION && DMA_CMA help Choose this option to enable CMA heaps with Ion. This heap is backed by the Contiguous Memory Allocator (CMA). If your system has these diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index a7d9b0e98572..f480885e346b 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -346,7 +346,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, mutex_lock(&buffer->lock); list_for_each_entry(a, &buffer->attachments, list) { dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents, - DMA_BIDIRECTIONAL); + direction); } mutex_unlock(&buffer->lock); @@ -368,7 +368,7 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, mutex_lock(&buffer->lock); list_for_each_entry(a, &buffer->attachments, list) { dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents, - DMA_BIDIRECTIONAL); + direction); } mutex_unlock(&buffer->lock); diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c index dd5545d9990a..86196ffd2faf 100644 --- a/drivers/staging/android/ion/ion_cma_heap.c +++ b/drivers/staging/android/ion/ion_cma_heap.c @@ -39,9 +39,15 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, struct ion_cma_heap *cma_heap = to_cma_heap(heap); struct sg_table *table; struct page *pages; + unsigned long size = PAGE_ALIGN(len); + unsigned long nr_pages = size >> PAGE_SHIFT; + unsigned long align = get_order(size); int ret; - pages = cma_alloc(cma_heap->cma, len, 0, GFP_KERNEL); + if (align > CONFIG_CMA_ALIGNMENT) + align = CONFIG_CMA_ALIGNMENT; + + pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL); if (!pages) return -ENOMEM; @@ -53,7 +59,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, if (ret) goto free_mem; - sg_set_page(table->sgl, pages, len, 0); + sg_set_page(table->sgl, pages, size, 0); buffer->priv_virt = pages; buffer->sg_table = table; @@ -62,7 +68,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, free_mem: kfree(table); err: - cma_release(cma_heap->cma, pages, buffer->size); + cma_release(cma_heap->cma, pages, nr_pages); return -ENOMEM; } @@ -70,9 +76,10 @@ static void ion_cma_free(struct ion_buffer *buffer) { struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap); struct page *pages = buffer->priv_virt; + unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT; /* release memory */ - cma_release(cma_heap->cma, pages, buffer->size); + cma_release(cma_heap->cma, pages, nr_pages); /* release sg table */ sg_free_table(buffer->sg_table); kfree(buffer->sg_table); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index 986c2a40d978..8267119ccc8e 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -487,21 +487,18 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, ksocknal_nid2peerlist(id.nid)); } - route2 = NULL; list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) { - if (route2->ksnr_ipaddr == ipaddr) - break; - - route2 = NULL; - } - if (!route2) { - ksocknal_add_route_locked(peer, route); - route->ksnr_share_count++; - } else { - ksocknal_route_decref(route); - route2->ksnr_share_count++; + if (route2->ksnr_ipaddr == ipaddr) { + /* Route already exists, use the old one */ + ksocknal_route_decref(route); + route2->ksnr_share_count++; + goto out; + } } - + /* Route doesn't already exist, add the new one */ + ksocknal_add_route_locked(peer, route); + route->ksnr_share_count++; +out: write_unlock_bh(&ksocknal_data.ksnd_global_lock); return 0; diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 419a7a90bce0..f45bcbc63738 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -339,7 +339,7 @@ static void __ring_interrupt(struct tb_ring *ring) return; if (ring->start_poll) { - __ring_interrupt_mask(ring, false); + __ring_interrupt_mask(ring, true); ring->start_poll(ring->poll_data); } else { schedule_work(&ring->work); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 427e0d5d8f13..539b49adb6af 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1762,7 +1762,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) { struct n_tty_data *ldata = tty->disc_data; - if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) { + if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) { bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); ldata->line_start = ldata->read_tail; if (!L_ICANON(tty) || !read_cnt(ldata)) { @@ -2425,7 +2425,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file, return put_user(tty_chars_in_buffer(tty), (int __user *) arg); case TIOCINQ: down_write(&tty->termios_rwsem); - if (L_ICANON(tty)) + if (L_ICANON(tty) && !L_EXTPROC(tty)) retval = inq_canon(ldata); else retval = read_cnt(ldata); diff --git a/drivers/usb/chipidea/ci_hdrc_msm.c b/drivers/usb/chipidea/ci_hdrc_msm.c index 3593ce0ec641..880009987460 100644 --- a/drivers/usb/chipidea/ci_hdrc_msm.c +++ b/drivers/usb/chipidea/ci_hdrc_msm.c @@ -247,7 +247,7 @@ static int ci_hdrc_msm_probe(struct platform_device *pdev) if (ret) goto err_mux; - ulpi_node = of_find_node_by_name(of_node_get(pdev->dev.of_node), "ulpi"); + ulpi_node = of_get_child_by_name(pdev->dev.of_node, "ulpi"); if (ulpi_node) { phy_node = of_get_next_available_child(ulpi_node, NULL); ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy"); diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 78e92d29f8d9..c821b4b9647e 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -1007,7 +1007,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) case USB_SSP_CAP_TYPE: ssp_cap = (struct usb_ssp_cap_descriptor *)buffer; ssac = (le32_to_cpu(ssp_cap->bmAttributes) & - USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1; + USB_SSP_SUBLINK_SPEED_ATTRIBS); if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac)) dev->bos->ssp_cap = ssp_cap; break; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index a10b346b9777..4024926c1d68 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -52,10 +52,11 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, - /* Logitech HD Pro Webcams C920, C920-C and C930e */ + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech ConferenceCam CC3000e */ { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, @@ -149,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */ { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM }, + /* ELSA MicroLink 56K */ + { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index 4f7895dbcf88..e26e685d8a57 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -162,7 +162,7 @@ static void xhci_debugfs_extcap_regset(struct xhci_hcd *xhci, int cap_id, static int xhci_ring_enqueue_show(struct seq_file *s, void *unused) { dma_addr_t dma; - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; dma = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue); seq_printf(s, "%pad\n", &dma); @@ -173,7 +173,7 @@ static int xhci_ring_enqueue_show(struct seq_file *s, void *unused) static int xhci_ring_dequeue_show(struct seq_file *s, void *unused) { dma_addr_t dma; - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; dma = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue); seq_printf(s, "%pad\n", &dma); @@ -183,7 +183,7 @@ static int xhci_ring_dequeue_show(struct seq_file *s, void *unused) static int xhci_ring_cycle_show(struct seq_file *s, void *unused) { - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; seq_printf(s, "%d\n", ring->cycle_state); @@ -346,7 +346,7 @@ static void xhci_debugfs_create_files(struct xhci_hcd *xhci, } static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci, - struct xhci_ring *ring, + struct xhci_ring **ring, const char *name, struct dentry *parent) { @@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci, snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index); epriv->root = xhci_debugfs_create_ring_dir(xhci, - dev->eps[ep_index].new_ring, + &dev->eps[ep_index].new_ring, epriv->name, spriv->root); spriv->eps[ep_index] = epriv; @@ -423,7 +423,7 @@ void xhci_debugfs_create_slot(struct xhci_hcd *xhci, int slot_id) priv->dev = dev; dev->debugfs_private = priv; - xhci_debugfs_create_ring_dir(xhci, dev->eps[0].ring, + xhci_debugfs_create_ring_dir(xhci, &dev->eps[0].ring, "ep00", priv->root); xhci_debugfs_create_context_files(xhci, priv->root, slot_id); @@ -488,11 +488,11 @@ void xhci_debugfs_init(struct xhci_hcd *xhci) ARRAY_SIZE(xhci_extcap_dbc), "reg-ext-dbc"); - xhci_debugfs_create_ring_dir(xhci, xhci->cmd_ring, + xhci_debugfs_create_ring_dir(xhci, &xhci->cmd_ring, "command-ring", xhci->debugfs_root); - xhci_debugfs_create_ring_dir(xhci, xhci->event_ring, + xhci_debugfs_create_ring_dir(xhci, &xhci->event_ring, "event-ring", xhci->debugfs_root); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 7ef1274ef7f7..1aad89b8aba0 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -178,6 +178,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_BROKEN_STREAMS; } if (pdev->vendor == PCI_VENDOR_ID_RENESAS && + pdev->device == 0x0014) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0015) xhci->quirks |= XHCI_RESET_ON_RESUME; if (pdev->vendor == PCI_VENDOR_ID_VIA) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2424d3020ca3..da6dbe3ebd8b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3525,8 +3525,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) struct xhci_slot_ctx *slot_ctx; int i, ret; - xhci_debugfs_remove_slot(xhci, udev->slot_id); - #ifndef CONFIG_USB_DEFAULT_PERSIST /* * We called pm_runtime_get_noresume when the device was attached. @@ -3555,8 +3553,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) } ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) + if (ret) { + xhci_debugfs_remove_slot(xhci, udev->slot_id); xhci_free_virt_device(xhci, udev->slot_id); + } } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 1aba9105b369..fc68952c994a 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1013,6 +1013,7 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, + { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 4faa09fe308c..8b4ecd2bd297 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -915,6 +915,12 @@ #define ICPDAS_I7563U_PID 0x0105 /* + * Airbus Defence and Space + */ +#define AIRBUS_DS_VID 0x1e8e /* Vendor ID */ +#define AIRBUS_DS_P8GR 0x6001 /* Tetra P8GR */ + +/* * RT Systems programming cables for various ham radios */ #define RTSYSTEMS_VID 0x2100 /* Vendor ID */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3b3513874cfd..b6320e3be429 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Qualcomm's vendor ID */ #define QUECTEL_PRODUCT_UC20 0x9003 #define QUECTEL_PRODUCT_UC15 0x9090 +/* These Yuga products use Qualcomm's vendor ID */ +#define YUGA_PRODUCT_CLM920_NC5 0x9625 #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ @@ -280,6 +282,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_ME910 0x1100 +#define TELIT_PRODUCT_ME910_DUAL_MODEM 0x1101 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 @@ -645,6 +648,11 @@ static const struct option_blacklist_info telit_me910_blacklist = { .reserved = BIT(1) | BIT(3), }; +static const struct option_blacklist_info telit_me910_dual_modem_blacklist = { + .sendsetup = BIT(0), + .reserved = BIT(3), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -674,6 +682,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = { .reserved = BIT(4) | BIT(5), }; +static const struct option_blacklist_info yuga_clm920_nc5_blacklist = { + .reserved = BIT(1) | BIT(4), +}; + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -1178,6 +1190,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + /* Yuga products use Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5), + .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, @@ -1244,6 +1259,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = (kernel_ulong_t)&telit_me910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), + .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index e3892541a489..613f91add03d 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */ {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ + {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ @@ -342,6 +344,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) break; case 2: dev_dbg(dev, "NMEA GPS interface found\n"); + sendsetup = true; break; case 3: dev_dbg(dev, "Modem port found\n"); diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index a3df8ee82faf..e31a6f204397 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -149,8 +149,7 @@ static void stub_shutdown_connection(struct usbip_device *ud) * step 1? */ if (ud->tcp_socket) { - dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n", - ud->tcp_socket); + dev_dbg(&sdev->udev->dev, "shutdown sockfd %d\n", ud->sockfd); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 4f48b306713f..c31c8402a0c5 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -237,11 +237,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev) struct stub_priv *priv; struct urb *urb; - dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev); + dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n"); while ((priv = stub_priv_pop(sdev))) { urb = priv->urb; - dev_dbg(&sdev->udev->dev, "free urb %p\n", urb); + dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n", + priv->seqnum); usb_kill_urb(urb); kmem_cache_free(stub_priv_cache, priv); diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 493ac2928391..6c5a59313999 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -211,9 +211,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, if (priv->seqnum != pdu->u.cmd_unlink.seqnum) continue; - dev_info(&priv->urb->dev->dev, "unlink urb %p\n", - priv->urb); - /* * This matched urb is not completed yet (i.e., be in * flight in usb hcd hardware/driver). Now we are @@ -252,8 +249,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, ret = usb_unlink_urb(priv->urb); if (ret != -EINPROGRESS) dev_err(&priv->urb->dev->dev, - "failed to unlink a urb %p, ret %d\n", - priv->urb, ret); + "failed to unlink a urb # %lu, ret %d\n", + priv->seqnum, ret); return 0; } @@ -342,14 +339,6 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) epd = &ep->desc; - /* validate transfer_buffer_length */ - if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) { - dev_err(&sdev->udev->dev, - "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n", - pdu->u.cmd_submit.transfer_buffer_length); - return -1; - } - if (usb_endpoint_xfer_control(epd)) { if (dir == USBIP_DIR_OUT) return usb_sndctrlpipe(udev, epnum); @@ -482,8 +471,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, } /* allocate urb transfer buffer, if needed */ - if (pdu->u.cmd_submit.transfer_buffer_length > 0 && - pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) { + if (pdu->u.cmd_submit.transfer_buffer_length > 0) { priv->urb->transfer_buffer = kzalloc(pdu->u.cmd_submit.transfer_buffer_length, GFP_KERNEL); diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index 53172b1f6257..f0ec41a50cbc 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -88,7 +88,7 @@ void stub_complete(struct urb *urb) /* link a urb to the queue of tx. */ spin_lock_irqsave(&sdev->priv_lock, flags); if (sdev->ud.tcp_socket == NULL) { - usbip_dbg_stub_tx("ignore urb for closed connection %p", urb); + usbip_dbg_stub_tx("ignore urb for closed connection\n"); /* It will be freed in stub_device_cleanup_urbs(). */ } else if (priv->unlinking) { stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status); @@ -190,8 +190,8 @@ static int stub_send_ret_submit(struct stub_device *sdev) /* 1. setup usbip_header */ setup_ret_submit_pdu(&pdu_header, urb); - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", - pdu_header.base.seqnum, urb); + usbip_dbg_stub_tx("setup txdata seqnum: %d\n", + pdu_header.base.seqnum); usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index f7978933b402..7b219d9109b4 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -317,26 +317,20 @@ int usbip_recv(struct socket *sock, void *buf, int size) struct msghdr msg = {.msg_flags = MSG_NOSIGNAL}; int total = 0; + if (!sock || !buf || !size) + return -EINVAL; + iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size); usbip_dbg_xmit("enter\n"); - if (!sock || !buf || !size) { - pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf, - size); - return -EINVAL; - } - do { - int sz = msg_data_left(&msg); + msg_data_left(&msg); sock->sk->sk_allocation = GFP_NOIO; result = sock_recvmsg(sock, &msg, MSG_WAITALL); - if (result <= 0) { - pr_debug("receive sock %p buf %p size %u ret %d total %d\n", - sock, buf + total, sz, result, total); + if (result <= 0) goto err; - } total += result; } while (msg_data_left(&msg)); diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 6b3278c4b72a..c3e1008aa491 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -656,9 +656,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag struct vhci_device *vdev; unsigned long flags; - usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n", - hcd, urb, mem_flags); - if (portnum > VHCI_HC_PORTS) { pr_err("invalid port number %d\n", portnum); return -ENODEV; @@ -822,8 +819,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) struct vhci_device *vdev; unsigned long flags; - pr_info("dequeue a urb %p\n", urb); - spin_lock_irqsave(&vhci->lock, flags); priv = urb->hcpriv; @@ -851,7 +846,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) /* tcp connection is closed */ spin_lock(&vdev->priv_lock); - pr_info("device %p seems to be disconnected\n", vdev); list_del(&priv->list); kfree(priv); urb->hcpriv = NULL; @@ -863,8 +857,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) * vhci_rx will receive RET_UNLINK and give back the URB. * Otherwise, we give back it here. */ - pr_info("gives back urb %p\n", urb); - usb_hcd_unlink_urb_from_ep(hcd, urb); spin_unlock_irqrestore(&vhci->lock, flags); @@ -892,8 +884,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) unlink->unlink_seqnum = priv->seqnum; - pr_info("device %p seems to be still connected\n", vdev); - /* send cmd_unlink and try to cancel the pending URB in the * peer */ list_add_tail(&unlink->list, &vdev->unlink_tx); @@ -975,7 +965,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) /* need this? see stub_dev.c */ if (ud->tcp_socket) { - pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket); + pr_debug("shutdown tcp_socket %d\n", ud->sockfd); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c index 90577e8b2282..112ebb90d8c9 100644 --- a/drivers/usb/usbip/vhci_rx.c +++ b/drivers/usb/usbip/vhci_rx.c @@ -23,24 +23,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum) urb = priv->urb; status = urb->status; - usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n", - urb, priv, seqnum); + usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum); switch (status) { case -ENOENT: /* fall through */ case -ECONNRESET: - dev_info(&urb->dev->dev, - "urb %p was unlinked %ssynchronuously.\n", urb, - status == -ENOENT ? "" : "a"); + dev_dbg(&urb->dev->dev, + "urb seq# %u was unlinked %ssynchronuously\n", + seqnum, status == -ENOENT ? "" : "a"); break; case -EINPROGRESS: /* no info output */ break; default: - dev_info(&urb->dev->dev, - "urb %p may be in a error, status %d\n", urb, - status); + dev_dbg(&urb->dev->dev, + "urb seq# %u may be in a error, status %d\n", + seqnum, status); } list_del(&priv->list); @@ -67,8 +66,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, spin_unlock_irqrestore(&vdev->priv_lock, flags); if (!urb) { - pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum); - pr_info("max seqnum %d\n", + pr_err("cannot find a urb of seqnum %u max seqnum %d\n", + pdu->base.seqnum, atomic_read(&vhci_hcd->seqnum)); usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return; @@ -91,7 +90,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, if (usbip_dbg_flag_vhci_rx) usbip_dump_urb(urb); - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum); spin_lock_irqsave(&vhci->lock, flags); usb_hcd_unlink_urb_from_ep(vhci_hcd_to_hcd(vhci_hcd), urb); @@ -158,7 +157,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev, pr_info("the urb (seqnum %d) was already given back\n", pdu->base.seqnum); } else { - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum); /* If unlink is successful, status is -ECONNRESET */ urb->status = pdu->u.ret_unlink.status; diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c index d625a2ff4b71..9aed15a358b7 100644 --- a/drivers/usb/usbip/vhci_tx.c +++ b/drivers/usb/usbip/vhci_tx.c @@ -69,7 +69,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) memset(&msg, 0, sizeof(msg)); memset(&iov, 0, sizeof(iov)); - usbip_dbg_vhci_tx("setup txdata urb %p\n", urb); + usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n", + priv->seqnum); /* 1. setup usbip_header */ setup_cmd_submit_pdu(&pdu_header, urb); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index c7bdeb655646..a5a1db647635 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -89,7 +89,7 @@ struct vhost_net_ubuf_ref { #define VHOST_RX_BATCH 64 struct vhost_net_buf { - struct sk_buff **queue; + void **queue; int tail; int head; }; @@ -108,7 +108,7 @@ struct vhost_net_virtqueue { /* Reference counting for outstanding ubufs. * Protected by vq mutex. Writers must also take device mutex. */ struct vhost_net_ubuf_ref *ubufs; - struct skb_array *rx_array; + struct ptr_ring *rx_ring; struct vhost_net_buf rxq; }; @@ -158,7 +158,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq) struct vhost_net_buf *rxq = &nvq->rxq; rxq->head = 0; - rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue, + rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue, VHOST_RX_BATCH); return rxq->tail; } @@ -167,13 +167,25 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; - if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) { - skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head, - vhost_net_buf_get_size(rxq)); + if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) { + ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head, + vhost_net_buf_get_size(rxq), + __skb_array_destroy_skb); rxq->head = rxq->tail = 0; } } +static int vhost_net_buf_peek_len(void *ptr) +{ + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + return xdp->data_end - xdp->data; + } + + return __skb_array_len_with_tag(ptr); +} + static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; @@ -185,7 +197,7 @@ static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) return 0; out: - return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq)); + return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq)); } static void vhost_net_buf_init(struct vhost_net_buf *rxq) @@ -583,7 +595,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) int len = 0; unsigned long flags; - if (rvq->rx_array) + if (rvq->rx_ring) return vhost_net_buf_peek(rvq); spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); @@ -790,7 +802,7 @@ static void handle_rx(struct vhost_net *net) * they refilled. */ goto out; } - if (nvq->rx_array) + if (nvq->rx_ring) msg.msg_control = vhost_net_buf_consume(&nvq->rxq); /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { @@ -896,7 +908,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) struct vhost_net *n; struct vhost_dev *dev; struct vhost_virtqueue **vqs; - struct sk_buff **queue; + void **queue; int i; n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL); @@ -908,7 +920,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) return -ENOMEM; } - queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *), + queue = kmalloc_array(VHOST_RX_BATCH, sizeof(void *), GFP_KERNEL); if (!queue) { kfree(vqs); @@ -1046,23 +1058,23 @@ err: return ERR_PTR(r); } -static struct skb_array *get_tap_skb_array(int fd) +static struct ptr_ring *get_tap_ptr_ring(int fd) { - struct skb_array *array; + struct ptr_ring *ring; struct file *file = fget(fd); if (!file) return NULL; - array = tun_get_skb_array(file); - if (!IS_ERR(array)) + ring = tun_get_tx_ring(file); + if (!IS_ERR(ring)) goto out; - array = tap_get_skb_array(file); - if (!IS_ERR(array)) + ring = tap_get_ptr_ring(file); + if (!IS_ERR(ring)) goto out; - array = NULL; + ring = NULL; out: fput(file); - return array; + return ring; } static struct socket *get_tap_socket(int fd) @@ -1143,7 +1155,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vq->private_data = sock; vhost_net_buf_unproduce(nvq); if (index == VHOST_NET_VQ_RX) - nvq->rx_array = get_tap_skb_array(fd); + nvq->rx_ring = get_tap_ptr_ring(fd); r = vhost_vq_init_access(vq); if (r) goto err_used; diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index d1e1d8d2b9d5..4c789e61554b 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -805,7 +805,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) pvcalls_exit(); return ret; } - map2 = kzalloc(sizeof(*map2), GFP_KERNEL); + map2 = kzalloc(sizeof(*map2), GFP_ATOMIC); if (map2 == NULL) { clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ff8d5bf4354f..23c7f395d718 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -895,20 +895,38 @@ error: * However, if we didn't have a callback promise outstanding, or it was * outstanding on a different server, then it won't break it either... */ -static int afs_dir_remove_link(struct dentry *dentry, struct key *key) +static int afs_dir_remove_link(struct dentry *dentry, struct key *key, + unsigned long d_version_before, + unsigned long d_version_after) { + bool dir_valid; int ret = 0; + /* There were no intervening changes on the server if the version + * number we got back was incremented by exactly 1. + */ + dir_valid = (d_version_after == d_version_before + 1); + if (d_really_is_positive(dentry)) { struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - kdebug("AFS_VNODE_DELETED"); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - - ret = afs_validate(vnode, key); - if (ret == -ESTALE) + if (dir_valid) { + drop_nlink(&vnode->vfs_inode); + if (vnode->vfs_inode.i_nlink == 0) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } ret = 0; + } else { + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + kdebug("AFS_VNODE_DELETED"); + + ret = afs_validate(vnode, key); + if (ret == -ESTALE) + ret = 0; + } _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); } @@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct key *key; + unsigned long d_version = (unsigned long)dentry->d_fsdata; int ret; _enter("{%x:%u},{%pd}", @@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) afs_vnode_commit_status(&fc, dvnode, fc.cb_break); ret = afs_end_vnode_operation(&fc); if (ret == 0) - ret = afs_dir_remove_link(dentry, key); + ret = afs_dir_remove_link( + dentry, key, d_version, + (unsigned long)dvnode->status.data_version); } error_key: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 3415eb7484f6..1e81864ef0b2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) } read_sequnlock_excl(&vnode->cb_lock); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + clear_nlink(&vnode->vfs_inode); + if (valid) goto valid; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ea1460b9b71a..e1126659f043 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, { struct afs_net *net = call->net; enum afs_call_state state; - u32 remote_abort; + u32 remote_abort = 0; int ret; _enter("{%s,%zu},,%zu,%d", diff --git a/fs/afs/write.c b/fs/afs/write.c index cb5f8a3df577..9370e2feb999 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, ret = afs_fill_page(vnode, key, pos + copied, len - copied, page); if (ret < 0) - return ret; + goto out; } SetPageUptodate(page); } @@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping, set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); + ret = copied; + +out: unlock_page(page); put_page(page); - - return copied; + return ret; } /* diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 5d73f79ded8b..056276101c63 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( spin_lock(&root->inode_lock); node = radix_tree_lookup(&root->delayed_nodes_tree, ino); + if (node) { if (btrfs_inode->delayed_node) { refcount_inc(&node->refs); /* can be accessed */ @@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( spin_unlock(&root->inode_lock); return node; } - btrfs_inode->delayed_node = node; - /* can be accessed and cached in the inode */ - refcount_add(2, &node->refs); + + /* + * It's possible that we're racing into the middle of removing + * this node from the radix tree. In this case, the refcount + * was zero and it should never go back to one. Just return + * NULL like it was never in the radix at all; our release + * function is in the process of removing it. + * + * Some implementations of refcount_inc refuse to bump the + * refcount once it has hit zero. If we don't do this dance + * here, refcount_inc() may decide to just WARN_ONCE() instead + * of actually bumping the refcount. + * + * If this node is properly in the radix, we want to bump the + * refcount twice, once for the inode and once for this get + * operation. + */ + if (refcount_inc_not_zero(&node->refs)) { + refcount_inc(&node->refs); + btrfs_inode->delayed_node = node; + } else { + node = NULL; + } + spin_unlock(&root->inode_lock); return node; } @@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node( mutex_unlock(&delayed_node->mutex); if (refcount_dec_and_test(&delayed_node->refs)) { - bool free = false; struct btrfs_root *root = delayed_node->root; + spin_lock(&root->inode_lock); - if (refcount_read(&delayed_node->refs) == 0) { - radix_tree_delete(&root->delayed_nodes_tree, - delayed_node->inode_id); - free = true; - } + /* + * Once our refcount goes to zero, nobody is allowed to bump it + * back up. We can delete it now. + */ + ASSERT(refcount_read(&delayed_node->refs) == 0); + radix_tree_delete(&root->delayed_nodes_tree, + delayed_node->inode_id); spin_unlock(&root->inode_lock); - if (free) - kmem_cache_free(delayed_node_cache, delayed_node); + kmem_cache_free(delayed_node_cache, delayed_node); } } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 49810b70afd3..a25684287501 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -237,7 +237,6 @@ static struct btrfs_device *__alloc_device(void) kfree(dev); return ERR_PTR(-ENOMEM); } - bio_get(dev->flush_bio); INIT_LIST_HEAD(&dev->dev_list); INIT_LIST_HEAD(&dev->dev_alloc_list); diff --git a/fs/exec.c b/fs/exec.c index 5688b5e1b937..7eb8d21bcab9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1349,9 +1349,14 @@ void setup_new_exec(struct linux_binprm * bprm) current->sas_ss_sp = current->sas_ss_size = 0; - /* Figure out dumpability. */ + /* + * Figure out dumpability. Note that this checking only of current + * is wrong, but userspace depends on it. This should be testing + * bprm->secureexec instead. + */ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP || - bprm->secureexec) + !(uid_eq(current_euid(), current_uid()) && + gid_eq(current_egid(), current_gid()))) set_dumpable(current->mm, suid_dumpable); else set_dumpable(current->mm, SUID_DUMP_USER); diff --git a/fs/super.c b/fs/super.c index 7ff1349609e4..06bd25d90ba5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -517,7 +517,11 @@ retry: hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - register_shrinker(&s->s_shrink); + err = register_shrinker(&s->s_shrink); + if (err) { + deactivate_locked_super(s); + s = ERR_PTR(err); + } return s; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ac9a4e65ca49..41a75f9f23fd 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -570,11 +570,14 @@ out: static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, struct userfaultfd_wait_queue *ewq) { + struct userfaultfd_ctx *release_new_ctx; + if (WARN_ON_ONCE(current->flags & PF_EXITING)) goto out; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); + release_new_ctx = NULL; spin_lock(&ctx->event_wqh.lock); /* @@ -601,8 +604,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, new = (struct userfaultfd_ctx *) (unsigned long) ewq->msg.arg.reserved.reserved1; - - userfaultfd_ctx_put(new); + release_new_ctx = new; } break; } @@ -617,6 +619,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, __set_current_state(TASK_RUNNING); spin_unlock(&ctx->event_wqh.lock); + if (release_new_ctx) { + struct vm_area_struct *vma; + struct mm_struct *mm = release_new_ctx->mm; + + /* the various vma->vm_userfaultfd_ctx still points to it */ + down_write(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + up_write(&mm->mmap_sem); + + userfaultfd_ctx_put(release_new_ctx); + } + /* * ctx may go away after this if the userfault pseudo fd is * already released. diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 21e2d70884e1..4fc526a27a94 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -399,7 +399,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) + if (offset > mp->m_super->s_maxbytes - count) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1312,7 +1312,7 @@ xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) + if (offset > mp->m_super->s_maxbytes - size) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7ab52a8bc0a9..66e1edbfb2b2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1006,7 +1006,7 @@ xfs_file_iomap_begin( } ASSERT(offset <= mp->m_super->s_maxbytes); - if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) + if (offset > mp->m_super->s_maxbytes - length) length = mp->m_super->s_maxbytes - offset; offset_fsb = XFS_B_TO_FSBT(mp, offset); end_fsb = XFS_B_TO_FSB(mp, offset + length); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ec952dfad359..b897b11afb2c 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -48,7 +48,7 @@ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); - +STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi); STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); /* * We use the batch lookup interface to iterate over the dquots as it @@ -695,9 +695,17 @@ xfs_qm_init_quotainfo( qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; qinf->qi_shrinker.seeks = DEFAULT_SEEKS; qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&qinf->qi_shrinker); + + error = register_shrinker(&qinf->qi_shrinker); + if (error) + goto out_free_inos; + return 0; +out_free_inos: + mutex_destroy(&qinf->qi_quotaofflock); + mutex_destroy(&qinf->qi_tree_lock); + xfs_qm_destroy_quotainos(qinf); out_free_lru: list_lru_destroy(&qinf->qi_lru); out_free_qinf: @@ -706,7 +714,6 @@ out_free_qinf: return error; } - /* * Gets called when unmounting a filesystem or when all quotas get * turned off. @@ -723,19 +730,8 @@ xfs_qm_destroy_quotainfo( unregister_shrinker(&qi->qi_shrinker); list_lru_destroy(&qi->qi_lru); - - if (qi->qi_uquotaip) { - IRELE(qi->qi_uquotaip); - qi->qi_uquotaip = NULL; /* paranoia */ - } - if (qi->qi_gquotaip) { - IRELE(qi->qi_gquotaip); - qi->qi_gquotaip = NULL; - } - if (qi->qi_pquotaip) { - IRELE(qi->qi_pquotaip); - qi->qi_pquotaip = NULL; - } + xfs_qm_destroy_quotainos(qi); + mutex_destroy(&qi->qi_tree_lock); mutex_destroy(&qi->qi_quotaofflock); kmem_free(qi); mp->m_quotainfo = NULL; @@ -1600,6 +1596,24 @@ error_rele: } STATIC void +xfs_qm_destroy_quotainos( + xfs_quotainfo_t *qi) +{ + if (qi->qi_uquotaip) { + IRELE(qi->qi_uquotaip); + qi->qi_uquotaip = NULL; /* paranoia */ + } + if (qi->qi_gquotaip) { + IRELE(qi->qi_gquotaip); + qi->qi_gquotaip = NULL; + } + if (qi->qi_pquotaip) { + IRELE(qi->qi_pquotaip); + qi->qi_pquotaip = NULL; + } +} + +STATIC void xfs_qm_dqfree_one( struct xfs_dquot *dqp) { diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 38d9c5861ed8..f38227a78eae 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -18,6 +18,7 @@ #include <linux/if_alg.h> #include <linux/scatterlist.h> #include <linux/types.h> +#include <linux/atomic.h> #include <net/sock.h> #include <crypto/aead.h> @@ -150,7 +151,7 @@ struct af_alg_ctx { struct crypto_wait wait; size_t used; - size_t rcvused; + atomic_t rcvused; bool more; bool merge; @@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk) struct af_alg_ctx *ctx = ask->private; return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) - - ctx->rcvused, 0); + atomic_read(&ctx->rcvused), 0); } /** diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9e03046d1df2..6be837c063c3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -434,6 +434,8 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) attr->numa_node : NUMA_NO_NODE; } +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -521,6 +523,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, { return 0; } + +static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, + enum bpf_prog_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@ -529,6 +537,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, return bpf_prog_get_type_dev(ufd, type, false); } +bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); + int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); int bpf_prog_offload_info_fill(struct bpf_prog_info *info, diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 201ab7267986..1a32e558eb11 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -86,7 +86,7 @@ enum cpuhp_state { CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, - CPUHP_TIMERS_DEAD, + CPUHP_TIMERS_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, diff --git a/include/linux/efi.h b/include/linux/efi.h index d813f7b04da7..29fdf8029cf6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -140,11 +140,13 @@ struct efi_boot_memmap { struct capsule_info { efi_capsule_header_t header; + efi_capsule_header_t *capsule; int reset_type; long index; size_t count; size_t total_size; - phys_addr_t *pages; + struct page **pages; + phys_addr_t *phys; size_t page_bytes_remain; }; diff --git a/include/linux/fscache.h b/include/linux/fscache.h index f4ff47d4a893..fe0c349684fa 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -755,7 +755,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie, { if (fscache_cookie_valid(cookie) && PageFsCache(page)) return __fscache_maybe_release_page(cookie, page, gfp); - return false; + return true; } /** diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 55e672592fa9..7258cd676df4 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -66,9 +66,10 @@ struct gpio_irq_chip { /** * @lock_key: * - * Per GPIO IRQ chip lockdep class. + * Per GPIO IRQ chip lockdep classes. */ struct lock_class_key *lock_key; + struct lock_class_key *request_key; /** * @parent_handler: @@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip, /* add/remove chips */ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, */ #ifdef CONFIG_LOCKDEP #define gpiochip_add_data(chip, data) ({ \ - static struct lock_class_key key; \ - gpiochip_add_data_with_key(chip, data, &key); \ + static struct lock_class_key lock_key; \ + static struct lock_class_key request_key; \ + gpiochip_add_data_with_key(chip, data, &lock_key, \ + &request_key); \ }) #else -#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL) +#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL) #endif static inline int gpiochip_add(struct gpio_chip *chip) @@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); #ifdef CONFIG_LOCKDEP @@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, &key); + handler, type, false, + &lock_key, &request_key); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, &key); + handler, type, true, + &lock_key, &request_key); } #else static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, @@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, NULL); + handler, type, false, NULL, NULL); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, NULL); + handler, type, true, NULL, NULL); } #endif /* CONFIG_LOCKDEP */ diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 3ecef57c31e3..8e66866c11be 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -4,7 +4,7 @@ #if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); -struct skb_array *tap_get_skb_array(struct file *file); +struct ptr_ring *tap_get_ptr_ring(struct file *file); #else #include <linux/err.h> #include <linux/errno.h> @@ -14,7 +14,7 @@ static inline struct socket *tap_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } -static inline struct skb_array *tap_get_skb_array(struct file *f) +static inline struct ptr_ring *tap_get_ptr_ring(struct file *f) { return ERR_PTR(-EINVAL); } @@ -70,7 +70,7 @@ struct tap_queue { u16 queue_index; bool enabled; struct list_head next; - struct skb_array skb_array; + struct ptr_ring ring; }; rx_handler_result_t tap_handle_frame(struct sk_buff **pskb); diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index bf9bdf42d577..08e66827ad8e 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -17,9 +17,14 @@ #include <uapi/linux/if_tun.h> +#define TUN_XDP_FLAG 0x1UL + #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); -struct skb_array *tun_get_skb_array(struct file *file); +struct ptr_ring *tun_get_tx_ring(struct file *file); +bool tun_is_xdp_buff(void *ptr); +void *tun_xdp_to_ptr(void *ptr); +void *tun_ptr_to_xdp(void *ptr); #else #include <linux/err.h> #include <linux/errno.h> @@ -29,9 +34,21 @@ static inline struct socket *tun_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } -static inline struct skb_array *tun_get_skb_array(struct file *f) +static inline struct ptr_ring *tun_get_tx_ring(struct file *f) { return ERR_PTR(-EINVAL); } +static inline bool tun_is_xdp_buff(void *ptr) +{ + return false; +} +void *tun_xdp_to_ptr(void *ptr) +{ + return NULL; +} +void *tun_ptr_to_xdp(void *ptr) +{ + return NULL; +} #endif /* CONFIG_TUN */ #endif /* __IF_TUN_H */ diff --git a/include/linux/irq.h b/include/linux/irq.h index e140f69163b6..a0231e96a578 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -212,6 +212,7 @@ struct irq_data { * mask. Applies only to affinity managed irqs. * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set + * IRQD_CAN_RESERVE - Can use reservation mode */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -233,6 +234,7 @@ enum { IRQD_MANAGED_SHUTDOWN = (1 << 23), IRQD_SINGLE_TARGET = (1 << 24), IRQD_DEFAULT_TRIGGER_SET = (1 << 25), + IRQD_CAN_RESERVE = (1 << 26), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; } +static inline void irqd_set_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_CAN_RESERVE; +} + +static inline void irqd_clr_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_CAN_RESERVE; +} + +static inline bool irqd_can_reserve(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_CAN_RESERVE; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 39fb3700f7a9..25b33b664537 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq) } static inline void -irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) +irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class) { struct irq_desc *desc = irq_to_desc(irq); - if (desc) - lockdep_set_class(&desc->lock, class); + if (desc) { + lockdep_set_class(&desc->lock, lock_class); + lockdep_set_class(&desc->request_mutex, request_class); + } } #ifdef CONFIG_IRQ_PREFLOW_FASTEOI diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a34355d19546..48c7e86bb556 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -113,7 +113,7 @@ struct irq_domain_ops { unsigned int nr_irqs, void *arg); void (*free)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs); - int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); diff --git a/include/linux/pti.h b/include/linux/pti.h new file mode 100644 index 000000000000..0174883a935a --- /dev/null +++ b/include/linux/pti.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _INCLUDE_PTI_H +#define _INCLUDE_PTI_H + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +#include <asm/pti.h> +#else +static inline void pti_init(void) { } +#endif + +#endif diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index dd5d69323701..6dfda97a6c1a 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -14,7 +14,6 @@ struct sh_eth_plat_data { unsigned char mac_addr[ETH_ALEN]; unsigned no_ether_link:1; unsigned ether_link_active_low:1; - unsigned needs_init:1; }; #endif diff --git a/include/linux/tick.h b/include/linux/tick.h index f442d1a42025..7cc35921218e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern unsigned long tick_nohz_get_idle_calls(void); +extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ diff --git a/include/linux/timer.h b/include/linux/timer.h index 04af640ea95b..2448f9cc48a3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); #ifdef CONFIG_HOTPLUG_CPU +int timers_prepare_cpu(unsigned int cpu); int timers_dead_cpu(unsigned int cpu); #else -#define timers_dead_cpu NULL +#define timers_prepare_cpu NULL +#define timers_dead_cpu NULL #endif #endif diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8ac4d5cdbfed..02369e379d35 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -993,7 +993,7 @@ void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *t); -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 13223396dc64..f96391e84a8a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -146,7 +146,7 @@ struct vxlanhdr_gpe { np_applied:1, instance_applied:1, version:2, -reserved_flags2:2; + reserved_flags2:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags2:2, version:2, diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 87b7529fcdfe..f8cb5760ea4f 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -23,6 +23,7 @@ #define _UAPI_LINUX_IF_ETHER_H #include <linux/types.h> +#include <linux/libc-compat.h> /* * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble @@ -150,11 +151,13 @@ * This is an Ethernet frame header. */ +#if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ } __attribute__((packed)); +#endif #endif /* _UAPI_LINUX_IF_ETHER_H */ diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 719d243471f4..2e522835a4af 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -18,12 +18,17 @@ #define MACSEC_GENL_NAME "macsec" #define MACSEC_GENL_VERSION 1 -#define MACSEC_MAX_KEY_LEN 128 +#define MACSEC_MAX_KEY_LEN 256 #define MACSEC_KEYID_LEN 16 -#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL -#define MACSEC_DEFAULT_CIPHER_ALT 0x0080C20001000001ULL +/* cipher IDs as per IEEE802.1AEbn-2011 */ +#define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL +#define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL + +#define MACSEC_DEFAULT_CIPHER_ID MACSEC_CIPHER_ID_GCM_AES_128 +/* deprecated cipher ID for GCM-AES-128 */ +#define MACSEC_DEFAULT_CIPHER_ALT 0x0080020001000001ULL #define MACSEC_MIN_ICV_LEN 8 #define MACSEC_MAX_ICV_LEN 32 diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 282875cf8056..fc29efaa918c 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -168,47 +168,106 @@ /* If we did not see any headers from any supported C libraries, * or we are being included in the kernel, then define everything - * that we need. */ + * that we need. Check for previous __UAPI_* definitions to give + * unsupported C libraries a way to opt out of any kernel definition. */ #else /* !defined(__GLIBC__) */ /* Definitions for if.h */ +#ifndef __UAPI_DEF_IF_IFCONF #define __UAPI_DEF_IF_IFCONF 1 +#endif +#ifndef __UAPI_DEF_IF_IFMAP #define __UAPI_DEF_IF_IFMAP 1 +#endif +#ifndef __UAPI_DEF_IF_IFNAMSIZ #define __UAPI_DEF_IF_IFNAMSIZ 1 +#endif +#ifndef __UAPI_DEF_IF_IFREQ #define __UAPI_DEF_IF_IFREQ 1 +#endif /* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS #define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +#endif /* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO #define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif /* Definitions for in.h */ +#ifndef __UAPI_DEF_IN_ADDR #define __UAPI_DEF_IN_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN_IPPROTO #define __UAPI_DEF_IN_IPPROTO 1 +#endif +#ifndef __UAPI_DEF_IN_PKTINFO #define __UAPI_DEF_IN_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP_MREQ #define __UAPI_DEF_IP_MREQ 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN #define __UAPI_DEF_SOCKADDR_IN 1 +#endif +#ifndef __UAPI_DEF_IN_CLASS #define __UAPI_DEF_IN_CLASS 1 +#endif /* Definitions for in6.h */ +#ifndef __UAPI_DEF_IN6_ADDR #define __UAPI_DEF_IN6_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN6_ADDR_ALT #define __UAPI_DEF_IN6_ADDR_ALT 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN6 #define __UAPI_DEF_SOCKADDR_IN6 1 +#endif +#ifndef __UAPI_DEF_IPV6_MREQ #define __UAPI_DEF_IPV6_MREQ 1 +#endif +#ifndef __UAPI_DEF_IPPROTO_V6 #define __UAPI_DEF_IPPROTO_V6 1 +#endif +#ifndef __UAPI_DEF_IPV6_OPTIONS #define __UAPI_DEF_IPV6_OPTIONS 1 +#endif +#ifndef __UAPI_DEF_IN6_PKTINFO #define __UAPI_DEF_IN6_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP6_MTUINFO #define __UAPI_DEF_IP6_MTUINFO 1 +#endif /* Definitions for ipx.h */ +#ifndef __UAPI_DEF_SOCKADDR_IPX #define __UAPI_DEF_SOCKADDR_IPX 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION #define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION #define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_CONFIG_DATA #define __UAPI_DEF_IPX_CONFIG_DATA 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEF #define __UAPI_DEF_IPX_ROUTE_DEF 1 +#endif /* Definitions for xattr.h */ +#ifndef __UAPI_DEF_XATTR #define __UAPI_DEF_XATTR 1 +#endif #endif /* __GLIBC__ */ +/* Definitions for if_ether.h */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + #endif /* _UAPI_LIBC_COMPAT_H */ diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index fc8c15a24a43..9574bd40870b 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -36,7 +36,7 @@ enum ip_conntrack_info { #define NF_CT_STATE_INVALID_BIT (1 << 0) #define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) -#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << 6) /* Bitset representing status of connection. */ enum ip_conntrack_status { diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 35f79d1f8c3a..14bacc7e6cef 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -117,10 +117,9 @@ static inline unsigned int tipc_node(__u32 addr) /* * Publication scopes when binding port names and port name sequences */ - -#define TIPC_ZONE_SCOPE 1 -#define TIPC_CLUSTER_SCOPE 2 -#define TIPC_NODE_SCOPE 3 +#define TIPC_ZONE_SCOPE 1 +#define TIPC_CLUSTER_SCOPE 2 +#define TIPC_NODE_SCOPE 3 /* * Limiting values for messages diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index fc353b518288..5de6ed37695b 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -57,6 +57,8 @@ * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Device set linkspeed and duplex */ + #ifndef VIRTIO_NET_NO_LEGACY #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ #endif /* VIRTIO_NET_NO_LEGACY */ @@ -76,6 +78,17 @@ struct virtio_net_config { __u16 max_virtqueue_pairs; /* Default maximum transmit unit advice */ __u16 mtu; + /* + * speed, in units of 1Mb. All values 0 to INT_MAX are legal. + * Any other value stands for unknown. + */ + __u32 speed; + /* + * 0x00 - half duplex + * 0x01 - full duplex + * Any other value stands for unknown. + */ + __u8 duplex; } __attribute__((packed)); /* diff --git a/init/Kconfig b/init/Kconfig index 2934249fba46..690a381adee0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -461,10 +461,14 @@ endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION bool "CPU isolation" + default y help Make sure that CPUs running critical tasks are not disturbed by any source of "noise" such as unbound workqueues, timers, kthreads... - Unbound jobs get offloaded to housekeeping CPUs. + Unbound jobs get offloaded to housekeeping CPUs. This is driven by + the "isolcpus=" boot parameter. + + Say Y if unsure. source "kernel/rcu/Kconfig" diff --git a/init/main.c b/init/main.c index 7b606fc48482..a8100b954839 100644 --- a/init/main.c +++ b/init/main.c @@ -75,6 +75,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <linux/ptrace.h> +#include <linux/pti.h> #include <linux/blkdev.h> #include <linux/elevator.h> #include <linux/sched_clock.h> @@ -506,6 +507,8 @@ static void __init mm_init(void) ioremap_huge_init(); /* Should be run before the first non-init thread is created */ init_espfix_bsp(); + /* Should be run after espfix64 is set up. */ + pti_init(); } asmlinkage __visible void __init start_kernel(void) diff --git a/kernel/acct.c b/kernel/acct.c index d15c0ee4d955..addf7732fb56 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -102,7 +102,7 @@ static int check_free_space(struct bsd_acct_struct *acct) { struct kstatfs sbuf; - if (time_is_before_jiffies(acct->needcheck)) + if (time_is_after_jiffies(acct->needcheck)) goto out; /* May block */ diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 01aaef1a77c5..5bb5e49ef4c3 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -368,7 +368,45 @@ out: putname(pname); return ret; } -EXPORT_SYMBOL_GPL(bpf_obj_get_user); + +static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) +{ + struct bpf_prog *prog; + int ret = inode_permission(inode, MAY_READ | MAY_WRITE); + if (ret) + return ERR_PTR(ret); + + if (inode->i_op == &bpf_map_iops) + return ERR_PTR(-EINVAL); + if (inode->i_op != &bpf_prog_iops) + return ERR_PTR(-EACCES); + + prog = inode->i_private; + + ret = security_bpf_prog(prog); + if (ret < 0) + return ERR_PTR(ret); + + if (!bpf_prog_get_ok(prog, &type, false)) + return ERR_PTR(-EINVAL); + + return bpf_prog_inc(prog); +} + +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) +{ + struct bpf_prog *prog; + struct path path; + int ret = kern_path(name, LOOKUP_FOLLOW, &path); + if (ret) + return ERR_PTR(ret); + prog = __get_prog_inode(d_backing_inode(path.dentry), type); + if (!IS_ERR(prog)) + touch_atime(&path); + path_put(&path); + return prog; +} +EXPORT_SYMBOL(bpf_prog_get_type_path); static void bpf_evict_inode(struct inode *inode) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ebf0fb23e237..2bac0dc8baba 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1068,7 +1068,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) } EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); -static bool bpf_prog_get_ok(struct bpf_prog *prog, +bool bpf_prog_get_ok(struct bpf_prog *prog, enum bpf_prog_type *attach_type, bool attach_drv) { /* not an attachment, just a refcount inc, always allow */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 024085daab1a..a2c05d2476ac 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -123,7 +123,11 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) */ do { css_task_iter_start(&from->self, 0, &it); - task = css_task_iter_next(&it); + + do { + task = css_task_iter_next(&it); + } while (task && (task->flags & PF_EXITING)); + if (task) get_task_struct(task); css_task_iter_end(&it); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0b1ffe147f24..2cf06c274e4c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1397,7 +1397,7 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name, cft->name); else - strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX); + strlcpy(buf, cft->name, CGROUP_FILE_NAME_MAX); return buf; } @@ -1864,9 +1864,9 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) root->flags = opts->flags; if (opts->release_agent) - strcpy(root->release_agent_path, opts->release_agent); + strlcpy(root->release_agent_path, opts->release_agent, PATH_MAX); if (opts->name) - strcpy(root->name, opts->name); + strlcpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); if (opts->cpuset_clone_children) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } @@ -4125,26 +4125,24 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) static void css_task_iter_advance(struct css_task_iter *it) { - struct list_head *l = it->task_pos; + struct list_head *next; lockdep_assert_held(&css_set_lock); - WARN_ON_ONCE(!l); - repeat: /* * Advance iterator to find next entry. cset->tasks is consumed * first and then ->mg_tasks. After ->mg_tasks, we move onto the * next cset. */ - l = l->next; + next = it->task_pos->next; - if (l == it->tasks_head) - l = it->mg_tasks_head->next; + if (next == it->tasks_head) + next = it->mg_tasks_head->next; - if (l == it->mg_tasks_head) + if (next == it->mg_tasks_head) css_task_iter_advance_css_set(it); else - it->task_pos = l; + it->task_pos = next; /* if PROCS, skip over tasks which aren't group leaders */ if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos && diff --git a/kernel/cpu.c b/kernel/cpu.c index 41376c3ac93b..53f7dc65f9a3 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -80,19 +80,19 @@ static struct lockdep_map cpuhp_state_down_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); -static void inline cpuhp_lock_acquire(bool bringup) +static inline void cpuhp_lock_acquire(bool bringup) { lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } -static void inline cpuhp_lock_release(bool bringup) +static inline void cpuhp_lock_release(bool bringup) { lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } #else -static void inline cpuhp_lock_acquire(bool bringup) { } -static void inline cpuhp_lock_release(bool bringup) { } +static inline void cpuhp_lock_acquire(bool bringup) { } +static inline void cpuhp_lock_release(bool bringup) { } #endif @@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = { * before blk_mq_queue_reinit_notify() from notify_dead(), * otherwise a RCU stall occurs. */ - [CPUHP_TIMERS_DEAD] = { + [CPUHP_TIMERS_PREPARE] = { .name = "timers:dead", - .startup.single = NULL, + .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, /* Kicks the plugged cpu into life */ diff --git a/kernel/exit.c b/kernel/exit.c index df0c91d5606c..995453d9fb55 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1763,3 +1763,4 @@ __weak void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } +EXPORT_SYMBOL(abort); diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index 17f05ef8f575..e4d3819a91cc 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h @@ -12,6 +12,11 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) { + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); + + if (!__ratelimit(&ratelimit)) + return; + printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); printk("->handle_irq(): %p, ", desc->handle_irq); diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 7f608ac39653..acfaaef8672a 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), + BIT_MASK_DESCR(IRQD_CAN_RESERVE), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index c26c5bb6b491..508c03dfef25 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); /* - * Separate lockdep class for interrupt chip which can nest irq_desc - * lock. + * Separate lockdep classes for interrupt chip which can nest irq_desc + * lock and request mutex. */ static struct lock_class_key irq_nested_lock_class; +static struct lock_class_key irq_nested_request_class; /* * irq_map_generic_chip - Map a generic chip for an irq domain @@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, set_bit(idx, &gc->installed); if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(virq, &irq_nested_lock_class); + irq_set_lockdep_class(virq, &irq_nested_lock_class, + &irq_nested_request_class); if (chip->irq_calc_mask) chip->irq_calc_mask(data); @@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, continue; if (flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(i, &irq_nested_lock_class); + irq_set_lockdep_class(i, &irq_nested_lock_class, + &irq_nested_request_class); if (!(flags & IRQ_GC_NO_MASK)) { struct irq_data *d = irq_get_irq_data(i); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 07d08ca701ec..ab19371eab9b 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) #endif /* !CONFIG_GENERIC_PENDING_IRQ */ #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) -static inline int irq_domain_activate_irq(struct irq_data *data, bool early) +static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve) { irqd_set_activated(data); return 0; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 4f4f60015e8a..62068ad46930 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data) } } -static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) +static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve) { int ret = 0; @@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) if (irqd->parent_data) ret = __irq_domain_activate_irq(irqd->parent_data, - early); + reserve); if (!ret && domain->ops->activate) { - ret = domain->ops->activate(domain, irqd, early); + ret = domain->ops->activate(domain, irqd, reserve); /* Rollback in case of error */ if (ret && irqd->parent_data) __irq_domain_deactivate_irq(irqd->parent_data); @@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) /** * irq_domain_activate_irq - Call domain_ops->activate recursively to activate * interrupt - * @irq_data: outermost irq_data associated with interrupt + * @irq_data: Outermost irq_data associated with interrupt + * @reserve: If set only reserve an interrupt vector instead of assigning one * * This is the second step to call domain_ops->activate to program interrupt * controllers, so the interrupt could actually get delivered. */ -int irq_domain_activate_irq(struct irq_data *irq_data, bool early) +int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve) { int ret = 0; if (!irqd_is_activated(irq_data)) - ret = __irq_domain_activate_irq(irq_data, early); + ret = __irq_domain_activate_irq(irq_data, reserve); if (!ret) irqd_set_activated(irq_data); return ret; diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index edb987b2c58d..2f3c4f5382cc 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -339,6 +339,40 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, return ret; } +/* + * Carefully check whether the device can use reservation mode. If + * reservation mode is enabled then the early activation will assign a + * dummy vector to the device. If the PCI/MSI device does not support + * masking of the entry then this can result in spurious interrupts when + * the device driver is not absolutely careful. But even then a malfunction + * of the hardware could result in a spurious interrupt on the dummy vector + * and render the device unusable. If the entry can be masked then the core + * logic will prevent the spurious interrupt and reservation mode can be + * used. For now reservation mode is restricted to PCI/MSI. + */ +static bool msi_check_reservation_mode(struct irq_domain *domain, + struct msi_domain_info *info, + struct device *dev) +{ + struct msi_desc *desc; + + if (domain->bus_token != DOMAIN_BUS_PCI_MSI) + return false; + + if (!(info->flags & MSI_FLAG_MUST_REACTIVATE)) + return false; + + if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask) + return false; + + /* + * Checking the first MSI descriptor is sufficient. MSIX supports + * masking and MSI does so when the maskbit is set. + */ + desc = first_msi_entry(dev); + return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit; +} + /** * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain * @domain: The domain to allocate from @@ -353,9 +387,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, { struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; - msi_alloc_info_t arg; + struct irq_data *irq_data; struct msi_desc *desc; + msi_alloc_info_t arg; int i, ret, virq; + bool can_reserve; ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); if (ret) @@ -385,6 +421,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, if (ops->msi_finish) ops->msi_finish(&arg, 0); + can_reserve = msi_check_reservation_mode(domain, info, dev); + for_each_msi_entry(desc, dev) { virq = desc->irq; if (desc->nvec_used == 1) @@ -397,15 +435,25 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, * the MSI entries before the PCI layer enables MSI in the * card. Otherwise the card latches a random msi message. */ - if (info->flags & MSI_FLAG_ACTIVATE_EARLY) { - struct irq_data *irq_data; + if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) + continue; + irq_data = irq_domain_get_irq_data(domain, desc->irq); + if (!can_reserve) + irqd_clr_can_reserve(irq_data); + ret = irq_domain_activate_irq(irq_data, can_reserve); + if (ret) + goto cleanup; + } + + /* + * If these interrupts use reservation mode, clear the activated bit + * so request_irq() will assign the final vector. + */ + if (can_reserve) { + for_each_msi_entry(desc, dev) { irq_data = irq_domain_get_irq_data(domain, desc->irq); - ret = irq_domain_activate_irq(irq_data, true); - if (ret) - goto cleanup; - if (info->flags & MSI_FLAG_MUST_REACTIVATE) - irqd_clr_activated(irq_data); + irqd_clr_activated(irq_data); } } return 0; diff --git a/kernel/pid.c b/kernel/pid.c index b13b624e2c49..1e8bb6550ec4 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -193,10 +193,8 @@ struct pid *alloc_pid(struct pid_namespace *ns) } if (unlikely(is_child_reaper(pid))) { - if (pid_ns_prepare_proc(ns)) { - disable_pid_allocation(ns); + if (pid_ns_prepare_proc(ns)) goto out_free; - } } get_pid_ns(ns); @@ -226,6 +224,10 @@ out_free: while (++i <= ns->level) idr_remove(&ns->idr, (pid->numbers + i)->nr); + /* On failure to allocate the first pid, reset the state */ + if (ns->pid_allocated == PIDNS_ADDING) + idr_set_cursor(&ns->idr, 0); + spin_unlock_irq(&pidmap_lock); kmem_cache_free(ns->pid_cachep, pid); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 2f52ec0f1539..d6717a3331a1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, #ifdef CONFIG_NO_HZ_COMMON static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { - unsigned long idle_calls = tick_nohz_get_idle_calls(); + unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu); bool ret = idle_calls == sg_cpu->saved_idle_calls; sg_cpu->saved_idle_calls = idle_calls; diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index e776fc8cc1df..f6b5f19223d6 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -95,6 +95,7 @@ config NO_HZ_FULL select RCU_NOCB_CPU select VIRT_CPU_ACCOUNTING_GEN select IRQ_WORK + select CPU_ISOLATION help Adaptively try to shutdown the tick whenever possible, even when the CPU is running tasks. Typically this requires running a single diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 99578f06c8d4..f7cc7abfcf25 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) ts->next_tick = 0; } +static inline bool local_timer_softirq_pending(void) +{ + return local_softirq_pending() & TIMER_SOFTIRQ; +} + static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now, int cpu) { @@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, } while (read_seqretry(&jiffies_lock, seq)); ts->last_jiffies = basejiff; - if (rcu_needs_cpu(basemono, &next_rcu) || - arch_needs_cpu() || irq_work_needs_cpu()) { + /* + * Keep the periodic tick, when RCU, architecture or irq_work + * requests it. + * Aside of that check whether the local timer softirq is + * pending. If so its a bad idea to call get_next_timer_interrupt() + * because there is an already expired timer, so it will request + * immeditate expiry, which rearms the hardware timer with a + * minimal delta which brings us back to this place + * immediately. Lather, rinse and repeat... + */ + if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { /* @@ -986,6 +1001,19 @@ ktime_t tick_nohz_get_sleep_length(void) } /** + * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value + * for a particular CPU. + * + * Called from the schedutil frequency scaling governor in scheduler context. + */ +unsigned long tick_nohz_get_idle_calls_cpu(int cpu) +{ + struct tick_sched *ts = tick_get_tick_sched(cpu); + + return ts->idle_calls; +} + +/** * tick_nohz_get_idle_calls - return the current idle calls counter value * * Called from the schedutil frequency scaling governor in scheduler context. diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ffebcf878fba..89a9e1b4264a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); return base; } @@ -837,11 +836,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = this_cpu_ptr(&timer_bases[BASE_DEF]); return base; } @@ -1009,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option if (!ret && (options & MOD_TIMER_PENDING_ONLY)) goto out_unlock; - debug_activate(timer, expires); - new_base = get_target_base(base, timer->flags); if (base != new_base) { @@ -1034,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option } } + debug_activate(timer, expires); + timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance @@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) base->must_forward_clk = false; __run_timers(base); - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); } @@ -1855,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h } } +int timers_prepare_cpu(unsigned int cpu) +{ + struct timer_base *base; + int b; + + for (b = 0; b < NR_BASES; b++) { + base = per_cpu_ptr(&timer_bases[b], cpu); + base->clk = jiffies; + base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; + base->is_idle = false; + base->must_forward_clk = true; + } + return 0; +} + int timers_dead_cpu(unsigned int cpu) { struct timer_base *old_base; diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index c3e84edc47c9..2615074d3de5 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -346,7 +346,8 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, static void zap_modalias_env(struct kobj_uevent_env *env) { static const char modalias_prefix[] = "MODALIAS="; - int i; + size_t len; + int i, j; for (i = 0; i < env->envp_idx;) { if (strncmp(env->envp[i], modalias_prefix, @@ -355,11 +356,18 @@ static void zap_modalias_env(struct kobj_uevent_env *env) continue; } - if (i != env->envp_idx - 1) - memmove(&env->envp[i], &env->envp[i + 1], - sizeof(env->envp[i]) * env->envp_idx - 1); + len = strlen(env->envp[i]) + 1; + + if (i != env->envp_idx - 1) { + memmove(env->envp[i], env->envp[i + 1], + env->buflen - len); + + for (j = i; j < env->envp_idx - 1; j++) + env->envp[j] = env->envp[j + 1] - len; + } env->envp_idx--; + env->buflen -= len; } } diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index 57fd45ab7af1..08c60d10747f 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -671,7 +671,23 @@ do { \ ************** MIPS/64 ************** ***************************************/ #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 -#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 +/* + * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C + * code below, so we special case MIPS64r6 until the compiler can do better. + */ +#define umul_ppmm(w1, w0, u, v) \ +do { \ + __asm__ ("dmulu %0,%1,%2" \ + : "=d" ((UDItype)(w0)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ + __asm__ ("dmuhu %0,%1,%2" \ + : "=d" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ +} while (0) +#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ diff --git a/lib/timerqueue.c b/lib/timerqueue.c index 4a720ed4fdaf..0d54bcbc8170 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -33,8 +33,9 @@ * @head: head of timerqueue * @node: timer node to be added * - * Adds the timer node to the timerqueue, sorted by the - * node's expires value. + * Adds the timer node to the timerqueue, sorted by the node's expires + * value. Returns true if the newly added timer is the first expiring timer in + * the queue. */ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) { @@ -70,7 +71,8 @@ EXPORT_SYMBOL_GPL(timerqueue_add); * @head: head of timerqueue * @node: timer node to be removed * - * Removes the timer node from the timerqueue. + * Removes the timer node from the timerqueue. Returns true if the queue is + * not empty after the remove. */ bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) { diff --git a/mm/debug.c b/mm/debug.c index d947f3e03b0d..56e2d9125ea5 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -50,7 +50,7 @@ void __dump_page(struct page *page, const char *reason) */ int mapcount = PageSlab(page) ? 0 : page_mapcount(page); - pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", + pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx", page, page_ref_count(page), mapcount, page->mapping, page_to_pgoff(page)); if (PageCompound(page)) @@ -69,7 +69,7 @@ void __dump_page(struct page *page, const char *reason) #ifdef CONFIG_MEMCG if (page->mem_cgroup) - pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); + pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup); #endif } @@ -84,10 +84,10 @@ EXPORT_SYMBOL(dump_page); void dump_vma(const struct vm_area_struct *vma) { - pr_emerg("vma %p start %p end %p\n" - "next %p prev %p mm %p\n" - "prot %lx anon_vma %p vm_ops %p\n" - "pgoff %lx file %p private_data %p\n" + pr_emerg("vma %px start %px end %px\n" + "next %px prev %px mm %px\n" + "prot %lx anon_vma %px vm_ops %px\n" + "pgoff %lx file %px private_data %px\n" "flags: %#lx(%pGv)\n", vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next, vma->vm_prev, vma->vm_mm, @@ -100,27 +100,27 @@ EXPORT_SYMBOL(dump_vma); void dump_mm(const struct mm_struct *mm) { - pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n" + pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n" #ifdef CONFIG_MMU - "get_unmapped_area %p\n" + "get_unmapped_area %px\n" #endif "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" - "pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" + "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n" "start_code %lx end_code %lx start_data %lx end_data %lx\n" "start_brk %lx brk %lx start_stack %lx\n" "arg_start %lx arg_end %lx env_start %lx env_end %lx\n" - "binfmt %p flags %lx core_state %p\n" + "binfmt %px flags %lx core_state %px\n" #ifdef CONFIG_AIO - "ioctx_table %p\n" + "ioctx_table %px\n" #endif #ifdef CONFIG_MEMCG - "owner %p " + "owner %px " #endif - "exe_file %p\n" + "exe_file %px\n" #ifdef CONFIG_MMU_NOTIFIER - "mmu_notifier_mm %p\n" + "mmu_notifier_mm %px\n" #endif #ifdef CONFIG_NUMA_BALANCING "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n" diff --git a/mm/mprotect.c b/mm/mprotect.c index ec39f730a0bf..58b629bb70de 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -166,7 +166,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, next = pmd_addr_end(addr, end); if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) && pmd_none_or_clear_bad(pmd)) - continue; + goto next; /* invoke the mmu notifier if the pmd is populated */ if (!mni_start) { @@ -188,7 +188,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, } /* huge pmd was handled */ - continue; + goto next; } } /* fall through, the trans huge pmd just split */ @@ -196,6 +196,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, this_pages = change_pte_range(vma, pmd, addr, next, newprot, dirty_accountable, prot_numa); pages += this_pages; +next: + cond_resched(); } while (pmd++, addr = next, addr != end); if (mni_start) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7e5e775e97f4..76c9688b6a0a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6260,6 +6260,8 @@ void __paginginit zero_resv_unavail(void) pgcnt = 0; for_each_resv_unavail_range(i, &start, &end) { for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) { + if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) + continue; mm_zero_struct_page(pfn_to_page(pfn)); pgcnt++; } diff --git a/mm/sparse.c b/mm/sparse.c index 7a5dacaa06e3..2609aba121e8 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -211,7 +211,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) if (unlikely(!mem_section)) { unsigned long size, align; - size = sizeof(struct mem_section) * NR_SECTION_ROOTS; + size = sizeof(struct mem_section*) * NR_SECTION_ROOTS; align = 1 << (INTERNODE_CACHE_SHIFT); mem_section = memblock_virt_alloc(size, align); } diff --git a/mm/vmscan.c b/mm/vmscan.c index c02c850ea349..47d5ced51f2d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -297,10 +297,13 @@ EXPORT_SYMBOL(register_shrinker); */ void unregister_shrinker(struct shrinker *shrinker) { + if (!shrinker->nr_deferred) + return; down_write(&shrinker_rwsem); list_del(&shrinker->list); up_write(&shrinker_rwsem); kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; } EXPORT_SYMBOL(unregister_shrinker); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 685049a9048d..683c0651098c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -53,6 +53,7 @@ #include <linux/mount.h> #include <linux/migrate.h> #include <linux/pagemap.h> +#include <linux/fs.h> #define ZSPAGE_MAGIC 0x58 diff --git a/net/core/dev.c b/net/core/dev.c index d7925ef8743d..5cb782f074d7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name); int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { - return dev_alloc_name_ns(net, dev, name); + BUG_ON(!net); + + if (!dev_valid_name(name)) + return -EINVAL; + + if (strchr(name, '%')) + return dev_alloc_name_ns(net, dev, name); + else if (__dev_get_by_name(net, name)) + return -EEXIST; + else if (dev->name != name) + strlcpy(dev->name, name, IFNAMSIZ); + + return 0; } EXPORT_SYMBOL(dev_get_valid_name); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 50a79203043b..107b122c8969 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -771,15 +771,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev, return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); } -static void -warn_incomplete_ethtool_legacy_settings_conversion(const char *details) -{ - char name[sizeof(current->comm)]; - - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n", - get_task_comm(name, current), details); -} - /* Query device for its ethtool_cmd settings. * * Backward compatibility note: for compatibility with legacy ethtool, @@ -806,10 +797,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) &link_ksettings); if (err < 0) return err; - if (!convert_link_ksettings_to_legacy_settings(&cmd, - &link_ksettings)) - warn_incomplete_ethtool_legacy_settings_conversion( - "link modes are only partially reported"); + convert_link_ksettings_to_legacy_settings(&cmd, + &link_ksettings); /* send a sensible cmd tag back to user */ cmd.cmd = ETHTOOL_GSET; @@ -1704,14 +1693,23 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { - struct ethtool_ringparam ringparam; + struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; - if (!dev->ethtool_ops->set_ringparam) + if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) return -EFAULT; + dev->ethtool_ops->get_ringparam(dev, &max); + + /* ensure new ring parameters are within the maximums */ + if (ringparam.rx_pending > max.rx_max_pending || + ringparam.rx_mini_pending > max.rx_mini_max_pending || + ringparam.rx_jumbo_pending > max.rx_jumbo_max_pending || + ringparam.tx_pending > max.tx_max_pending) + return -EINVAL; + return dev->ethtool_ops->set_ringparam(dev, &ringparam); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5421a3fd3ba1..16d644a4f974 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1759,18 +1759,18 @@ static bool link_dump_filtered(struct net_device *dev, return false; } -static struct net *get_target_net(struct sk_buff *skb, int netnsid) +static struct net *get_target_net(struct sock *sk, int netnsid) { struct net *net; - net = get_net_ns_by_id(sock_net(skb->sk), netnsid); + net = get_net_ns_by_id(sock_net(sk), netnsid); if (!net) return ERR_PTR(-EINVAL); /* For now, the caller is required to have CAP_NET_ADMIN in * the user namespace owning the target net ns. */ - if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) { put_net(net); return ERR_PTR(-EACCES); } @@ -1811,7 +1811,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) ifla_policy, NULL) >= 0) { if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); - tgt_net = get_target_net(skb, netnsid); + tgt_net = get_target_net(skb->sk, netnsid); if (IS_ERR(tgt_net)) { tgt_net = net; netnsid = -1; @@ -2993,7 +2993,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); - tgt_net = get_target_net(skb, netnsid); + tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid); if (IS_ERR(tgt_net)) return PTR_ERR(tgt_net); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index edda5ad3b405..b5f19703fca6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1243,23 +1243,28 @@ out: * If fib6_add_1 has cleared the old leaf pointer in the * super-tree leaf node we have to find a new one for it. */ - struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, - lockdep_is_held(&table->tb6_lock)); - if (pn != fn && pn_leaf == rt) { - pn_leaf = NULL; - RCU_INIT_POINTER(pn->leaf, NULL); - atomic_dec(&rt->rt6i_ref); - } - if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { - pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); - pn_leaf = info->nl_net->ipv6.ip6_null_entry; + if (pn != fn) { + struct rt6_info *pn_leaf = + rcu_dereference_protected(pn->leaf, + lockdep_is_held(&table->tb6_lock)); + if (pn_leaf == rt) { + pn_leaf = NULL; + RCU_INIT_POINTER(pn->leaf, NULL); + atomic_dec(&rt->rt6i_ref); } + if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { + pn_leaf = fib6_find_prefix(info->nl_net, table, + pn); +#if RT6_DEBUG >= 2 + if (!pn_leaf) { + WARN_ON(!pn_leaf); + pn_leaf = + info->nl_net->ipv6.ip6_null_entry; + } #endif - atomic_inc(&pn_leaf->rt6i_ref); - rcu_assign_pointer(pn->leaf, pn_leaf); + atomic_inc(&pn_leaf->rt6i_ref); + rcu_assign_pointer(pn->leaf, pn_leaf); + } } #endif goto failure; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8a4610e84e58..8071f42cd8a0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1077,10 +1077,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } - } else if (!(t->parms.flags & - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { - /* enable the cache only only if the routing decision does - * not depend on the current inner header value + } else if (t->parms.proto != 0 && !(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | + IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only if neither the outer protocol nor the + * routing decision depends on the current inner header value */ use_cache = true; } @@ -1679,11 +1680,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); - if (tnl->parms.proto == IPPROTO_IPIP) { - if (new_mtu < ETH_MIN_MTU) + if (tnl->parms.proto == IPPROTO_IPV6) { + if (new_mtu < IPV6_MIN_MTU) return -EINVAL; } else { - if (new_mtu < IPV6_MIN_MTU) + if (new_mtu < ETH_MIN_MTU) return -EINVAL; } if (new_mtu > 0xFFF8 - dev->hard_header_len) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 825b8e01f947..ba3767ef5e93 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -501,7 +501,7 @@ static struct seg6_action_desc *__get_action_desc(int action) struct seg6_action_desc *desc; int i, count; - count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc); + count = ARRAY_SIZE(seg6_action_table); for (i = 0; i < count; i++) { desc = &seg6_action_table[i]; if (desc->action == action) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b3cff69bfd66..fd580614085b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3625,6 +3625,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) } return true; case NL80211_IFTYPE_MESH_POINT: + if (ether_addr_equal(sdata->vif.addr, hdr->addr2)) + return false; if (multicast) return true; return ether_addr_equal(sdata->vif.addr, hdr->addr1); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index db0933256ec9..336b81689ac9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2107,7 +2107,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, continue; list_for_each_entry_rcu(chain, &table->chains, list) { - if (ctx && ctx->chain[0] && + if (ctx && ctx->chain && strcmp(ctx->chain, chain->name) != 0) continue; @@ -4700,8 +4700,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_filter *filter = cb->data; - kfree(filter->table); - kfree(filter); + if (filter) { + kfree(filter->table); + kfree(filter); + } return 0; } diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 1f7fbd3c7e5a..06b090d8e901 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -55,21 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) { - mm_segment_t oldfs = get_fs(); - int retval, fd; - if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX) return -EINVAL; - set_fs(KERNEL_DS); - fd = bpf_obj_get_user(path, 0); - set_fs(oldfs); - if (fd < 0) - return fd; - - retval = __bpf_mt_check_fd(fd, ret); - sys_close(fd); - return retval; + *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER); + return PTR_ERR_OR_ZERO(*ret); } static int bpf_mt_check(const struct xt_mtchk_param *par) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index bc2f1e0977d6..634cfcb7bba6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; + if (args->nr_local == 0) + return -EINVAL; + /* figure out the number of pages in the vector */ for (i = 0; i < args->nr_local; i++) { if (copy_from_user(&vec, &local_vec[i], @@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, err: if (page) put_page(page); + rm->atomic.op_active = 0; kfree(rm->atomic.op_notifier); return ret; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 9d632e92cad0..b56986d41c87 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 37e5e4decbd6..e6ff88f72900 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -231,7 +231,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, struct tcf_t *tm = &m->tcf_tm; _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, diff --git a/net/sctp/input.c b/net/sctp/input.c index 621b5ca3fd1c..141c9c466ec1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, return; } - if (t->param_flags & SPP_PMTUD_ENABLE) { - /* Update transports view of the MTU */ - sctp_transport_update_pmtu(t, pmtu); - - /* Update association pmtu. */ - sctp_assoc_sync_pmtu(asoc); - } + if (!(t->param_flags & SPP_PMTUD_ENABLE)) + /* We can't allow retransmitting in such case, as the + * retransmission would be sized just as before, and thus we + * would get another icmp, and retransmit again. + */ + return; - /* Retransmit with the new pmtu setting. - * Normally, if PMTU discovery is disabled, an ICMP Fragmentation - * Needed will never be sent, but if a message was sent before - * PMTU discovery was disabled that was larger than the PMTU, it - * would not be fragmented, so it must be re-transmitted fragmented. + /* Update transports view of the MTU. Return if no update was needed. + * If an update wasn't needed/possible, it also doesn't make sense to + * try to retransmit now. */ + if (!sctp_transport_update_pmtu(t, pmtu)) + return; + + /* Update association pmtu. */ + sctp_assoc_sync_pmtu(asoc); + + /* Retransmit with the new pmtu setting. */ sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 06b644dd858c..cedf672487f9 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, sctp_stream_outq_migrate(stream, NULL, outcnt); sched->sched_all(stream); - i = sctp_stream_alloc_out(stream, outcnt, gfp); - if (i) - return i; + ret = sctp_stream_alloc_out(stream, outcnt, gfp); + if (ret) + goto out; stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) @@ -171,19 +171,17 @@ in: if (!incnt) goto out; - i = sctp_stream_alloc_in(stream, incnt, gfp); - if (i) { - ret = -ENOMEM; - goto free; + ret = sctp_stream_alloc_in(stream, incnt, gfp); + if (ret) { + sched->free(stream); + kfree(stream->out); + stream->out = NULL; + stream->outcnt = 0; + goto out; } stream->incnt = incnt; - goto out; -free: - sched->free(stream); - kfree(stream->out); - stream->out = NULL; out: return ret; } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1e5a22430cf5..47f82bd794d9 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst = sctp_transport_dst_check(t); + bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); - /* Use default minimum segment size and disable - * pmtu discovery on this transport. - */ - t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - } else { - t->pathmtu = pmtu; + pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n", + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment instead */ + pmtu = SCTP_DEFAULT_MINSEGMENT; } + pmtu = SCTP_TRUNC4(pmtu); if (dst) { dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); } - if (!dst) + if (!dst) { t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); + dst = t->dst; + } + + if (dst) { + /* Re-fetch, as under layers may have a higher minimum size */ + pmtu = SCTP_TRUNC4(dst_mtu(dst)); + change = t->pathmtu != pmtu; + } + t->pathmtu = pmtu; + + return change; } /* Caches the dst entry and source address for a transport's destination diff --git a/net/tipc/group.c b/net/tipc/group.c index fb7fe971e51b..497ee34bfab9 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -49,8 +49,6 @@ #define ADV_ACTIVE (ADV_UNIT * 12) enum mbr_state { - MBR_QUARANTINED, - MBR_DISCOVERED, MBR_JOINING, MBR_PUBLISHED, MBR_JOINED, @@ -65,7 +63,6 @@ struct tipc_member { struct rb_node tree_node; struct list_head list; struct list_head small_win; - struct sk_buff *event_msg; struct sk_buff_head deferredq; struct tipc_group *group; u32 node; @@ -77,7 +74,6 @@ struct tipc_member { u16 bc_rcv_nxt; u16 bc_syncpt; u16 bc_acked; - bool usr_pending; }; struct tipc_group { @@ -85,13 +81,11 @@ struct tipc_group { struct list_head small_win; struct list_head pending; struct list_head active; - struct list_head reclaiming; struct tipc_nlist dests; struct net *net; int subid; u32 type; u32 instance; - u32 domain; u32 scope; u32 portid; u16 member_cnt; @@ -101,15 +95,32 @@ struct tipc_group { u16 bc_ackers; bool loopback; bool events; + bool open; }; static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq); +bool tipc_group_is_open(struct tipc_group *grp) +{ + return grp->open; +} + +static void tipc_group_open(struct tipc_member *m, bool *wakeup) +{ + *wakeup = false; + if (list_empty(&m->small_win)) + return; + list_del_init(&m->small_win); + m->group->open = true; + *wakeup = true; +} + static void tipc_group_decr_active(struct tipc_group *grp, struct tipc_member *m) { - if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING) + if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING || + m->state == MBR_REMITTED) grp->active_cnt--; } @@ -138,12 +149,12 @@ u16 tipc_group_bc_snd_nxt(struct tipc_group *grp) static bool tipc_group_is_receiver(struct tipc_member *m) { - return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING; + return m && m->state != MBR_JOINING && m->state != MBR_LEAVING; } static bool tipc_group_is_sender(struct tipc_member *m) { - return m && m->state >= MBR_JOINED; + return m && m->state != MBR_JOINING && m->state != MBR_PUBLISHED; } u32 tipc_group_exclude(struct tipc_group *grp) @@ -161,6 +172,8 @@ int tipc_group_size(struct tipc_group *grp) struct tipc_group *tipc_group_create(struct net *net, u32 portid, struct tipc_group_req *mreq) { + u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS; + bool global = mreq->scope != TIPC_NODE_SCOPE; struct tipc_group *grp; u32 type = mreq->type; @@ -171,22 +184,37 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, INIT_LIST_HEAD(&grp->small_win); INIT_LIST_HEAD(&grp->active); INIT_LIST_HEAD(&grp->pending); - INIT_LIST_HEAD(&grp->reclaiming); grp->members = RB_ROOT; grp->net = net; grp->portid = portid; - grp->domain = addr_domain(net, mreq->scope); grp->type = type; grp->instance = mreq->instance; grp->scope = mreq->scope; grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; - if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid)) + filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; + if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, + filter, &grp->subid)) return grp; kfree(grp); return NULL; } +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + skb_queue_head_init(&xmitq); + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq); + tipc_group_update_member(m, 0); + } + tipc_node_distr_xmit(net, &xmitq); + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); +} + void tipc_group_delete(struct net *net, struct tipc_group *grp) { struct rb_root *tree = &grp->members; @@ -277,7 +305,7 @@ static void tipc_group_add_to_tree(struct tipc_group *grp, static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, u32 node, u32 port, - int state) + u32 instance, int state) { struct tipc_member *m; @@ -290,6 +318,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, m->group = grp; m->node = node; m->port = port; + m->instance = instance; m->bc_acked = grp->bc_snd_nxt - 1; grp->member_cnt++; tipc_group_add_to_tree(grp, m); @@ -298,9 +327,10 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, return m; } -void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port) +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance) { - tipc_group_create_member(grp, node, port, MBR_DISCOVERED); + tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED); } static void tipc_group_delete_member(struct tipc_group *grp, @@ -391,20 +421,20 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, int adv, state; m = tipc_group_find_dest(grp, dnode, dport); - *mbr = m; - if (!m) + if (!tipc_group_is_receiver(m)) { + *mbr = NULL; return false; - if (m->usr_pending) - return true; + } + *mbr = m; + if (m->window >= len) return false; - m->usr_pending = true; + + grp->open = false; /* If not fully advertised, do it now to prevent mutual blocking */ adv = m->advertised; state = m->state; - if (state < MBR_JOINED) - return true; if (state == MBR_JOINED && adv == ADV_IDLE) return true; if (state == MBR_ACTIVE && adv == ADV_ACTIVE) @@ -422,9 +452,10 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len) struct tipc_member *m = NULL; /* If prev bcast was replicast, reject until all receivers have acked */ - if (grp->bc_ackers) + if (grp->bc_ackers) { + grp->open = false; return true; - + } if (list_empty(&grp->small_win)) return false; @@ -560,7 +591,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, int max_active = grp->max_active; int reclaim_limit = max_active * 3 / 4; int active_cnt = grp->active_cnt; - struct tipc_member *m, *rm; + struct tipc_member *m, *rm, *pm; m = tipc_group_find_member(grp, node, port); if (!m) @@ -570,24 +601,34 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, switch (m->state) { case MBR_JOINED: - /* Reclaim advertised space from least active member */ - if (!list_empty(active) && active_cnt >= reclaim_limit) { + /* First, decide if member can go active */ + if (active_cnt <= max_active) { + m->state = MBR_ACTIVE; + list_add_tail(&m->list, active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } else { + m->state = MBR_PENDING; + list_add_tail(&m->list, &grp->pending); + } + + if (active_cnt < reclaim_limit) + break; + + /* Reclaim from oldest active member, if possible */ + if (!list_empty(active)) { rm = list_first_entry(active, struct tipc_member, list); rm->state = MBR_RECLAIMING; - list_move_tail(&rm->list, &grp->reclaiming); + list_del_init(&rm->list); tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq); - } - /* If max active, become pending and wait for reclaimed space */ - if (active_cnt >= max_active) { - m->state = MBR_PENDING; - list_add_tail(&m->list, &grp->pending); break; } - /* Otherwise become active */ - m->state = MBR_ACTIVE; - list_add_tail(&m->list, &grp->active); - grp->active_cnt++; - /* Fall through */ + /* Nobody to reclaim from; - revert oldest pending to JOINED */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + list_del_init(&pm->list); + pm->state = MBR_JOINED; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + break; case MBR_ACTIVE: if (!list_is_last(&m->list, &grp->active)) list_move_tail(&m->list, &grp->active); @@ -599,13 +640,23 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, if (m->advertised > ADV_IDLE) break; m->state = MBR_JOINED; + grp->active_cnt--; if (m->advertised < ADV_IDLE) { pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); } + + if (list_empty(&grp->pending)) + return; + + /* Set oldest pending member to active and advertise */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); break; case MBR_RECLAIMING: - case MBR_DISCOVERED: case MBR_JOINING: case MBR_LEAVING: default: @@ -613,6 +664,40 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, } } +static void tipc_group_create_event(struct tipc_group *grp, + struct tipc_member *m, + u32 event, u16 seqno, + struct sk_buff_head *inputq) +{ u32 dnode = tipc_own_addr(grp->net); + struct tipc_event evt; + struct sk_buff *skb; + struct tipc_msg *hdr; + + evt.event = event; + evt.found_lower = m->instance; + evt.found_upper = m->instance; + evt.port.ref = m->port; + evt.port.node = m->node; + evt.s.seq.type = grp->type; + evt.s.seq.lower = m->instance; + evt.s.seq.upper = m->instance; + + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT, + GROUP_H_SIZE, sizeof(evt), dnode, m->node, + grp->portid, m->port, 0); + if (!skb) + return; + + hdr = buf_msg(skb); + msg_set_nametype(hdr, grp->type); + msg_set_grp_evt(hdr, event); + msg_set_dest_droppable(hdr, true); + msg_set_grp_bc_seqno(hdr, seqno); + memcpy(msg_data(hdr), &evt, sizeof(evt)); + TIPC_SKB_CB(skb)->orig_member = m->instance; + __skb_queue_tail(inputq, skb); +} + static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq) { @@ -658,107 +743,95 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, u32 node = msg_orignode(hdr); u32 port = msg_origport(hdr); struct tipc_member *m, *pm; - struct tipc_msg *ehdr; u16 remitted, in_flight; if (!grp) return; + if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net)) + return; + m = tipc_group_find_member(grp, node, port); switch (msg_type(hdr)) { case GRP_JOIN_MSG: if (!m) m = tipc_group_create_member(grp, node, port, - MBR_QUARANTINED); + 0, MBR_JOINING); if (!m) return; m->bc_syncpt = msg_grp_bc_syncpt(hdr); m->bc_rcv_nxt = m->bc_syncpt; m->window += msg_adv_win(hdr); - /* Wait until PUBLISH event is received */ - if (m->state == MBR_DISCOVERED) { - m->state = MBR_JOINING; - } else if (m->state == MBR_PUBLISHED) { - m->state = MBR_JOINED; - *usr_wakeup = true; - m->usr_pending = false; - tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); - ehdr = buf_msg(m->event_msg); - msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); - __skb_queue_tail(inputq, m->event_msg); - } - list_del_init(&m->small_win); + /* Wait until PUBLISH event is received if necessary */ + if (m->state != MBR_PUBLISHED) + return; + + /* Member can be taken into service */ + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); return; case GRP_LEAVE_MSG: if (!m) return; m->bc_syncpt = msg_grp_bc_syncpt(hdr); list_del_init(&m->list); - list_del_init(&m->small_win); - *usr_wakeup = true; - - /* Wait until WITHDRAW event is received */ - if (m->state != MBR_LEAVING) { - tipc_group_decr_active(grp, m); - m->state = MBR_LEAVING; - return; - } - /* Otherwise deliver already received WITHDRAW event */ - ehdr = buf_msg(m->event_msg); - msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); - __skb_queue_tail(inputq, m->event_msg); + tipc_group_open(m, usr_wakeup); + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_syncpt, inputq); return; case GRP_ADV_MSG: if (!m) return; m->window += msg_adv_win(hdr); - *usr_wakeup = m->usr_pending; - m->usr_pending = false; - list_del_init(&m->small_win); + tipc_group_open(m, usr_wakeup); return; case GRP_ACK_MSG: if (!m) return; m->bc_acked = msg_grp_bc_acked(hdr); if (--grp->bc_ackers) - break; + return; + list_del_init(&m->small_win); + m->group->open = true; *usr_wakeup = true; - m->usr_pending = false; + tipc_group_update_member(m, 0); return; case GRP_RECLAIM_MSG: if (!m) return; - *usr_wakeup = m->usr_pending; - m->usr_pending = false; tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq); m->window = ADV_IDLE; + tipc_group_open(m, usr_wakeup); return; case GRP_REMIT_MSG: if (!m || m->state != MBR_RECLAIMING) return; - list_del_init(&m->list); - grp->active_cnt--; remitted = msg_grp_remitted(hdr); /* Messages preceding the REMIT still in receive queue */ if (m->advertised > remitted) { m->state = MBR_REMITTED; in_flight = m->advertised - remitted; + m->advertised = ADV_IDLE + in_flight; + return; } - /* All messages preceding the REMIT have been read */ - if (m->advertised <= remitted) { - m->state = MBR_JOINED; - in_flight = 0; - } - /* ..and the REMIT overtaken by more messages => re-advertise */ + /* This should never happen */ if (m->advertised < remitted) - tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + pr_warn_ratelimited("Unexpected REMIT msg\n"); - m->advertised = ADV_IDLE + in_flight; + /* All messages preceding the REMIT have been read */ + m->state = MBR_JOINED; + grp->active_cnt--; + m->advertised = ADV_IDLE; /* Set oldest pending member to active and advertise */ if (list_empty(&grp->pending)) @@ -780,11 +853,10 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, void tipc_group_member_evt(struct tipc_group *grp, bool *usr_wakeup, int *sk_rcvbuf, - struct sk_buff *skb, + struct tipc_msg *hdr, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { - struct tipc_msg *hdr = buf_msg(skb); struct tipc_event *evt = (void *)msg_data(hdr); u32 instance = evt->found_lower; u32 node = evt->port.node; @@ -792,86 +864,59 @@ void tipc_group_member_evt(struct tipc_group *grp, int event = evt->event; struct tipc_member *m; struct net *net; - bool node_up; u32 self; if (!grp) - goto drop; + return; net = grp->net; self = tipc_own_addr(net); if (!grp->loopback && node == self && port == grp->portid) - goto drop; - - /* Convert message before delivery to user */ - msg_set_hdr_sz(hdr, GROUP_H_SIZE); - msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); - msg_set_type(hdr, TIPC_GRP_MEMBER_EVT); - msg_set_origport(hdr, port); - msg_set_orignode(hdr, node); - msg_set_nametype(hdr, grp->type); - msg_set_grp_evt(hdr, event); + return; m = tipc_group_find_member(grp, node, port); - if (event == TIPC_PUBLISHED) { - if (!m) - m = tipc_group_create_member(grp, node, port, - MBR_DISCOVERED); - if (!m) - goto drop; - - /* Hold back event if JOIN message not yet received */ - if (m->state == MBR_DISCOVERED) { - m->event_msg = skb; - m->state = MBR_PUBLISHED; - } else { - msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - __skb_queue_tail(inputq, skb); - m->state = MBR_JOINED; - *usr_wakeup = true; - m->usr_pending = false; + switch (event) { + case TIPC_PUBLISHED: + /* Send and wait for arrival of JOIN message if necessary */ + if (!m) { + m = tipc_group_create_member(grp, node, port, instance, + MBR_PUBLISHED); + if (!m) + break; + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + break; } + + if (m->state != MBR_JOINING) + break; + + /* Member can be taken into service */ m->instance = instance; - TIPC_SKB_CB(skb)->orig_member = m->instance; - tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); tipc_group_update_member(m, 0); - } else if (event == TIPC_WITHDRAWN) { + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); + break; + case TIPC_WITHDRAWN: if (!m) - goto drop; - - TIPC_SKB_CB(skb)->orig_member = m->instance; + break; - *usr_wakeup = true; - m->usr_pending = false; - node_up = tipc_node_is_up(net, node); - m->event_msg = NULL; - - if (node_up) { - /* Hold back event if a LEAVE msg should be expected */ - if (m->state != MBR_LEAVING) { - m->event_msg = skb; - tipc_group_decr_active(grp, m); - m->state = MBR_LEAVING; - } else { - msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - __skb_queue_tail(inputq, skb); - } - } else { - if (m->state != MBR_LEAVING) { - tipc_group_decr_active(grp, m); - m->state = MBR_LEAVING; - msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); - } else { - msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - } - __skb_queue_tail(inputq, skb); - } + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; list_del_init(&m->list); - list_del_init(&m->small_win); + tipc_group_open(m, usr_wakeup); + + /* Only send event if no LEAVE message can be expected */ + if (!tipc_node_is_up(net, node)) + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_rcv_nxt, inputq); + break; + default: + break; } *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); - return; -drop: - kfree_skb(skb); } diff --git a/net/tipc/group.h b/net/tipc/group.h index d525e1cd7de5..f4a596ed9848 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -44,8 +44,10 @@ struct tipc_msg; struct tipc_group *tipc_group_create(struct net *net, u32 portid, struct tipc_group_req *mreq); +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf); void tipc_group_delete(struct net *net, struct tipc_group *grp); -void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port); +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance); struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, int *scope); @@ -54,7 +56,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup, - int *sk_rcvbuf, struct sk_buff *skb, + int *sk_rcvbuf, struct tipc_msg *hdr, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, @@ -65,9 +67,9 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, int len, struct tipc_member **m); bool tipc_group_bc_cong(struct tipc_group *grp, int len); +bool tipc_group_is_open(struct tipc_group *grp); void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); void tipc_group_update_member(struct tipc_member *m, int len); -int tipc_group_size(struct tipc_group *grp); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index b3829bcf63c7..64cdd3c302b0 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -328,7 +328,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscrp_report_overlap(s, publ->lower, publ->upper, TIPC_PUBLISHED, publ->ref, - publ->node, created_subseq); + publ->node, publ->scope, + created_subseq); } return publ; } @@ -398,19 +399,21 @@ found: list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscrp_report_overlap(s, publ->lower, publ->upper, TIPC_WITHDRAWN, publ->ref, - publ->node, removed_subseq); + publ->node, publ->scope, + removed_subseq); } return publ; } /** - * tipc_nameseq_subscribe - attach a subscription, and issue - * the prescribed number of events if there is any sub- + * tipc_nameseq_subscribe - attach a subscription, and optionally + * issue the prescribed number of events if there is any sub- * sequence overlapping with the requested sequence */ static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s) + struct tipc_subscription *s, + bool status) { struct sub_seq *sseq = nseq->sseqs; struct tipc_name_seq ns; @@ -420,7 +423,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, tipc_subscrp_get(s); list_add(&s->nameseq_list, &nseq->subscriptions); - if (!sseq) + if (!status || !sseq) return; while (sseq != &nseq->sseqs[nseq->first_free]) { @@ -434,6 +437,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, sseq->upper, TIPC_PUBLISHED, crs->ref, crs->node, + crs->scope, must_report); must_report = 0; } @@ -597,7 +601,7 @@ not_found: return ref; } -bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, struct list_head *dsts, int *dstcnt, u32 exclude, bool all) { @@ -607,9 +611,6 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, struct name_seq *seq; struct sub_seq *sseq; - if (!tipc_in_scope(domain, self)) - return false; - *dstcnt = 0; rcu_read_lock(); seq = nametbl_find_seq(net, type); @@ -620,7 +621,7 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, if (likely(sseq)) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { - if (!tipc_in_scope(domain, publ->node)) + if (publ->scope != scope) continue; if (publ->ref == exclude && publ->node == self) continue; @@ -638,13 +639,14 @@ exit: return !list_empty(dsts); } -int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct list_head *dports) +int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports) { - struct name_seq *seq; - struct sub_seq *sseq; struct sub_seq *sseq_stop; struct name_info *info; + struct publication *p; + struct name_seq *seq; + struct sub_seq *sseq; int res = 0; rcu_read_lock(); @@ -656,15 +658,12 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); sseq_stop = seq->sseqs + seq->first_free; for (; sseq != sseq_stop; sseq++) { - struct publication *publ; - if (sseq->lower > upper) break; - info = sseq->info; - list_for_each_entry(publ, &info->node_list, node_list) { - if (publ->scope <= limit) - tipc_dest_push(dports, 0, publ->ref); + list_for_each_entry(p, &info->node_list, node_list) { + if (p->scope == scope || (!exact && p->scope < scope)) + tipc_dest_push(dports, 0, p->ref); } if (info->cluster_list_size != info->node_list_size) @@ -681,7 +680,7 @@ exit: * - Determines if any node local ports overlap */ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, u32 domain, + u32 upper, u32 scope, struct tipc_nlist *nodes) { struct sub_seq *sseq, *stop; @@ -700,7 +699,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { - if (tipc_in_scope(domain, publ->node)) + if (publ->scope == scope) tipc_nlist_add(nodes, publ->node); } } @@ -712,7 +711,7 @@ exit: /* tipc_nametbl_build_group - build list of communication group members */ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, - u32 type, u32 domain) + u32 type, u32 scope) { struct sub_seq *sseq, *stop; struct name_info *info; @@ -730,9 +729,9 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, for (; sseq != stop; sseq++) { info = sseq->info; list_for_each_entry(p, &info->zone_list, zone_list) { - if (!tipc_in_scope(domain, p->node)) + if (p->scope != scope) continue; - tipc_group_add_member(grp, p->node, p->ref); + tipc_group_add_member(grp, p->node, p->ref, p->lower); } } spin_unlock_bh(&seq->lock); @@ -811,7 +810,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, /** * tipc_nametbl_subscribe - add a subscription object to the name table */ -void tipc_nametbl_subscribe(struct tipc_subscription *s) +void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status) { struct tipc_net *tn = net_generic(s->net, tipc_net_id); u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); @@ -825,7 +824,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); - tipc_nameseq_subscribe(seq, s); + tipc_nameseq_subscribe(seq, s, status); spin_unlock_bh(&seq->lock); } else { tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 71926e429446..b595d8aa00f0 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -100,8 +100,8 @@ struct name_table { int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct list_head *dports); +int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports); void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, @@ -121,7 +121,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, u32 lower, u32 node, u32 ref, u32 key); -void tipc_nametbl_subscribe(struct tipc_subscription *s); +void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); void tipc_nametbl_stop(struct net *net); diff --git a/net/tipc/server.c b/net/tipc/server.c index d60c30342327..8ee5e86b7870 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -489,8 +489,8 @@ void tipc_conn_terminate(struct tipc_server *s, int conid) } } -bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, - u32 lower, u32 upper, int *conid) +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) { struct tipc_subscriber *scbr; struct tipc_subscr sub; @@ -501,7 +501,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, sub.seq.lower = lower; sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; - sub.filter = TIPC_SUB_PORTS; + sub.filter = filter; *(u32 *)&sub.usr_handle = port; con = tipc_alloc_conn(tipc_topsrv(net)); diff --git a/net/tipc/server.h b/net/tipc/server.h index 2113c9192633..17f49ee44cfd 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -41,6 +41,9 @@ #include <net/net_namespace.h> #define TIPC_SERVER_NAME_LEN 32 +#define TIPC_SUB_CLUSTER_SCOPE 0x20 +#define TIPC_SUB_NODE_SCOPE 0x40 +#define TIPC_SUB_NO_STATUS 0x80 /** * struct tipc_server - TIPC server structure @@ -83,8 +86,8 @@ struct tipc_server { int tipc_conn_sendmsg(struct tipc_server *s, int conid, struct sockaddr_tipc *addr, void *data, size_t len); -bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, - u32 lower, u32 upper, int *conid); +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid); void tipc_topsrv_kern_unsubscr(struct net *net, int conid); /** diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b51d5cba5094..1f236271766c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -715,7 +715,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_group *grp = tsk->group; + struct tipc_group *grp; u32 revents = 0; sock_poll_wait(file, sk_sleep(sk), wait); @@ -736,9 +736,9 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: - if (!grp || tipc_group_size(grp)) - if (!tsk->cong_link_cnt) - revents |= POLLOUT; + grp = tsk->group; + if ((!grp || tipc_group_is_open(grp)) && !tsk->cong_link_cnt) + revents |= POLLOUT; if (!tipc_sk_type_connectionless(sk)) break; if (skb_queue_empty(&sk->sk_receive_queue)) @@ -928,21 +928,22 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, struct list_head *cong_links = &tsk->cong_links; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct tipc_member *first = NULL; struct tipc_member *mbr = NULL; struct net *net = sock_net(sk); u32 node, port, exclude; - u32 type, inst, domain; struct list_head dsts; + u32 type, inst, scope; int lookups = 0; int dstcnt, rc; bool cong; INIT_LIST_HEAD(&dsts); - type = dest->addr.name.name.type; + type = msg_nametype(hdr); inst = dest->addr.name.name.instance; - domain = addr_domain(net, dest->scope); + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); while (++lookups < 4) { @@ -950,7 +951,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /* Look for a non-congested destination member, if any */ while (1) { - if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts, + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, &dstcnt, exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); @@ -1079,22 +1080,23 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - struct tipc_name_seq *seq = &dest->addr.nameseq; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - u32 domain, exclude, dstcnt; + u32 type, inst, scope, exclude; struct list_head dsts; + u32 dstcnt; INIT_LIST_HEAD(&dsts); - if (seq->lower != seq->upper) - return -ENOTSUPP; - - domain = addr_domain(net, dest->scope); + type = msg_nametype(hdr); + inst = dest->addr.name.name.instance; + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); - if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain, - &dsts, &dstcnt, exclude, true)) + + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, + &dstcnt, exclude, true)) return -EHOSTUNREACH; if (dstcnt == 1) { @@ -1116,24 +1118,29 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { - u32 scope = TIPC_CLUSTER_SCOPE; u32 self = tipc_own_addr(net); + u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; - u32 lower = 0, upper = ~0; - struct sk_buff_head tmpq; u32 portid, oport, onode; + struct sk_buff_head tmpq; struct list_head dports; - struct tipc_msg *msg; - int user, mtyp, hsz; + struct tipc_msg *hdr; + int user, mtyp, hlen; + bool exact; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { - msg = buf_msg(skb); - user = msg_user(msg); - mtyp = msg_type(msg); + hdr = buf_msg(skb); + user = msg_user(hdr); + mtyp = msg_type(hdr); + hlen = skb_headroom(skb) + msg_hdr_sz(hdr); + oport = msg_origport(hdr); + onode = msg_orignode(hdr); + type = msg_nametype(hdr); + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { @@ -1144,21 +1151,31 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, spin_unlock_bh(&inputq->lock); continue; } - hsz = skb_headroom(skb) + msg_hdr_sz(msg); - oport = msg_origport(msg); - onode = msg_orignode(msg); - if (onode == self) - scope = TIPC_NODE_SCOPE; - - /* Create destination port list and message clones: */ - if (!msg_in_group(msg)) { - lower = msg_namelower(msg); - upper = msg_nameupper(msg); + + /* Group messages require exact scope match */ + if (msg_in_group(hdr)) { + lower = 0; + upper = ~0; + scope = msg_lookup_scope(hdr); + exact = true; + } else { + /* TIPC_NODE_SCOPE means "any scope" in this context */ + if (onode == self) + scope = TIPC_NODE_SCOPE; + else + scope = TIPC_CLUSTER_SCOPE; + exact = false; + lower = msg_namelower(hdr); + upper = msg_nameupper(hdr); } - tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper, - scope, &dports); + + /* Create destination port list: */ + tipc_nametbl_mc_lookup(net, type, lower, upper, + scope, exact, &dports); + + /* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { - _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + _skb = __pskb_copy(skb, hlen, GFP_ATOMIC); if (_skb) { msg_set_destport(buf_msg(_skb), portid); __skb_queue_tail(&tmpq, _skb); @@ -1933,8 +1950,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, break; case TOP_SRV: tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, - skb, inputq, xmitq); - skb = NULL; + hdr, inputq, xmitq); break; default: break; @@ -2732,7 +2748,6 @@ void tipc_sk_rht_destroy(struct net *net) static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) { struct net *net = sock_net(&tsk->sk); - u32 domain = addr_domain(net, mreq->scope); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq seq; @@ -2740,6 +2755,8 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; + if (mreq->scope > TIPC_NODE_SCOPE) + return -EINVAL; if (grp) return -EACCES; grp = tipc_group_create(net, tsk->portid, mreq); @@ -2752,16 +2769,16 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) seq.type = mreq->type; seq.lower = mreq->instance; seq.upper = seq.lower; - tipc_nametbl_build_group(net, grp, mreq->type, domain); + tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); rc = tipc_sk_publish(tsk, mreq->scope, &seq); if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; } - - /* Eliminate any risk that a broadcast overtakes the sent JOIN */ + /* Eliminate any risk that a broadcast overtakes sent JOINs */ tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; + tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf); return rc; } diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 251065dfd8df..44df528ed6ab 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -118,15 +118,19 @@ void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, - u32 node, int must) + u32 node, u32 scope, int must) { + u32 filter = htohl(sub->evt.s.filter, sub->swap); struct tipc_name_seq seq; tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; - if (!must && - !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS)) + if (!must && !(filter & TIPC_SUB_PORTS)) + return; + if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) + return; + if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) return; tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, @@ -286,7 +290,8 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net, } static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, - struct tipc_subscriber *subscriber, int swap) + struct tipc_subscriber *subscriber, int swap, + bool status) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub = NULL; @@ -299,7 +304,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, spin_lock_bh(&subscriber->lock); list_add(&sub->subscrp_list, &subscriber->subscrp_list); sub->subscriber = subscriber; - tipc_nametbl_subscribe(sub); + tipc_nametbl_subscribe(sub, status); tipc_subscrb_get(subscriber); spin_unlock_bh(&subscriber->lock); @@ -323,6 +328,7 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscr *s = (struct tipc_subscr *)buf; + bool status; int swap; /* Determine subscriber's endianness */ @@ -334,8 +340,8 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); return tipc_subscrp_cancel(s, subscriber); } - - tipc_subscrp_subscribe(net, s, subscriber, swap); + status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap)); + tipc_subscrp_subscribe(net, s, subscriber, swap, status); } /* Handle one request to establish a new subscriber */ diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ee52957dc952..f3edca775d9f 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -71,7 +71,7 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, u32 found_upper); void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, - u32 port_ref, u32 node, int must); + u32 port_ref, u32 node, u32 scope, int must); void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, struct tipc_name_seq *out); u32 tipc_subscrp_convert_seq_type(u32 type, int swap); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 79a9ff682b7e..c084dd2205ac 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11387,7 +11387,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb, break; case NL80211_NAN_FUNC_FOLLOW_UP: if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || - !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { + !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] || + !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) { err = -EINVAL; goto out; } diff --git a/security/Kconfig b/security/Kconfig index e8e449444e65..3d4debd0257e 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -54,6 +54,17 @@ config SECURITY_NETWORK implement socket and networking access controls. If you are unsure how to answer this question, answer N. +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on X86_64 && !UML + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pagetable-isolation.txt for more details. + config SECURITY_INFINIBAND bool "Infiniband Security Hooks" depends on SECURITY && INFINIBAND diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index ed9b4d0f9f7e..8c558cbce930 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -329,6 +329,9 @@ static int match_mnt_path_str(struct aa_profile *profile, AA_BUG(!mntpath); AA_BUG(!buffer); + if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + return 0; + error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer, &mntpnt, &info, profile->disconnected); if (error) @@ -380,6 +383,9 @@ static int match_mnt(struct aa_profile *profile, const struct path *path, AA_BUG(!profile); AA_BUG(devpath && !devbuffer); + if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + return 0; + if (devpath) { error = aa_path_name(devpath, path_flags(profile, devpath), devbuffer, &devname, &info, @@ -558,6 +564,9 @@ static int profile_umount(struct aa_profile *profile, struct path *path, AA_BUG(!profile); AA_BUG(!path); + if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + return 0; + error = aa_path_name(path, path_flags(profile, path), buffer, &name, &info, profile->disconnected); if (error) @@ -613,7 +622,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, AA_BUG(!new_path); AA_BUG(!old_path); - if (profile_unconfined(profile)) + if (profile_unconfined(profile) || + !PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) return aa_get_newest_label(&profile->label); error = aa_path_name(old_path, path_flags(profile, old_path), diff --git a/security/commoncap.c b/security/commoncap.c index 4f8e09340956..48620c93d697 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -348,21 +348,18 @@ static __u32 sansflags(__u32 m) return m & ~VFS_CAP_FLAGS_EFFECTIVE; } -static bool is_v2header(size_t size, __le32 magic) +static bool is_v2header(size_t size, const struct vfs_cap_data *cap) { - __u32 m = le32_to_cpu(magic); if (size != XATTR_CAPS_SZ_2) return false; - return sansflags(m) == VFS_CAP_REVISION_2; + return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2; } -static bool is_v3header(size_t size, __le32 magic) +static bool is_v3header(size_t size, const struct vfs_cap_data *cap) { - __u32 m = le32_to_cpu(magic); - if (size != XATTR_CAPS_SZ_3) return false; - return sansflags(m) == VFS_CAP_REVISION_3; + return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3; } /* @@ -405,7 +402,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; - if (is_v2header((size_t) ret, cap->magic_etc)) { + if (is_v2header((size_t) ret, cap)) { /* If this is sizeof(vfs_cap_data) then we're ok with the * on-disk value, so return that. */ if (alloc) @@ -413,7 +410,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, else kfree(tmpbuf); return ret; - } else if (!is_v3header((size_t) ret, cap->magic_etc)) { + } else if (!is_v3header((size_t) ret, cap)) { kfree(tmpbuf); return -EINVAL; } @@ -470,9 +467,9 @@ static kuid_t rootid_from_xattr(const void *value, size_t size, return make_kuid(task_ns, rootid); } -static bool validheader(size_t size, __le32 magic) +static bool validheader(size_t size, const struct vfs_cap_data *cap) { - return is_v2header(size, magic) || is_v3header(size, magic); + return is_v2header(size, cap) || is_v3header(size, cap); } /* @@ -495,7 +492,7 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size) if (!*ivalue) return -EINVAL; - if (!validheader(size, cap->magic_etc)) + if (!validheader(size, cap)) return -EINVAL; if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) return -EPERM; diff --git a/tools/arch/s390/include/uapi/asm/perf_regs.h b/tools/arch/s390/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..d17dd9e5d516 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/perf_regs.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_S390_PERF_REGS_H +#define _ASM_S390_PERF_REGS_H + +enum perf_event_s390_regs { + PERF_REG_S390_R0, + PERF_REG_S390_R1, + PERF_REG_S390_R2, + PERF_REG_S390_R3, + PERF_REG_S390_R4, + PERF_REG_S390_R5, + PERF_REG_S390_R6, + PERF_REG_S390_R7, + PERF_REG_S390_R8, + PERF_REG_S390_R9, + PERF_REG_S390_R10, + PERF_REG_S390_R11, + PERF_REG_S390_R12, + PERF_REG_S390_R13, + PERF_REG_S390_R14, + PERF_REG_S390_R15, + PERF_REG_S390_FP0, + PERF_REG_S390_FP1, + PERF_REG_S390_FP2, + PERF_REG_S390_FP3, + PERF_REG_S390_FP4, + PERF_REG_S390_FP5, + PERF_REG_S390_FP6, + PERF_REG_S390_FP7, + PERF_REG_S390_FP8, + PERF_REG_S390_FP9, + PERF_REG_S390_FP10, + PERF_REG_S390_FP11, + PERF_REG_S390_FP12, + PERF_REG_S390_FP13, + PERF_REG_S390_FP14, + PERF_REG_S390_FP15, + PERF_REG_S390_MASK, + PERF_REG_S390_PC, + + PERF_REG_S390_MAX +}; + +#endif /* _ASM_S390_PERF_REGS_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 8acfc47af70e..540a209b78ab 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -138,7 +138,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, *type = INSN_STACK; op->src.type = OP_SRC_ADD; op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; - op->dest.type = OP_SRC_REG; + op->dest.type = OP_DEST_REG; op->dest.reg = CFI_SP; } break; diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 4c6b5c9ef073..91e8e19ff5e0 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -44,6 +44,9 @@ int cmd_orc(int argc, const char **argv) const char *objname; argc--; argv++; + if (argc <= 0) + usage_with_options(orc_usage, check_options); + if (!strncmp(argv[0], "gen", 3)) { argc = parse_options(argc, argv, check_options, orc_usage, 0); if (argc != 1) @@ -52,7 +55,6 @@ int cmd_orc(int argc, const char **argv) objname = argv[0]; return check(objname, no_fp, no_unreachable, true); - } if (!strcmp(argv[0], "dump")) { diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index e5ca31429c9b..e61fe703197b 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -165,6 +165,8 @@ int create_orc_sections(struct objtool_file *file) /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); + if (!sec) + return -1; ip_relasec = elf_create_rela_section(file->elf, sec); if (!ip_relasec) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index ed65e82f034e..0294bfb6c5f8 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -188,9 +188,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC_NO_CLANG), 1) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) - endif + PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif @@ -576,14 +574,15 @@ ifndef NO_GTK2 endif endif - ifdef NO_LIBPERL CFLAGS += -DNO_LIBPERL else PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) - PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` + PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(feature-libperl), 1) diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index d2df54a6bc5a..bcfbaed78cc2 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -3,7 +3,7 @@ #include <stdlib.h> #include <linux/types.h> -#include <../../../../arch/s390/include/uapi/asm/perf_regs.h> +#include <asm/perf_regs.h> void perf_regs_load(u64 *regs); diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 6db9d809fe97..3e64f10b6d66 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -21,6 +21,7 @@ arch/x86/include/asm/cpufeatures.h arch/arm/include/uapi/asm/perf_regs.h arch/arm64/include/uapi/asm/perf_regs.h arch/powerpc/include/uapi/asm/perf_regs.h +arch/s390/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm_perf.h diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index cf36de7ea255..0c6d1002b524 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -384,13 +384,13 @@ jvmti_write_code(void *agent, char const *sym, } int -jvmti_write_debug_info(void *agent, uint64_t code, const char *file, - jvmti_line_info_t *li, int nr_lines) +jvmti_write_debug_info(void *agent, uint64_t code, + int nr_lines, jvmti_line_info_t *li, + const char * const * file_names) { struct jr_code_debug_info rec; - size_t sret, len, size, flen; + size_t sret, len, size, flen = 0; uint64_t addr; - const char *fn = file; FILE *fp = agent; int i; @@ -405,7 +405,9 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, return -1; } - flen = strlen(file) + 1; + for (i = 0; i < nr_lines; ++i) { + flen += strlen(file_names[i]) + 1; + } rec.p.id = JIT_CODE_DEBUG_INFO; size = sizeof(rec); @@ -421,7 +423,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, * file[] : source file name */ size += nr_lines * sizeof(struct debug_entry); - size += flen * nr_lines; + size += flen; rec.p.total_size = size; /* @@ -452,7 +454,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, if (sret != 1) goto error; - sret = fwrite_unlocked(fn, flen, 1, fp); + sret = fwrite_unlocked(file_names[i], strlen(file_names[i]) + 1, 1, fp); if (sret != 1) goto error; } diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h index fe32d8344a82..6ed82f6c06dd 100644 --- a/tools/perf/jvmti/jvmti_agent.h +++ b/tools/perf/jvmti/jvmti_agent.h @@ -14,6 +14,7 @@ typedef struct { unsigned long pc; int line_number; int discrim; /* discriminator -- 0 for now */ + jmethodID methodID; } jvmti_line_info_t; void *jvmti_open(void); @@ -22,11 +23,9 @@ int jvmti_write_code(void *agent, char const *symbol_name, uint64_t vma, void const *code, const unsigned int code_size); -int jvmti_write_debug_info(void *agent, - uint64_t code, - const char *file, +int jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines, jvmti_line_info_t *li, - int nr_lines); + const char * const * file_names); #if defined(__cplusplus) } diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index c62c9fc9a525..6add3e982614 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -47,6 +47,7 @@ do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, tab[lines].pc = (unsigned long)pc; tab[lines].line_number = loc_tab[i].line_number; tab[lines].discrim = 0; /* not yet used */ + tab[lines].methodID = m; lines++; } else { break; @@ -125,6 +126,99 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t ** return JVMTI_ERROR_NONE; } +static void +copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length) +{ + /* + * Assume path name is class hierarchy, this is a common practice with Java programs + */ + if (*class_sign == 'L') { + int j, i = 0; + char *p = strrchr(class_sign, '/'); + if (p) { + /* drop the 'L' prefix and copy up to the final '/' */ + for (i = 0; i < (p - class_sign); i++) + result[i] = class_sign[i+1]; + } + /* + * append file name, we use loops and not string ops to avoid modifying + * class_sign which is used later for the symbol name + */ + for (j = 0; i < (max_length - 1) && file_name && j < strlen(file_name); j++, i++) + result[i] = file_name[j]; + + result[i] = '\0'; + } else { + /* fallback case */ + size_t file_name_len = strlen(file_name); + strncpy(result, file_name, file_name_len < max_length ? file_name_len : max_length); + } +} + +static jvmtiError +get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer) +{ + jvmtiError ret; + jclass decl_class; + char *file_name = NULL; + char *class_sign = NULL; + char fn[PATH_MAX]; + size_t len; + + ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class); + if (ret != JVMTI_ERROR_NONE) { + print_error(jvmti, "GetMethodDeclaringClass", ret); + return ret; + } + + ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); + if (ret != JVMTI_ERROR_NONE) { + print_error(jvmti, "GetSourceFileName", ret); + return ret; + } + + ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + print_error(jvmti, "GetClassSignature", ret); + goto free_file_name_error; + } + + copy_class_filename(class_sign, file_name, fn, PATH_MAX); + len = strlen(fn); + *buffer = malloc((len + 1) * sizeof(char)); + if (!*buffer) { + print_error(jvmti, "GetClassSignature", ret); + ret = JVMTI_ERROR_OUT_OF_MEMORY; + goto free_class_sign_error; + } + strcpy(*buffer, fn); + ret = JVMTI_ERROR_NONE; + +free_class_sign_error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); +free_file_name_error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); + + return ret; +} + +static jvmtiError +fill_source_filenames(jvmtiEnv *jvmti, int nr_lines, + const jvmti_line_info_t * line_tab, + char ** file_names) +{ + int index; + jvmtiError ret; + + for (index = 0; index < nr_lines; ++index) { + ret = get_source_filename(jvmti, line_tab[index].methodID, &(file_names[index])); + if (ret != JVMTI_ERROR_NONE) + return ret; + } + + return JVMTI_ERROR_NONE; +} + static void JNICALL compiled_method_load_cb(jvmtiEnv *jvmti, jmethodID method, @@ -135,16 +229,18 @@ compiled_method_load_cb(jvmtiEnv *jvmti, const void *compile_info) { jvmti_line_info_t *line_tab = NULL; + char ** line_file_names = NULL; jclass decl_class; char *class_sign = NULL; char *func_name = NULL; char *func_sign = NULL; - char *file_name= NULL; + char *file_name = NULL; char fn[PATH_MAX]; uint64_t addr = (uint64_t)(uintptr_t)code_addr; jvmtiError ret; int nr_lines = 0; /* in line_tab[] */ size_t len; + int output_debug_info = 0; ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, &decl_class); @@ -158,6 +254,19 @@ compiled_method_load_cb(jvmtiEnv *jvmti, if (ret != JVMTI_ERROR_NONE) { warnx("jvmti: cannot get line table for method"); nr_lines = 0; + } else if (nr_lines > 0) { + line_file_names = malloc(sizeof(char*) * nr_lines); + if (!line_file_names) { + warnx("jvmti: cannot allocate space for line table method names"); + } else { + memset(line_file_names, 0, sizeof(char*) * nr_lines); + ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: fill_source_filenames failed"); + } else { + output_debug_info = 1; + } + } } } @@ -181,33 +290,14 @@ compiled_method_load_cb(jvmtiEnv *jvmti, goto error; } - /* - * Assume path name is class hierarchy, this is a common practice with Java programs - */ - if (*class_sign == 'L') { - int j, i = 0; - char *p = strrchr(class_sign, '/'); - if (p) { - /* drop the 'L' prefix and copy up to the final '/' */ - for (i = 0; i < (p - class_sign); i++) - fn[i] = class_sign[i+1]; - } - /* - * append file name, we use loops and not string ops to avoid modifying - * class_sign which is used later for the symbol name - */ - for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++) - fn[i] = file_name[j]; - fn[i] = '\0'; - } else { - /* fallback case */ - strcpy(fn, file_name); - } + copy_class_filename(class_sign, file_name, fn, PATH_MAX); + /* * write source line info record if we have it */ - if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines)) - warnx("jvmti: write_debug_info() failed"); + if (output_debug_info) + if (jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab, (const char * const *) line_file_names)) + warnx("jvmti: write_debug_info() failed"); len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; { @@ -223,6 +313,13 @@ error: (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); free(line_tab); + while (line_file_names && (nr_lines > 0)) { + if (line_file_names[nr_lines - 1]) { + free(line_file_names[nr_lines - 1]); + } + nr_lines -= 1; + } + free(line_file_names); } static void JNICALL diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 0304ffb714f2..1aef72df20a1 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -122,8 +122,7 @@ static void check_valid_segment(uint16_t index, int ldt, * NB: Different Linux versions do different things with the * accessed bit in set_thread_area(). */ - if (ar != expected_ar && - (ldt || ar != (expected_ar | AR_ACCESSED))) { + if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) { printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", (ldt ? "LDT" : "GDT"), index, ar, expected_ar); nerrs++; diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c index 2b3d6d235015..3d7b42e77299 100644 --- a/tools/usb/usbip/src/utils.c +++ b/tools/usb/usbip/src/utils.c @@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add) char command[SYSFS_BUS_ID_SIZE + 4]; char match_busid_attr_path[SYSFS_PATH_MAX]; int rc; + int cmd_size; snprintf(match_busid_attr_path, sizeof(match_busid_attr_path), "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME, @@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add) attr_name); if (add) - snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid); + cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", + busid); else - snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid); + cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", + busid); rc = write_sysfs_attribute(match_busid_attr_path, command, - sizeof(command)); + cmd_size); if (rc < 0) { dbg("failed to write match_busid: %s", strerror(errno)); return -1; |