diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2018-10-18 11:13:01 -0300 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2018-10-18 11:13:01 -0300 |
commit | ce6c9da1113ad681bff27a3e376d2017f7f8a59c (patch) | |
tree | 922ee99522f8e47e4fa5c735d4ea378dfdb79085 | |
parent | d4ae552982de39417d17f823df1f06b1cbc3686c (diff) | |
parent | 20e8e72d0fa8e26202932c30d592bade73fdc701 (diff) | |
download | linux-ce6c9da1113ad681bff27a3e376d2017f7f8a59c.tar.bz2 |
Merge remote-tracking branch 'tip/perf/urgent' into perf/core
To pick up fixes.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
448 files changed, 4041 insertions, 2428 deletions
diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst index 4b3825da48d9..82b6dbbd31cd 100644 --- a/Documentation/driver-api/fpga/fpga-mgr.rst +++ b/Documentation/driver-api/fpga/fpga-mgr.rst @@ -184,6 +184,11 @@ API for implementing a new FPGA Manager driver API for programming an FPGA --------------------------- +FPGA Manager flags + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :doc: FPGA Manager flags + .. kernel-doc:: include/linux/fpga/fpga-mgr.h :functions: fpga_image_info diff --git a/Documentation/fb/uvesafb.txt b/Documentation/fb/uvesafb.txt index f6362d88763b..aa924196c366 100644 --- a/Documentation/fb/uvesafb.txt +++ b/Documentation/fb/uvesafb.txt @@ -15,7 +15,8 @@ than x86. Check the v86d documentation for a list of currently supported arches. v86d source code can be downloaded from the following website: - http://dev.gentoo.org/~spock/projects/uvesafb + + https://github.com/mjanusz/v86d Please refer to the v86d documentation for detailed configuration and installation instructions. @@ -177,7 +178,7 @@ from the Video BIOS if you set pixclock to 0 in fb_var_screeninfo. -- Michal Januszewski <spock@gentoo.org> - Last updated: 2009-03-30 + Last updated: 2017-10-10 Documentation of the uvesafb options is loosely based on vesafb.txt. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 8313a636dd53..960de8fe3f40 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -425,7 +425,7 @@ tcp_mtu_probing - INTEGER 1 - Disabled by default, enabled when an ICMP black hole detected 2 - Always enabled, use initial MSS of tcp_base_mss. -tcp_probe_interval - INTEGER +tcp_probe_interval - UNSIGNED INTEGER Controls how often to start TCP Packetization-Layer Path MTU Discovery reprobe. The default is reprobing every 10 minutes as per RFC4821. diff --git a/MAINTAINERS b/MAINTAINERS index a255240d1452..556f902b3766 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -324,7 +324,6 @@ F: Documentation/ABI/testing/sysfs-bus-acpi F: Documentation/ABI/testing/configfs-acpi F: drivers/pci/*acpi* F: drivers/pci/*/*acpi* -F: drivers/pci/*/*/*acpi* F: tools/power/acpi/ ACPI APEI @@ -1251,7 +1250,7 @@ N: meson ARM/Annapurna Labs ALPINE ARCHITECTURE M: Tsahee Zidenberg <tsahee@annapurnalabs.com> -M: Antoine Tenart <antoine.tenart@free-electrons.com> +M: Antoine Tenart <antoine.tenart@bootlin.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-alpine/ @@ -2956,7 +2955,6 @@ F: include/linux/bcm963xx_tag.h BROADCOM BNX2 GIGABIT ETHERNET DRIVER M: Rasesh Mody <rasesh.mody@cavium.com> -M: Harish Patil <harish.patil@cavium.com> M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported @@ -2977,6 +2975,7 @@ F: drivers/scsi/bnx2i/ BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER M: Ariel Elior <ariel.elior@cavium.com> +M: Sudarsana Kalluru <sudarsana.kalluru@cavium.com> M: everest-linux-l2@cavium.com L: netdev@vger.kernel.org S: Supported @@ -5470,7 +5469,8 @@ S: Odd Fixes F: drivers/net/ethernet/agere/ ETHERNET BRIDGE -M: Stephen Hemminger <stephen@networkplumber.org> +M: Roopa Prabhu <roopa@cumulusnetworks.com> +M: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> L: bridge@lists.linux-foundation.org (moderated for non-subscribers) L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net:Bridge @@ -8598,7 +8598,6 @@ F: include/linux/spinlock*.h F: arch/*/include/asm/spinlock*.h F: include/linux/rwlock*.h F: include/linux/mutex*.h -F: arch/*/include/asm/mutex*.h F: include/linux/rwsem*.h F: arch/*/include/asm/rwsem.h F: include/linux/seqlock.h @@ -9658,7 +9657,8 @@ MIPS/LOONGSON2 ARCHITECTURE M: Jiaxun Yang <jiaxun.yang@flygoat.com> L: linux-mips@linux-mips.org S: Maintained -F: arch/mips/loongson64/*{2e/2f}* +F: arch/mips/loongson64/fuloong-2e/ +F: arch/mips/loongson64/lemote-2f/ F: arch/mips/include/asm/mach-loongson64/ F: drivers/*/*loongson2* F: drivers/*/*/*loongson2* @@ -9865,7 +9865,7 @@ M: Peter Rosin <peda@axentia.se> S: Maintained F: Documentation/ABI/testing/sysfs-class-mux* F: Documentation/devicetree/bindings/mux/ -F: include/linux/dt-bindings/mux/ +F: include/dt-bindings/mux/ F: include/linux/mux/ F: drivers/mux/ @@ -10942,7 +10942,7 @@ M: Willy Tarreau <willy@haproxy.com> M: Ksenija Stanojevic <ksenija.stanojevic@gmail.com> S: Odd Fixes F: Documentation/auxdisplay/lcd-panel-cgram.txt -F: drivers/misc/panel.c +F: drivers/auxdisplay/panel.c PARALLEL PORT SUBSYSTEM M: Sudip Mukherjee <sudipm.mukherjee@gmail.com> @@ -11979,7 +11979,7 @@ F: Documentation/scsi/LICENSE.qla4xxx F: drivers/scsi/qla4xxx/ QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Harish Patil <harish.patil@cavium.com> +M: Shahed Shaikh <Shahed.Shaikh@cavium.com> M: Manish Chopra <manish.chopra@cavium.com> M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org @@ -11987,7 +11987,6 @@ S: Supported F: drivers/net/ethernet/qlogic/qlcnic/ QLOGIC QLGE 10Gb ETHERNET DRIVER -M: Harish Patil <harish.patil@cavium.com> M: Manish Chopra <manish.chopra@cavium.com> M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org @@ -13063,7 +13062,7 @@ SELINUX SECURITY MODULE M: Paul Moore <paul@paul-moore.com> M: Stephen Smalley <sds@tycho.nsa.gov> M: Eric Paris <eparis@parisplace.org> -L: selinux@tycho.nsa.gov (moderated for non-subscribers) +L: selinux@vger.kernel.org W: https://selinuxproject.org W: https://github.com/SELinuxProject T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git @@ -15395,7 +15394,7 @@ S: Maintained UVESAFB DRIVER M: Michal Januszewski <spock@gentoo.org> L: linux-fbdev@vger.kernel.org -W: http://dev.gentoo.org/~spock/projects/uvesafb/ +W: https://github.com/mjanusz/v86d S: Maintained F: Documentation/fb/uvesafb.txt F: drivers/video/fbdev/uvesafb.* @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc8 NAME = Merciless Moray # *DOCUMENTATION* @@ -483,13 +483,15 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) +CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR) +GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif ifneq ($(GCC_TOOLCHAIN),) CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif -KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) +KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) endif diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b4441b0764d7..a045f3086047 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -149,7 +149,7 @@ config ARC_CPU_770 Support for ARC770 core introduced with Rel 4.10 (Summer 2011) This core has a bunch of cool new features: -MMU-v3: Variable Page Sz (4k, 8k, 16k), bigger J-TLB (128x4) - Shared Address Spaces (for sharing TLB entires in MMU) + Shared Address Spaces (for sharing TLB entries in MMU) -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 99cce77ab98f..644815c0516e 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,33 +6,11 @@ # published by the Free Software Foundation. # -ifeq ($(CROSS_COMPILE),) -ifndef CONFIG_CPU_BIG_ENDIAN -CROSS_COMPILE := arc-linux- -else -CROSS_COMPILE := arceb-linux- -endif -endif - KBUILD_DEFCONFIG := nsim_700_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 -cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs - -is_700 = $(shell $(CC) -dM -E - < /dev/null | grep -q "ARC700" && echo 1 || echo 0) - -ifdef CONFIG_ISA_ARCOMPACT -ifeq ($(is_700), 0) - $(error Toolchain not configured for ARCompact builds) -endif -endif - -ifdef CONFIG_ISA_ARCV2 -ifeq ($(is_700), 1) - $(error Toolchain not configured for ARCv2 builds) -endif -endif +cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file @@ -79,7 +57,7 @@ cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB -LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) +LIBGCC = $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 4674541eba3f..8ce6e7235915 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -241,6 +241,26 @@ int copy_thread(unsigned long clone_flags, task_thread_info(current)->thr_ptr; } + + /* + * setup usermode thread pointer #1: + * when child is picked by scheduler, __switch_to() uses @c_callee to + * populate usermode callee regs: this works (despite being in a kernel + * function) since special return path for child @ret_from_fork() + * ensures those regs are not clobbered all the way to RTIE to usermode + */ + c_callee->r25 = task_thread_info(p)->thr_ptr; + +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * setup usermode thread pointer #2: + * however for this special use of r25 in kernel, __switch_to() sets + * r25 for kernel needs and only in the final return path is usermode + * r25 setup, from pt_regs->user_r25. So set that up as well + */ + c_regs->user_r25 = c_callee->r25; +#endif + return 0; } diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index b10dccd0958f..3b1baa8605a7 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -11,6 +11,7 @@ #include "sama5d2-pinfunc.h" #include <dt-bindings/mfd/atmel-flexcom.h> #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/pinctrl/at91.h> / { model = "Atmel SAMA5D2 PTC EK"; @@ -299,6 +300,7 @@ <PIN_PA30__NWE_NANDWE>, <PIN_PB2__NRD_NANDOE>; bias-pull-up; + atmel,drive-strength = <ATMEL_PIO_DRVSTR_ME>; }; ale_cle_rdy_cs { diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi index 43ee992ccdcf..6df61518776f 100644 --- a/arch/arm/boot/dts/bcm63138.dtsi +++ b/arch/arm/boot/dts/bcm63138.dtsi @@ -106,21 +106,23 @@ global_timer: timer@1e200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x1e200 0x20>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; clocks = <&axi_clk>; }; local_timer: local-timer@1e600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x1e600 0x20>; - interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_EDGE_RISING)>; clocks = <&axi_clk>; }; twd_watchdog: watchdog@1e620 { compatible = "arm,cortex-a9-twd-wdt"; reg = <0x1e620 0x20>; - interrupts = <GIC_PPI 14 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_LEVEL_HIGH)>; }; armpll: armpll { @@ -158,7 +160,7 @@ serial0: serial@600 { compatible = "brcm,bcm6345-uart"; reg = <0x600 0x1b>; - interrupts = <GIC_SPI 32 0>; + interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -167,7 +169,7 @@ serial1: serial@620 { compatible = "brcm,bcm6345-uart"; reg = <0x620 0x1b>; - interrupts = <GIC_SPI 33 0>; + interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -180,7 +182,7 @@ reg = <0x2000 0x600>, <0xf0 0x10>; reg-names = "nand", "nand-int-base"; status = "disabled"; - interrupts = <GIC_SPI 38 0>; + interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "nand"; }; diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 7423d462d1e4..50dde84b72ed 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -123,6 +123,17 @@ }; }; +&cpu0 { + /* CPU rated to 1GHz, not 1.2GHz as per the default settings */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + 1000000 1200000 + >; +}; + &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 661be948ab74..185541a5b69f 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -1078,8 +1078,8 @@ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; clocks = <&rcc SPI6_K>; resets = <&rcc SPI6_R>; - dmas = <&mdma1 34 0x0 0x40008 0x0 0x0 0>, - <&mdma1 35 0x0 0x40002 0x0 0x0 0>; + dmas = <&mdma1 34 0x0 0x40008 0x0 0x0>, + <&mdma1 35 0x0 0x40002 0x0 0x0>; dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index ffd9f00f74a4..5f547c161baf 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -800,8 +800,7 @@ }; hdmi_phy: hdmi-phy@1ef0000 { - compatible = "allwinner,sun8i-r40-hdmi-phy", - "allwinner,sun50i-a64-hdmi-phy"; + compatible = "allwinner,sun8i-r40-hdmi-phy"; reg = <0x01ef0000 0x10000>; clocks = <&ccu CLK_BUS_HDMI1>, <&ccu CLK_HDMI_SLOW>, <&ccu 7>, <&ccu 16>; diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h index ae5fdff18406..8247bc15addc 100644 --- a/arch/arm/kernel/vmlinux.lds.h +++ b/arch/arm/kernel/vmlinux.lds.h @@ -49,6 +49,8 @@ #define ARM_DISCARD \ *(.ARM.exidx.exit.text) \ *(.ARM.extab.exit.text) \ + *(.ARM.exidx.text.exit) \ + *(.ARM.extab.text.exit) \ ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) \ ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) \ ARM_EXIT_DISCARD(EXIT_TEXT) \ diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 450c7a4fbc8a..cb094e55dc5f 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -478,15 +478,15 @@ static const struct coproc_reg cp15_regs[] = { /* ICC_SGI1R */ { CRm64(12), Op1( 0), is64, access_gic_sgi}, - /* ICC_ASGI1R */ - { CRm64(12), Op1( 1), is64, access_gic_sgi}, - /* ICC_SGI0R */ - { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* VBAR: swapped by interrupt.S. */ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c12_VBAR, 0x00000000 }, + /* ICC_ASGI1R */ + { CRm64(12), Op1( 1), is64, access_gic_sgi}, + /* ICC_SGI0R */ + { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* ICC_SRE */ { CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre }, diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index fc91205ff46c..5bf9443cfbaa 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index fbc74b5fa3ed..8edf93b4490f 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -413,3 +413,4 @@ 396 common pkey_free sys_pkey_free 397 common statx sys_statx 398 common rseq sys_rseq +399 common io_pgetevents sys_io_pgetevents diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8e38d5267f22..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b4fac434c84..b3354ff94e79 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -64,6 +64,9 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +static int num_standard_resources; +static struct resource *standard_resources; + phys_addr_t __fdt_pointer __initdata; /* @@ -206,14 +209,19 @@ static void __init request_standard_resources(void) { struct memblock_region *region; struct resource *res; + unsigned long i = 0; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = alloc_bootmem_low(num_standard_resources * + sizeof(*standard_resources)); + for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; @@ -243,36 +251,26 @@ static void __init request_standard_resources(void) static int __init reserve_memblock_reserved_regions(void) { - phys_addr_t start, end, roundup_end = 0; - struct resource *mem, *res; - u64 i; - - for_each_reserved_mem_region(i, &start, &end) { - if (end <= roundup_end) - continue; /* done already */ - - start = __pfn_to_phys(PFN_DOWN(start)); - end = __pfn_to_phys(PFN_UP(end)) - 1; - roundup_end = end; - - res = kzalloc(sizeof(*res), GFP_ATOMIC); - if (WARN_ON(!res)) - return -ENOMEM; - res->start = start; - res->end = end; - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - - mem = request_resource_conflict(&iomem_resource, res); - /* - * We expected memblock_reserve() regions to conflict with - * memory created by request_standard_resources(). - */ - if (WARN_ON_ONCE(!mem)) + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) continue; - kfree(res); - reserve_region_with_split(mem, start, end, "reserved"); + for_each_reserved_mem_region(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "reserved"); + } } return 0; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07256b08226c..a6c9fbaeaefc 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static int validate_core_offset(const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.sp): + case KVM_REG_ARM_CORE_REG(regs.pc): + case KVM_REG_ARM_CORE_REG(regs.pstate): + case KVM_REG_ARM_CORE_REG(sp_el1): + case KVM_REG_ARM_CORE_REG(elr_el1): + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + size = sizeof(__u64); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + size = sizeof(__uint128_t); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + size = sizeof(__u32); + break; + + default: + return -EINVAL; + } + + if (KVM_REG_SIZE(reg->id) == size && + IS_ALIGNED(off, size / sizeof(__u32))) + return 0; + + return -EINVAL; +} + static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* @@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) return -EINVAL; @@ -107,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK; + u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: + if (!system_supports_32bit_el0()) + return -EINVAL; + break; case PSR_AA32_MODE_FIQ: case PSR_AA32_MODE_IRQ: case PSR_AA32_MODE_SVC: case PSR_AA32_MODE_ABT: case PSR_AA32_MODE_UND: + if (!vcpu_el1_is_32bit(vcpu)) + return -EINVAL; + break; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: + if (vcpu_el1_is_32bit(vcpu)) + return -EINVAL; break; default: err = -EINVAL; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 192b3ba07075..f58ea503ad01 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -117,11 +117,14 @@ static pte_t get_clear_flush(struct mm_struct *mm, /* * If HW_AFDBM is enabled, then the HW could turn on - * the dirty bit for any page in the set, so check - * them all. All hugetlb entries are already young. + * the dirty or accessed bit for any page in the set, + * so check them all. */ if (pte_dirty(pte)) orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); } if (valid) { @@ -320,11 +323,40 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return get_clear_flush(mm, addr, ptep, pgsize, ncontig); } +/* + * huge_ptep_set_access_flags will update access flags (dirty, accesssed) + * and write permission. + * + * For a contiguous huge pte range we need to check whether or not write + * permission has to change only on the first pte in the set. Then for + * all the contiguous ptes we need to check whether or not there is a + * discrepancy between dirty or young. + */ +static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) +{ + int i; + + if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) + return 1; + + for (i = 0; i < ncontig; i++) { + pte_t orig_pte = huge_ptep_get(ptep + i); + + if (pte_dirty(pte) != pte_dirty(orig_pte)) + return 1; + + if (pte_young(pte) != pte_young(orig_pte)) + return 1; + } + + return 0; +} + int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - int ncontig, i, changed = 0; + int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; pgprot_t hugeprot; @@ -336,19 +368,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; + if (!__cont_access_flags_changed(ptep, pte, ncontig)) + return 0; + orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); - if (!pte_same(orig_pte, pte)) - changed = 1; - /* Make sure we don't lose the dirty state */ + /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) pte = pte_mkdirty(pte); + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); - return changed; + return 1; } void huge_ptep_set_wrprotect(struct mm_struct *mm, diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index b2fa62922d88..49d6046ca1d0 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -13,6 +13,7 @@ #include <linux/atomic.h> #include <linux/cpumask.h> +#include <linux/sizes.h> #include <linux/threads.h> #include <asm/cachectl.h> @@ -80,11 +81,10 @@ extern unsigned int vced_count, vcei_count; #endif -/* - * One page above the stack is used for branch delay slot "emulation". - * See dsemul.c for details. - */ -#define STACK_TOP ((TASK_SIZE & PAGE_MASK) - PAGE_SIZE) +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M) + +extern unsigned long mips_stack_top(void); +#define STACK_TOP mips_stack_top() /* * This decides where the kernel will search for a free chunk of vm diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8fc69891e117..d4f7fd4550e1 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/nmi.h> #include <linux/cpu.h> +#include <asm/abi.h> #include <asm/asm.h> #include <asm/bootinfo.h> #include <asm/cpu.h> @@ -39,6 +40,7 @@ #include <asm/dsp.h> #include <asm/fpu.h> #include <asm/irq.h> +#include <asm/mips-cps.h> #include <asm/msa.h> #include <asm/pgtable.h> #include <asm/mipsregs.h> @@ -645,6 +647,29 @@ out: return pc; } +unsigned long mips_stack_top(void) +{ + unsigned long top = TASK_SIZE & PAGE_MASK; + + /* One page for branch delay slot "emulation" */ + top -= PAGE_SIZE; + + /* Space for the VDSO, data page & GIC user page */ + top -= PAGE_ALIGN(current->thread.abi->vdso->size); + top -= PAGE_SIZE; + top -= mips_gic_present() ? PAGE_SIZE : 0; + + /* Space for cache colour alignment */ + if (cpu_has_dc_aliases) + top -= shm_align_mask + 1; + + /* Space to randomize the VDSO base */ + if (current->flags & PF_RANDOMIZE) + top -= VDSO_RANDOMIZE_SIZE; + + return top; +} + /* * Don't forget that the stack pointer must be aligned on a 8 bytes * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c71d1eb7da59..8aaaa42f91ed 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -846,6 +846,34 @@ static void __init arch_mem_init(char **cmdline_p) struct memblock_region *reg; extern void plat_mem_setup(void); + /* + * Initialize boot_command_line to an innocuous but non-empty string in + * order to prevent early_init_dt_scan_chosen() from copying + * CONFIG_CMDLINE into it without our knowledge. We handle + * CONFIG_CMDLINE ourselves below & don't want to duplicate its + * content because repeating arguments can be problematic. + */ + strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE); + + /* call board setup routine */ + plat_mem_setup(); + + /* + * Make sure all kernel memory is in the maps. The "UP" and + * "DOWN" are opposite for initdata since if it crosses over + * into another memory section you don't want that to be + * freed when the initdata is freed. + */ + arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, + PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, + BOOT_MEM_RAM); + arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, + PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, + BOOT_MEM_INIT_RAM); + + pr_info("Determined physical RAM map:\n"); + print_memory_map(); + #if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); #else @@ -873,26 +901,6 @@ static void __init arch_mem_init(char **cmdline_p) } #endif #endif - - /* call board setup routine */ - plat_mem_setup(); - - /* - * Make sure all kernel memory is in the maps. The "UP" and - * "DOWN" are opposite for initdata since if it crosses over - * into another memory section you don't want that to be - * freed when the initdata is freed. - */ - arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, - PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, - BOOT_MEM_RAM); - arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, - PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, - BOOT_MEM_INIT_RAM); - - pr_info("Determined physical RAM map:\n"); - print_memory_map(); - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 8f845f6e5f42..48a9c6b90e07 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -15,6 +15,7 @@ #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/random.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/timekeeper_internal.h> @@ -97,6 +98,21 @@ void update_vsyscall_tz(void) } } +static unsigned long vdso_base(void) +{ + unsigned long base; + + /* Skip the delay slot emulation page */ + base = STACK_TOP + PAGE_SIZE; + + if (current->flags & PF_RANDOMIZE) { + base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base = PAGE_ALIGN(base); + } + + return base; +} + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mips_vdso_image *image = current->thread.abi->vdso; @@ -137,7 +153,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (cpu_has_dc_aliases) size += shm_align_mask + 1; - base = get_unmapped_area(NULL, 0, size, 0, 0); + base = get_unmapped_area(NULL, vdso_base(), size, 0, 0); if (IS_ERR_VALUE(base)) { ret = base; goto out; diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 3a6f34ef5ffc..069acec3df9f 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -280,9 +280,11 @@ * unset_bytes = end_addr - current_addr + 1 * a2 = t1 - a0 + 1 */ + .set reorder PTR_SUBU a2, t1, a0 + PTR_ADDIU a2, 1 jr ra - PTR_ADDIU a2, 1 + .set noreorder .endm diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 2fdc865ca374..2a2486526d1f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -114,7 +114,7 @@ */ #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) /* * user access blocked by key */ @@ -132,7 +132,7 @@ */ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 913c5725cdb2..bb6ac471a784 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1306,6 +1306,16 @@ void show_user_instructions(struct pt_regs *regs) pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int)); + /* + * Make sure the NIP points at userspace, not kernel text/data or + * elsewhere. + */ + if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) { + pr_info("%s[%d]: Bad NIP, not dumping instructions.\n", + current->comm, current->pid); + return; + } + pr_info("%s[%d]: code: ", current->comm, current->pid); for (i = 0; i < instructions_to_print; i++) { diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 933c574e1cf7..998f8d089ac7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -646,6 +646,16 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ local_irq_disable(); ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + /* + * If the PTE disappeared temporarily due to a THP + * collapse, just return and let the guest try again. + */ + if (!ptep) { + local_irq_enable(); + if (page) + put_page(page); + return RESUME_GUEST; + } pte = *ptep; local_irq_enable(); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 6ae2777c220d..5ffee298745f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -28,12 +28,6 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, { int err; - /* Make sure we aren't patching a freed init section */ - if (init_mem_is_free && init_section_contains(exec_addr, 4)) { - pr_debug("Skipping init section patching addr: 0x%px\n", exec_addr); - return 0; - } - __put_user_size(instr, patch_addr, 4, err); if (err) return err; @@ -148,7 +142,7 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { int err; unsigned int *patch_addr = NULL; @@ -188,12 +182,22 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ + +int patch_instruction(unsigned int *addr, unsigned int instr) +{ + /* Make sure we aren't patching a freed init section */ + if (init_mem_is_free && init_section_contains(addr, 4)) { + pr_debug("Skipping init section patching addr: 0x%px\n", addr); + return 0; + } + return do_patch_instruction(addr, instr); +} NOKPROBE_SYMBOL(patch_instruction); int patch_branch(unsigned int *addr, unsigned long target, int flags) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 59d07bd5374a..055b211b7126 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1217,9 +1217,10 @@ int find_and_online_cpu_nid(int cpu) * Need to ensure that NODE_DATA is initialized for a node from * available memory (see memblock_alloc_try_nid). If unable to * init the node, then default to nearest node that has memory - * installed. + * installed. Skip onlining a node if the subsystems are not + * yet initialized. */ - if (try_online_node(new_nid)) + if (!topology_inited || try_online_node(new_nid)) new_nid = first_online_node; #else /* diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index aee603123030..b2d26d9d8489 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -186,7 +186,7 @@ static void __init setup_bootmem(void) BUG_ON(mem_size == 0); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = pfn_base + PFN_DOWN(mem_size); + max_low_pfn = memblock_end_of_DRAM(); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 3cae9168f63c..e44a8d7959f5 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -108,7 +108,8 @@ int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); void sclp_early_printk(const char *s); -void __sclp_early_printk(const char *s, unsigned int len); +void sclp_early_printk_force(const char *s); +void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); int _sclp_get_core_info(struct sclp_core_info *info); int sclp_core_configure(u8 core); diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c index 9431784d7796..40c1dfec944e 100644 --- a/arch/s390/kernel/early_printk.c +++ b/arch/s390/kernel/early_printk.c @@ -10,7 +10,7 @@ static void sclp_early_write(struct console *con, const char *s, unsigned int len) { - __sclp_early_printk(s, len); + __sclp_early_printk(s, len, 0); } static struct console sclp_early_console = { diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index a049a7b9d6e8..c1a080b11ae9 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -198,12 +198,10 @@ pgm_check_entry: /* Suspend CPU not available -> panic */ larl %r15,init_thread_union - ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,-STACK_FRAME_OVERHEAD larl %r2,.Lpanic_string - lghi %r1,0 - sam31 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - brasl %r14,sclp_early_printk + brasl %r14,sclp_early_printk_force larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 666d6b5c0440..9c3fc03abe9a 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -28,7 +28,7 @@ typedef struct { unsigned short sock_id; /* physical package */ unsigned short core_id; unsigned short max_cache_id; /* groupings of highest shared cache */ - unsigned short proc_id; /* strand (aka HW thread) id */ + signed short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 09acf0ddec10..45b4bf1875e6 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -427,8 +427,9 @@ #define __NR_preadv2 358 #define __NR_pwritev2 359 #define __NR_statx 360 +#define __NR_io_pgetevents 361 -#define NR_syscalls 361 +#define NR_syscalls 362 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c index 5868fc333ea8..639c8e54530a 100644 --- a/arch/sparc/kernel/kgdb_32.c +++ b/arch/sparc/kernel/kgdb_32.c @@ -122,7 +122,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->pc = addr; linux_regs->npc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c index d5f7dc6323d5..a68bbddbdba4 100644 --- a/arch/sparc/kernel/kgdb_64.c +++ b/arch/sparc/kernel/kgdb_64.c @@ -148,7 +148,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->tpc = addr; linux_regs->tnpc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d3149baaa33c..67b3e6b3ce5d 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -24,6 +24,7 @@ #include <asm/cpudata.h> #include <linux/uaccess.h> #include <linux/atomic.h> +#include <linux/sched/clock.h> #include <asm/nmi.h> #include <asm/pcr.h> #include <asm/cacheflush.h> @@ -927,6 +928,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -959,10 +962,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -988,6 +993,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1079,6 +1087,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1371,9 +1381,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, @@ -1603,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct perf_sample_data data; struct cpu_hw_events *cpuc; struct pt_regs *regs; + u64 finish_clock; + u64 start_clock; int i; if (!atomic_read(&active_events)) @@ -1616,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; } + start_clock = sched_clock(); + regs = args->regs; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1654,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, sparc_pmu_stop(event, 0); } + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return NOTIFY_STOP; } diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index f6528884a2c8..4073e2b87dd0 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -84,8 +84,9 @@ __handle_signal: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 sethi %hi(0xf << 20), %l4 and %l1, %l4, %l4 + andn %l1, %l4, %l1 ba,pt %xcc, __handle_preemption_continue - andn %l1, %l4, %l1 + srl %l4, 20, %l4 /* When returning from a NMI (%pil==15) interrupt we want to * avoid running softirqs, doing IRQ tracing, preempting, etc. diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 12bee14b552c..621a363098ec 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -90,4 +90,4 @@ sys_call_table: /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen /*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .long sys_statx +/*360*/ .long sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 387ef993880a..bb68c805b891 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -91,7 +91,7 @@ sys_call_table32: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, compat_sys_io_pgetevents #endif /* CONFIG_COMPAT */ @@ -173,4 +173,4 @@ sys_call_table: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 635d67ffc9a3..7db5aabe9708 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -180,11 +180,17 @@ static int send_dreg(struct vio_driver_state *vio) struct vio_dring_register pkt; char all[sizeof(struct vio_dring_register) + (sizeof(struct ldc_trans_cookie) * - dr->ncookies)]; + VIO_MAX_RING_COOKIES)]; } u; + size_t bytes = sizeof(struct vio_dring_register) + + (sizeof(struct ldc_trans_cookie) * + dr->ncookies); int i; - memset(&u, 0, sizeof(u)); + if (WARN_ON(bytes > sizeof(u))) + return -EINVAL; + + memset(&u, 0, bytes); init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG); u.pkt.dring_ident = 0; u.pkt.num_descr = dr->num_entries; @@ -206,7 +212,7 @@ static int send_dreg(struct vio_driver_state *vio) (unsigned long long) u.pkt.cookies[i].cookie_size); } - return send_ctrl(vio, &u.pkt.tag, sizeof(u)); + return send_ctrl(vio, &u.pkt.tag, bytes); } static int send_rdx(struct vio_driver_state *vio) diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index dd0b5a92ffd0..dc85570d8839 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -31,23 +31,21 @@ obj-y += $(vdso_img_objs) targets += $(vdso_img_cfiles) targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) -export CPPFLAGS_vdso.lds += -P -C +CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,--no-undefined \ -Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \ $(DISABLE_LTO) -$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) HOST_EXTRACFLAGS += -I$(srctree)/tools/include hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ -define cmd_vdso2c - $(obj)/vdso2c $< $(<:%.dbg=%) $@ -endef + cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE $(call if_changed,vdso2c) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index fa3f439f0a92..141d415a8c80 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS) + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + CFL += $(RETPOLINE_VDSO_CFLAGS) +endif +endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) @@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) +endif +endif + $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(obj)/vdso32.so.dbg: FORCE \ diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index f19856d95c60..e48ca3afa091 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -43,8 +43,9 @@ extern u8 hvclock_page notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*ts) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : + "memory", "rcx", "r11"); return ret; } @@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : + "memory", "rcx", "r11"); return ret; } @@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[clock], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "=a" (ret), "=m" (*ts) + : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) : "memory", "edx"); return ret; } @@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[tv], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "=a" (ret), "=m" (*tv), "=m" (*tz) + : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) : "memory", "edx"); return ret; } diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b64acb08a62b..106b7d0e2dae 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index a80c0673798f..e60c45fd3679 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -10,8 +10,13 @@ struct cpumask; struct mm_struct; #ifdef CONFIG_X86_UV +#include <linux/efi.h> extern enum uv_system_type get_uv_system_type(void); +static inline bool is_early_uv_system(void) +{ + return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); +} extern int is_uv_system(void); extern int is_uv_hubless(void); extern void uv_cpu_init(void); @@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, #else /* X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 22ab408177b2..eeea634bee0a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c) static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { + if (c->x86 == 6) { /* Duron Rev A0 */ if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 285eb3ec4200..3736f6dc9545 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -529,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive); + unsigned long cbm, int closid, bool exclusive); unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, - u32 cbm); + unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); int rdt_pseudo_lock_init(void); void rdt_pseudo_lock_release(void); diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c index 676c8a3ecdb2..6d06e0b8ecf3 100644 --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c +++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c @@ -789,25 +789,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) /** * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked * @d: RDT domain - * @_cbm: CBM to test + * @cbm: CBM to test * - * @d represents a cache instance and @_cbm a capacity bitmask that is - * considered for it. Determine if @_cbm overlaps with any existing + * @d represents a cache instance and @cbm a capacity bitmask that is + * considered for it. Determine if @cbm overlaps with any existing * pseudo-locked region on @d. * - * Return: true if @_cbm overlaps with pseudo-locked region on @d, false + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * + * Return: true if @cbm overlaps with pseudo-locked region on @d, false * otherwise. */ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm) +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *cbm_b; unsigned int cbm_len; + unsigned long cbm_b; if (d->plr) { cbm_len = d->plr->r->cache.cbm_len; - cbm_b = (unsigned long *)&d->plr->cbm; - if (bitmap_intersects(cbm, cbm_b, cbm_len)) + cbm_b = d->plr->cbm; + if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) return true; } return false; diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 1b8e86a5d5e1..b140c68bc14b 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -975,33 +975,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, * is false then overlaps with any resource group or hardware entities * will be considered. * + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * * Return: false if CBM does not overlap, true if it does. */ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive) + unsigned long cbm, int closid, bool exclusive) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *ctrl_b; enum rdtgrp_mode mode; + unsigned long ctrl_b; u32 *ctrl; int i; /* Check for any overlap with regions used by hardware directly */ if (!exclusive) { - if (bitmap_intersects(cbm, - (unsigned long *)&r->cache.shareable_bits, - r->cache.cbm_len)) + ctrl_b = r->cache.shareable_bits; + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) return true; } /* Check for overlap with other resource groups */ ctrl = d->ctrl_val; for (i = 0; i < closids_supported(); i++, ctrl++) { - ctrl_b = (unsigned long *)ctrl; + ctrl_b = *ctrl; mode = rdtgroup_mode_by_closid(i); if (closid_allocated(i) && i != closid && mode != RDT_MODE_PSEUDO_LOCKSETUP) { - if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) { + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { if (exclusive) { if (mode == RDT_MODE_EXCLUSIVE) return true; @@ -1138,15 +1139,18 @@ out: * computed by first dividing the total cache size by the CBM length to * determine how many bytes each bit in the bitmask represents. The result * is multiplied with the number of bits set in the bitmask. + * + * @cbm is unsigned long, even if only 32 bits are used to make the + * bitmap functions work correctly. */ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, u32 cbm) + struct rdt_domain *d, unsigned long cbm) { struct cpu_cacheinfo *ci; unsigned int size = 0; int num_b, i; - num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len); + num_b = bitmap_weight(&cbm, r->cache.cbm_len); ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); for (i = 0; i < ci->num_leaves; i++) { if (ci->info_list[i].level == r->cache_level) { @@ -2353,6 +2357,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) u32 used_b = 0, unused_b = 0; u32 closid = rdtgrp->closid; struct rdt_resource *r; + unsigned long tmp_cbm; enum rdtgrp_mode mode; struct rdt_domain *d; int i, ret; @@ -2390,9 +2395,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) * modify the CBM based on system availability. */ cbm_ensure_valid(&d->new_ctrl, r); - if (bitmap_weight((unsigned long *) &d->new_ctrl, - r->cache.cbm_len) < - r->cache.min_cbm_bits) { + /* + * Assign the u32 CBM to an unsigned long to ensure + * that bitmap_weight() does not access out-of-bound + * memory. + */ + tmp_cbm = d->new_ctrl; + if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < + r->cache.min_cbm_bits) { rdt_last_cmd_printf("no space on %s:%d\n", r->name, d->id); return -ENOSPC; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index dd6b564f65e3..19c3c6bec325 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -26,6 +26,7 @@ #include <asm/apic.h> #include <asm/intel-family.h> #include <asm/i8259.h> +#include <asm/uv/uv.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1433,6 +1434,9 @@ void __init tsc_early_init(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return; + /* Don't change UV TSC multi-chassis synchronization */ + if (is_early_uv_system()) + return; if (!determine_cpu_tsc_frequencies(true)) return; loops_per_jiffy = get_loops_per_jiffy(); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d7e9bce6ff61..51b953ad9d4e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -249,6 +249,17 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_mask; */ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +/* + * In some cases, we need to preserve the GFN of a non-present or reserved + * SPTE when we usurp the upper five bits of the physical address space to + * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll + * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask + * left into the reserved bits, i.e. the GFN in the SPTE will be split into + * high and low parts. This mask covers the lower bits of the GFN. + */ +static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; + + static void mmu_spte_set(u64 *sptep, u64 spte); static union kvm_mmu_page_role kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); @@ -357,9 +368,7 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask | - shadow_nonpresent_or_rsvd_mask; - u64 gpa = spte & ~mask; + u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) & shadow_nonpresent_or_rsvd_mask; @@ -423,6 +432,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); static void kvm_mmu_reset_all_pte_masks(void) { + u8 low_phys_bits; + shadow_user_mask = 0; shadow_accessed_mask = 0; shadow_dirty_mask = 0; @@ -437,12 +448,17 @@ static void kvm_mmu_reset_all_pte_masks(void) * appropriate mask to guard against L1TF attacks. Otherwise, it is * assumed that the CPU is not vulnerable to L1TF. */ + low_phys_bits = boot_cpu_data.x86_phys_bits; if (boot_cpu_data.x86_phys_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) + 52 - shadow_nonpresent_or_rsvd_mask_len) { shadow_nonpresent_or_rsvd_mask = rsvd_bits(boot_cpu_data.x86_phys_bits - shadow_nonpresent_or_rsvd_mask_len, boot_cpu_data.x86_phys_bits - 1); + low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; + } + shadow_nonpresent_or_rsvd_lower_gfn_mask = + GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); } static int is_cpuid_PSE36(void) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d96092b35936..61ccfb13899e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -436,14 +436,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) static inline bool svm_sev_enabled(void) { - return max_sev_asid; + return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0; } static inline bool sev_guest(struct kvm *kvm) { +#ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; +#else + return false; +#endif } static inline int sev_get_asid(struct kvm *kvm) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 06412ba46aa3..e665aa7167cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -121,7 +121,6 @@ module_param_named(pml, enable_pml, bool, S_IRUGO); #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 -#define MSR_BITMAP_MODE_LM 4 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL @@ -857,6 +856,7 @@ struct nested_vmx { /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ u64 vmcs01_debugctl; + u64 vmcs01_guest_bndcfgs; u16 vpid02; u16 last_vpid; @@ -1572,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm) goto out; } + /* + * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the + * base of EPT PML4 table, strip off EPT configuration information. + */ ret = hyperv_flush_guest_mapping( - to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer); + to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); out: spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); @@ -2899,8 +2903,7 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); } - if (is_long_mode(&vmx->vcpu)) - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #else savesegment(fs, fs_sel); savesegment(gs, gs_sel); @@ -2951,8 +2954,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) vmx->loaded_cpu_state = NULL; #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #endif if (host_state->ldt_sel || (host_state->gs_sel & 7)) { kvm_load_ldt(host_state->ldt_sel); @@ -2980,24 +2982,19 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - rdmsrl(MSR_KERNEL_GS_BASE, - vmx->msr_guest_kernel_gs_base); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + preempt_enable(); return vmx->msr_guest_kernel_gs_base; } static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - wrmsrl(MSR_KERNEL_GS_BASE, data); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + wrmsrl(MSR_KERNEL_GS_BASE, data); + preempt_enable(); vmx->msr_guest_kernel_gs_base = data; } #endif @@ -3533,9 +3530,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; - if (kvm_mpx_supported()) - msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; - /* We support free control of debug control saving. */ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; @@ -3552,8 +3546,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_ENTRY_LOAD_IA32_PAT; msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); - if (kvm_mpx_supported()) - msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. */ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; @@ -3601,12 +3593,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high); msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high &= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING; + /* * We can emulate "VMCS shadowing," even if the hardware * doesn't support it. @@ -3663,6 +3655,10 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high |= SECONDARY_EXEC_UNRESTRICTED_GUEST; + if (flexpriority_enabled) + msrs->secondary_ctls_high |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, msrs->misc_low, @@ -5073,19 +5069,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!msr) return; - /* - * MSR_KERNEL_GS_BASE is not intercepted when the guest is in - * 64-bit mode as a 64-bit kernel may frequently access the - * MSR. This means we need to manually save/restore the MSR - * when switching between guest and host state, but only if - * the guest is in 64-bit mode. Sync our cached value if the - * guest is transitioning to 32-bit mode and the CPU contains - * guest state, i.e. the cache is stale. - */ -#ifdef CONFIG_X86_64 - if (!(efer & EFER_LMA)) - (void)vmx_read_guest_kernel_gs_base(vmx); -#endif vcpu->arch.efer = efer; if (efer & EFER_LMA) { vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); @@ -6078,9 +6061,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) mode |= MSR_BITMAP_MODE_X2APIC_APICV; } - if (is_long_mode(vcpu)) - mode |= MSR_BITMAP_MODE_LM; - return mode; } @@ -6121,9 +6101,6 @@ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) if (!changed) return; - vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, - !(mode & MSR_BITMAP_MODE_LM)); - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); @@ -6189,6 +6166,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) nested_mark_vmcs12_pages_dirty(vcpu); } +static u8 vmx_get_rvi(void) +{ + return vmcs_read16(GUEST_INTR_STATUS) & 0xff; +} + static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6201,7 +6183,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) return false; - rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff; + rvi = vmx_get_rvi(); vapic_page = kmap(vmx->nested.virtual_apic_page); vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); @@ -10245,15 +10227,16 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + if (!flexpriority_enabled && + !cpu_has_vmx_virtualize_x2apic_mode()) + return; + /* Postpone execution until vmcs01 is the current VMCS. */ if (is_guest_mode(vcpu)) { to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; return; } - if (!cpu_need_tpr_shadow(vcpu)) - return; - sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); @@ -10375,6 +10358,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } +static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) +{ + u8 rvi = vmx_get_rvi(); + u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); + + return ((rvi & 0xf0) > (vppr & 0xf0)); +} + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) @@ -11264,6 +11255,23 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) #undef cr4_fixed1_update } +static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (kvm_mpx_supported()) { + bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); + + if (mpx_enabled) { + vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + } else { + vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; + } + } +} + static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -11280,8 +11288,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - if (nested_vmx_allowed(vcpu)) + if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); + nested_vmx_entry_exit_ctls_update(vcpu); + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -12049,8 +12059,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) set_cr4_guest_host_mask(vmx); - if (vmx_mpx_supported()) - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + if (kvm_mpx_supported()) { + if (vmx->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + else + vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); + } if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) @@ -12595,15 +12610,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); bool from_vmentry = !!exit_qual; u32 dummy_exit_qual; - u32 vmcs01_cpu_exec_ctrl; + bool evaluate_pending_interrupts; int r = 0; - vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & + (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); + if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) + evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + if (kvm_mpx_supported() && + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); @@ -12643,16 +12664,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) * to L1 or delivered directly to L2 (e.g. In case L1 don't * intercept EXTERNAL_INTERRUPT). * - * Usually this would be handled by L0 requesting a - * IRQ/NMI window by setting VMCS accordingly. However, - * this setting was done on VMCS01 and now VMCS02 is active - * instead. Thus, we force L0 to perform pending event - * evaluation by requesting a KVM_REQ_EVENT. - */ - if (vmcs01_cpu_exec_ctrl & - (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) { + * Usually this would be handled by the processor noticing an + * IRQ/NMI window request, or checking RVI during evaluation of + * pending virtual interrupts. However, this setting was done + * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 + * to perform pending event evaluation by requesting a KVM_REQ_EVENT. + */ + if (unlikely(evaluate_pending_interrupts)) kvm_make_request(KVM_REQ_EVENT, vcpu); - } /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edbf00ec56b3..ca717737347e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4698,7 +4698,7 @@ static void kvm_init_msr_list(void) */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: - if (!kvm_x86_ops->mpx_supported()) + if (!kvm_mpx_supported()) continue; break; case MSR_TSC_AUX: diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 089e78c4effd..59274e2c1ac4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd) #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) +#define MAX_UNSHARED_PTRS_PER_PGD \ + max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) @@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd) * and initialize the kernel pmds here. */ #define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD +#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD /* * We allocate separate PMDs for the kernel part of the user page-table @@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd) */ #define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \ KERNEL_PGD_PTRS : 0) +#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { @@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) /* No need to prepopulate any pagetable entries in non-PAE modes. */ #define PREALLOCATED_PMDS 0 +#define MAX_PREALLOCATED_PMDS 0 #define PREALLOCATED_USER_PMDS 0 +#define MAX_PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) @@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd) pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; - pmd_t *u_pmds[PREALLOCATED_USER_PMDS]; - pmd_t *pmds[PREALLOCATED_PMDS]; + pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; + pmd_t *pmds[MAX_PREALLOCATED_PMDS]; pgd = _pgd_alloc(); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 8e20a0677dcf..8ac93fcbaa2e 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -310,6 +310,7 @@ static void scale_up(struct rq_wb *rwb) rq_depth_scale_up(&rwb->rq_depth); calc_wb_limits(rwb); rwb->unknown_cnt = 0; + rwb_wake_all(rwb); rwb_trace_step(rwb, "scale up"); } @@ -318,7 +319,6 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle) rq_depth_scale_down(&rwb->rq_depth, hard_throttle); calc_wb_limits(rwb); rwb->unknown_cnt = 0; - rwb_wake_all(rwb); rwb_trace_step(rwb, "scale down"); } diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index b3c0498ee433..8e9213b36e31 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -226,8 +226,11 @@ static int alloc_lookup_fw_priv(const char *fw_name, } tmp = __allocate_fw_priv(fw_name, fwc, dbuf, size); - if (tmp && !(opt_flags & FW_OPT_NOCACHE)) - list_add(&tmp->list, &fwc->head); + if (tmp) { + INIT_LIST_HEAD(&tmp->list); + if (!(opt_flags & FW_OPT_NOCACHE)) + list_add(&tmp->list, &fwc->head); + } spin_unlock(&fwc->lock); *fw_priv = tmp; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 3f68e2919dc5..a690fd400260 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1713,8 +1713,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dpm_wait_for_subordinate(dev, async); - if (async_error) + if (async_error) { + dev->power.direct_complete = false; goto Complete; + } /* * If a device configured to wake up the system from sleep states @@ -1726,6 +1728,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { + dev->power.direct_complete = false; async_error = -EBUSY; goto Complete; } diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5ca56bfae63c..f68e9baffad7 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -36,6 +36,10 @@ MODULE_VERSION(DRV_MODULE_VERSION); #define VDC_TX_RING_SIZE 512 #define VDC_DEFAULT_BLK_SIZE 512 +#define MAX_XFER_BLKS (128 * 1024) +#define MAX_XFER_SIZE (MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE) +#define MAX_RING_COOKIES ((MAX_XFER_BLKS / PAGE_SIZE) + 2) + #define WAITING_FOR_LINK_UP 0x01 #define WAITING_FOR_TX_SPACE 0x02 #define WAITING_FOR_GEN_CMD 0x04 @@ -450,7 +454,7 @@ static int __send_request(struct request *req) { struct vdc_port *port = req->rq_disk->private_data; struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - struct scatterlist sg[port->ring_cookies]; + struct scatterlist sg[MAX_RING_COOKIES]; struct vdc_req_entry *rqe; struct vio_disk_desc *desc; unsigned int map_perm; @@ -458,6 +462,9 @@ static int __send_request(struct request *req) u64 len; u8 op; + if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES)) + return -EINVAL; + map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO; if (rq_data_dir(req) == READ) { @@ -984,9 +991,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) goto err_out_free_port; port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE; - port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size); - port->ring_cookies = ((port->max_xfer_size * - port->vdisk_block_size) / PAGE_SIZE) + 2; + port->max_xfer_size = MAX_XFER_SIZE; + port->ring_cookies = MAX_RING_COOKIES; err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port); if (err) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index e182f6019f68..2fee65886d50 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1322,7 +1322,7 @@ static int qca_init_regulators(struct qca_power *qca, { int i; - qca->vreg_bulk = devm_kzalloc(qca->dev, num_vregs * + qca->vreg_bulk = devm_kcalloc(qca->dev, num_vregs, sizeof(struct regulator_bulk_data), GFP_KERNEL); if (!qca->vreg_bulk) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index d67667970f7e..ec40f991e6c6 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -1553,8 +1553,8 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + - desc_bytes; + edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + + desc_bytes); edesc->iv_dir = DMA_TO_DEVICE; /* Make sure IV is located in a DMAable area */ @@ -1757,8 +1757,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + - desc_bytes; + edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + + desc_bytes); edesc->iv_dir = DMA_FROM_DEVICE; /* Make sure IV is located in a DMAable area */ diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 5c539af8ed60..010bbf607797 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -367,7 +367,8 @@ static inline void dsgl_walk_init(struct dsgl_walk *walk, walk->to = (struct phys_sge_pairs *)(dsgl + 1); } -static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid) +static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid, + int pci_chan_id) { struct cpl_rx_phys_dsgl *phys_cpl; @@ -385,6 +386,7 @@ static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid) phys_cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR; phys_cpl->rss_hdr_int.qid = htons(qid); phys_cpl->rss_hdr_int.hash_val = 0; + phys_cpl->rss_hdr_int.channel = pci_chan_id; } static inline void dsgl_walk_add_page(struct dsgl_walk *walk, @@ -718,7 +720,7 @@ static inline void create_wreq(struct chcr_context *ctx, FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid, !!lcb, ctx->tx_qidx); - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id, + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id, qid); chcr_req->ulptx.len = htonl((DIV_ROUND_UP(len16, 16) - ((sizeof(chcr_req->wreq)) >> 4))); @@ -1339,16 +1341,23 @@ static int chcr_device_init(struct chcr_context *ctx) adap->vres.ncrypto_fc); rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; txq_perchan = ntxq / u_ctx->lldi.nchan; - rxq_idx = ctx->dev->tx_channel_id * rxq_perchan; - rxq_idx += id % rxq_perchan; - txq_idx = ctx->dev->tx_channel_id * txq_perchan; - txq_idx += id % txq_perchan; spin_lock(&ctx->dev->lock_chcr_dev); - ctx->rx_qidx = rxq_idx; - ctx->tx_qidx = txq_idx; + ctx->tx_chan_id = ctx->dev->tx_channel_id; ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id; ctx->dev->rx_channel_id = 0; spin_unlock(&ctx->dev->lock_chcr_dev); + rxq_idx = ctx->tx_chan_id * rxq_perchan; + rxq_idx += id % rxq_perchan; + txq_idx = ctx->tx_chan_id * txq_perchan; + txq_idx += id % txq_perchan; + ctx->rx_qidx = rxq_idx; + ctx->tx_qidx = txq_idx; + /* Channel Id used by SGE to forward packet to Host. + * Same value should be used in cpl_fw6_pld RSS_CH field + * by FW. Driver programs PCI channel ID to be used in fw + * at the time of queue allocation with value "pi->tx_chan" + */ + ctx->pci_chan_id = txq_idx / txq_perchan; } out: return err; @@ -2503,6 +2512,7 @@ void chcr_add_aead_dst_ent(struct aead_request *req, struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct dsgl_walk dsgl_walk; unsigned int authsize = crypto_aead_authsize(tfm); + struct chcr_context *ctx = a_ctx(tfm); u32 temp; dsgl_walk_init(&dsgl_walk, phys_cpl); @@ -2512,7 +2522,7 @@ void chcr_add_aead_dst_ent(struct aead_request *req, dsgl_walk_add_page(&dsgl_walk, IV, &reqctx->iv_dma); temp = req->cryptlen + (reqctx->op ? -authsize : authsize); dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, req->assoclen); - dsgl_walk_end(&dsgl_walk, qid); + dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); } void chcr_add_cipher_src_ent(struct ablkcipher_request *req, @@ -2544,6 +2554,8 @@ void chcr_add_cipher_dst_ent(struct ablkcipher_request *req, unsigned short qid) { struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(wrparam->req); + struct chcr_context *ctx = c_ctx(tfm); struct dsgl_walk dsgl_walk; dsgl_walk_init(&dsgl_walk, phys_cpl); @@ -2552,7 +2564,7 @@ void chcr_add_cipher_dst_ent(struct ablkcipher_request *req, reqctx->dstsg = dsgl_walk.last_sg; reqctx->dst_ofst = dsgl_walk.last_sg_len; - dsgl_walk_end(&dsgl_walk, qid); + dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); } void chcr_add_hash_src_ent(struct ahash_request *req, diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 54835cb109e5..0d2c70c344f3 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -255,6 +255,8 @@ struct chcr_context { struct chcr_dev *dev; unsigned char tx_qidx; unsigned char rx_qidx; + unsigned char tx_chan_id; + unsigned char pci_chan_id; struct __crypto_ctx crypto_ctx[0]; }; diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 7e71043457a6..86c699c14f84 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -1044,7 +1044,8 @@ static int safexcel_probe(struct platform_device *pdev) safexcel_configure(priv); - priv->ring = devm_kzalloc(dev, priv->config.rings * sizeof(*priv->ring), + priv->ring = devm_kcalloc(dev, priv->config.rings, + sizeof(*priv->ring), GFP_KERNEL); if (!priv->ring) { ret = -ENOMEM; @@ -1063,8 +1064,9 @@ static int safexcel_probe(struct platform_device *pdev) if (ret) goto err_reg_clk; - priv->ring[i].rdr_req = devm_kzalloc(dev, - sizeof(priv->ring[i].rdr_req) * EIP197_DEFAULT_RING_SIZE, + priv->ring[i].rdr_req = devm_kcalloc(dev, + EIP197_DEFAULT_RING_SIZE, + sizeof(priv->ring[i].rdr_req), GFP_KERNEL); if (!priv->ring[i].rdr_req) { ret = -ENOMEM; diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index a10c418d4e5c..56bd28174f52 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -63,7 +63,7 @@ struct dcp { struct dcp_coherent_block *coh; struct completion completion[DCP_MAX_CHANS]; - struct mutex mutex[DCP_MAX_CHANS]; + spinlock_t lock[DCP_MAX_CHANS]; struct task_struct *thread[DCP_MAX_CHANS]; struct crypto_queue queue[DCP_MAX_CHANS]; }; @@ -349,13 +349,20 @@ static int dcp_chan_thread_aes(void *data) int ret; - do { - __set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); - mutex_lock(&sdcp->mutex[chan]); + spin_lock(&sdcp->lock[chan]); backlog = crypto_get_backlog(&sdcp->queue[chan]); arq = crypto_dequeue_request(&sdcp->queue[chan]); - mutex_unlock(&sdcp->mutex[chan]); + spin_unlock(&sdcp->lock[chan]); + + if (!backlog && !arq) { + schedule(); + continue; + } + + set_current_state(TASK_RUNNING); if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -363,11 +370,8 @@ static int dcp_chan_thread_aes(void *data) if (arq) { ret = mxs_dcp_aes_block_crypt(arq); arq->complete(arq, ret); - continue; } - - schedule(); - } while (!kthread_should_stop()); + } return 0; } @@ -409,9 +413,9 @@ static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb) rctx->ecb = ecb; actx->chan = DCP_CHAN_CRYPTO; - mutex_lock(&sdcp->mutex[actx->chan]); + spin_lock(&sdcp->lock[actx->chan]); ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); - mutex_unlock(&sdcp->mutex[actx->chan]); + spin_unlock(&sdcp->lock[actx->chan]); wake_up_process(sdcp->thread[actx->chan]); @@ -640,13 +644,20 @@ static int dcp_chan_thread_sha(void *data) struct ahash_request *req; int ret, fini; - do { - __set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); - mutex_lock(&sdcp->mutex[chan]); + spin_lock(&sdcp->lock[chan]); backlog = crypto_get_backlog(&sdcp->queue[chan]); arq = crypto_dequeue_request(&sdcp->queue[chan]); - mutex_unlock(&sdcp->mutex[chan]); + spin_unlock(&sdcp->lock[chan]); + + if (!backlog && !arq) { + schedule(); + continue; + } + + set_current_state(TASK_RUNNING); if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -658,12 +669,8 @@ static int dcp_chan_thread_sha(void *data) ret = dcp_sha_req_to_buf(arq); fini = rctx->fini; arq->complete(arq, ret); - if (!fini) - continue; } - - schedule(); - } while (!kthread_should_stop()); + } return 0; } @@ -721,9 +728,9 @@ static int dcp_sha_update_fx(struct ahash_request *req, int fini) rctx->init = 1; } - mutex_lock(&sdcp->mutex[actx->chan]); + spin_lock(&sdcp->lock[actx->chan]); ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); - mutex_unlock(&sdcp->mutex[actx->chan]); + spin_unlock(&sdcp->lock[actx->chan]); wake_up_process(sdcp->thread[actx->chan]); mutex_unlock(&actx->mutex); @@ -997,7 +1004,7 @@ static int mxs_dcp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sdcp); for (i = 0; i < DCP_MAX_CHANS; i++) { - mutex_init(&sdcp->mutex[i]); + spin_lock_init(&sdcp->lock[i]); init_completion(&sdcp->completion[i]); crypto_init_queue(&sdcp->queue[i], 50); } diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index ba197f34c252..763c2166ee0e 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C3XXX_PCI_DEVICE_ID: @@ -235,8 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 24ec908eb26c..613c7d5644ce 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C3XXXIOV_PCI_DEVICE_ID: @@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 59a5a0df50b6..9cb832963357 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C62X_PCI_DEVICE_ID: @@ -235,8 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 1 : 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index b9f3e0e4fde9..278452b8ef81 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C62XIOV_PCI_DEVICE_ID: @@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index be5c5a988ca5..3a9708ef4ce2 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_DH895XCC_PCI_DEVICE_ID: @@ -237,8 +238,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 26ab17bfc6da..3da0f951cb59 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_DH895XCCIOV_PCI_DEVICE_ID: @@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/fpga/dfl-fme-region.c b/drivers/fpga/dfl-fme-region.c index 0b7e19c27c6d..51a5ac2293a7 100644 --- a/drivers/fpga/dfl-fme-region.c +++ b/drivers/fpga/dfl-fme-region.c @@ -14,6 +14,7 @@ */ #include <linux/module.h> +#include <linux/fpga/fpga-mgr.h> #include <linux/fpga/fpga-region.h> #include "dfl-fme-pr.h" @@ -66,9 +67,10 @@ eprobe_mgr_put: static int fme_region_remove(struct platform_device *pdev) { struct fpga_region *region = dev_get_drvdata(&pdev->dev); + struct fpga_manager *mgr = region->mgr; fpga_region_unregister(region); - fpga_mgr_put(region->mgr); + fpga_mgr_put(mgr); return 0; } diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c index 24b8f98b73ec..c983dac97501 100644 --- a/drivers/fpga/fpga-bridge.c +++ b/drivers/fpga/fpga-bridge.c @@ -125,7 +125,7 @@ static int fpga_bridge_dev_match(struct device *dev, const void *data) * * Given a device, get an exclusive reference to a fpga bridge. * - * Return: fpga manager struct or IS_ERR() condition containing error code. + * Return: fpga bridge struct or IS_ERR() condition containing error code. */ struct fpga_bridge *fpga_bridge_get(struct device *dev, struct fpga_image_info *info) diff --git a/drivers/fpga/of-fpga-region.c b/drivers/fpga/of-fpga-region.c index 35fabb8083fb..052a1342ab7e 100644 --- a/drivers/fpga/of-fpga-region.c +++ b/drivers/fpga/of-fpga-region.c @@ -437,9 +437,10 @@ eprobe_mgr_put: static int of_fpga_region_remove(struct platform_device *pdev) { struct fpga_region *region = platform_get_drvdata(pdev); + struct fpga_manager *mgr = region->mgr; fpga_region_unregister(region); - fpga_mgr_put(region->mgr); + fpga_mgr_put(mgr); return 0; } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e8f8a1999393..25187403e3ac 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -571,7 +571,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) if (ret) goto out_free_descs; lh->descs[i] = desc; - count = i; + count = i + 1; if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); @@ -1682,7 +1682,8 @@ static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gpiochip, irq_set_chained_handler_and_data(parent_irq, parent_handler, gpiochip); - gpiochip->irq.parents = &parent_irq; + gpiochip->irq.parent_irq = parent_irq; + gpiochip->irq.parents = &gpiochip->irq.parent_irq; gpiochip->irq.num_parents = 1; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ec0d62a16e53..4f22e745df51 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -358,8 +358,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { - int retval; struct mqd_manager *mqd_mgr; + int retval; mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); if (!mqd_mgr) @@ -387,8 +387,12 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, if (!q->properties.is_active) return 0; - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, - &q->properties, q->process->mm); + if (WARN(q->process->mm != current->mm, + "should only run in user thread")) + retval = -EFAULT; + else + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, + &q->properties, current->mm); if (retval) goto out_uninit_mqd; @@ -545,9 +549,15 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, - &q->properties, q->process->mm); + q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + if (WARN(q->process->mm != current->mm, + "should only run in user thread")) + retval = -EFAULT; + else + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, + q->pipe, q->queue, + &q->properties, current->mm); + } out_unlock: dqm_unlock(dqm); @@ -653,6 +663,7 @@ out: static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { + struct mm_struct *mm = NULL; struct queue *q; struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; @@ -686,6 +697,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, kfd_flush_tlb(pdd); } + /* Take a safe reference to the mm_struct, which may otherwise + * disappear even while the kfd_process is still referenced. + */ + mm = get_task_mm(pdd->process->lead_thread); + if (!mm) { + retval = -EFAULT; + goto out; + } + /* activate all active queues on the qpd */ list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_evicted) @@ -700,14 +720,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, q->properties.is_evicted = false; q->properties.is_active = true; retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, - q->queue, &q->properties, - q->process->mm); + q->queue, &q->properties, mm); if (retval) goto out; dqm->queue_count++; } qpd->evicted = 0; out: + if (mm) + mmput(mm); dqm_unlock(dqm); return retval; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 96875950845a..6903fe6c894b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4633,12 +4633,18 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) } spin_unlock_irqrestore(&adev->ddev->event_lock, flags); - /* Signal HW programming completion */ - drm_atomic_helper_commit_hw_done(state); if (wait_for_vblank) drm_atomic_helper_wait_for_flip_done(dev, state); + /* + * FIXME: + * Delay hw_done() until flip_done() is signaled. This is to block + * another commit from freeing the CRTC state while we're still + * waiting on flip_done. + */ + drm_atomic_helper_commit_hw_done(state); + drm_atomic_helper_cleanup_planes(dev, state); /* Finally, drop a runtime PM reference for each newly disabled CRTC, diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index baff50a4c234..df31c3815092 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -63,20 +63,21 @@ static void drm_client_close(struct drm_client_dev *client) EXPORT_SYMBOL(drm_client_close); /** - * drm_client_new - Create a DRM client + * drm_client_init - Initialise a DRM client * @dev: DRM device * @client: DRM client * @name: Client name * @funcs: DRM client functions (optional) * + * This initialises the client and opens a &drm_file. Use drm_client_add() to complete the process. * The caller needs to hold a reference on @dev before calling this function. * The client is freed when the &drm_device is unregistered. See drm_client_release(). * * Returns: * Zero on success or negative error code on failure. */ -int drm_client_new(struct drm_device *dev, struct drm_client_dev *client, - const char *name, const struct drm_client_funcs *funcs) +int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, + const char *name, const struct drm_client_funcs *funcs) { int ret; @@ -95,10 +96,6 @@ int drm_client_new(struct drm_device *dev, struct drm_client_dev *client, if (ret) goto err_put_module; - mutex_lock(&dev->clientlist_mutex); - list_add(&client->list, &dev->clientlist); - mutex_unlock(&dev->clientlist_mutex); - drm_dev_get(dev); return 0; @@ -109,13 +106,33 @@ err_put_module: return ret; } -EXPORT_SYMBOL(drm_client_new); +EXPORT_SYMBOL(drm_client_init); + +/** + * drm_client_add - Add client to the device list + * @client: DRM client + * + * Add the client to the &drm_device client list to activate its callbacks. + * @client must be initialized by a call to drm_client_init(). After + * drm_client_add() it is no longer permissible to call drm_client_release() + * directly (outside the unregister callback), instead cleanup will happen + * automatically on driver unload. + */ +void drm_client_add(struct drm_client_dev *client) +{ + struct drm_device *dev = client->dev; + + mutex_lock(&dev->clientlist_mutex); + list_add(&client->list, &dev->clientlist); + mutex_unlock(&dev->clientlist_mutex); +} +EXPORT_SYMBOL(drm_client_add); /** * drm_client_release - Release DRM client resources * @client: DRM client * - * Releases resources by closing the &drm_file that was opened by drm_client_new(). + * Releases resources by closing the &drm_file that was opened by drm_client_init(). * It is called automatically if the &drm_client_funcs.unregister callback is _not_ set. * * This function should only be called from the unregister callback. An exception diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c index 9da36a6271d3..9ac1f2e0f064 100644 --- a/drivers/gpu/drm/drm_fb_cma_helper.c +++ b/drivers/gpu/drm/drm_fb_cma_helper.c @@ -160,7 +160,7 @@ struct drm_fbdev_cma *drm_fbdev_cma_init(struct drm_device *dev, fb_helper = &fbdev_cma->fb_helper; - ret = drm_client_new(dev, &fb_helper->client, "fbdev", NULL); + ret = drm_client_init(dev, &fb_helper->client, "fbdev", NULL); if (ret) goto err_free; @@ -169,6 +169,8 @@ struct drm_fbdev_cma *drm_fbdev_cma_init(struct drm_device *dev, if (ret) goto err_client_put; + drm_client_add(&fb_helper->client); + return fbdev_cma; err_client_put: diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 16ec93b75dbf..515a7aec57ac 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -3218,12 +3218,14 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp) if (!fb_helper) return -ENOMEM; - ret = drm_client_new(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs); + ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs); if (ret) { kfree(fb_helper); return ret; } + drm_client_add(&fb_helper->client); + fb_helper->preferred_bpp = preferred_bpp; drm_fbdev_client_hotplug(&fb_helper->client); diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index b54fb78a283c..b82da96ded5c 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -566,14 +566,14 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, lessee_priv->is_master = 1; lessee_priv->authenticated = 1; - /* Hook up the fd */ - fd_install(fd, lessee_file); - /* Pass fd back to userspace */ DRM_DEBUG_LEASE("Returning fd %d id %d\n", fd, lessee->lessee_id); cl->fd = fd; cl->lessee_id = lessee->lessee_id; + /* Hook up the fd */ + fd_install(fd, lessee_file); + DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n"); return 0; diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h index 87f6b5672e11..797d9ee5f15a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h +++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h @@ -55,37 +55,12 @@ static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv, unsigned long start, unsigned long size) { - struct iommu_domain *domain; - int ret; - - domain = iommu_domain_alloc(priv->dma_dev->bus); - if (!domain) - return -ENOMEM; - - ret = iommu_get_dma_cookie(domain); - if (ret) - goto free_domain; - - ret = iommu_dma_init_domain(domain, start, size, NULL); - if (ret) - goto put_cookie; - - priv->mapping = domain; + priv->mapping = iommu_get_domain_for_dev(priv->dma_dev); return 0; - -put_cookie: - iommu_put_dma_cookie(domain); -free_domain: - iommu_domain_free(domain); - return ret; } static inline void __exynos_iommu_release_mapping(struct exynos_drm_private *priv) { - struct iommu_domain *domain = priv->mapping; - - iommu_put_dma_cookie(domain); - iommu_domain_free(domain); priv->mapping = NULL; } @@ -94,7 +69,9 @@ static inline int __exynos_iommu_attach(struct exynos_drm_private *priv, { struct iommu_domain *domain = priv->mapping; - return iommu_attach_device(domain, dev); + if (dev != priv->dma_dev) + return iommu_attach_device(domain, dev); + return 0; } static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, @@ -102,7 +79,8 @@ static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, { struct iommu_domain *domain = priv->mapping; - iommu_detach_device(domain, dev); + if (dev != priv->dma_dev) + iommu_detach_device(domain, dev); } #else #error Unsupported architecture and IOMMU/DMA-mapping glue code diff --git a/drivers/gpu/drm/i2c/tda9950.c b/drivers/gpu/drm/i2c/tda9950.c index 5d2f0d548469..250b5e02a314 100644 --- a/drivers/gpu/drm/i2c/tda9950.c +++ b/drivers/gpu/drm/i2c/tda9950.c @@ -191,7 +191,8 @@ static irqreturn_t tda9950_irq(int irq, void *data) break; } /* TDA9950 executes all retries for us */ - tx_status |= CEC_TX_STATUS_MAX_RETRIES; + if (tx_status != CEC_TX_STATUS_OK) + tx_status |= CEC_TX_STATUS_MAX_RETRIES; cec_transmit_done(priv->adap, tx_status, arb_lost_cnt, nack_cnt, 0, err_cnt); break; @@ -310,7 +311,7 @@ static void tda9950_release(struct tda9950_priv *priv) /* Wait up to .5s for it to signal non-busy */ do { csr = tda9950_read(client, REG_CSR); - if (!(csr & CSR_BUSY) || --timeout) + if (!(csr & CSR_BUSY) || !--timeout) break; msleep(10); } while (1); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f7f2aa71d8d9..a262a64f5625 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -232,6 +232,20 @@ static bool compress_init(struct compress *c) return true; } +static void *compress_next_page(struct drm_i915_error_object *dst) +{ + unsigned long page; + + if (dst->page_count >= dst->num_pages) + return ERR_PTR(-ENOSPC); + + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) + return ERR_PTR(-ENOMEM); + + return dst->pages[dst->page_count++] = (void *)page; +} + static int compress_page(struct compress *c, void *src, struct drm_i915_error_object *dst) @@ -245,19 +259,14 @@ static int compress_page(struct compress *c, do { if (zstream->avail_out == 0) { - unsigned long page; - - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); - if (!page) - return -ENOMEM; + zstream->next_out = compress_next_page(dst); + if (IS_ERR(zstream->next_out)) + return PTR_ERR(zstream->next_out); - dst->pages[dst->page_count++] = (void *)page; - - zstream->next_out = (void *)page; zstream->avail_out = PAGE_SIZE; } - if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK) + if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; } while (zstream->avail_in); @@ -268,19 +277,42 @@ static int compress_page(struct compress *c, return 0; } -static void compress_fini(struct compress *c, +static int compress_flush(struct compress *c, struct drm_i915_error_object *dst) { struct z_stream_s *zstream = &c->zstream; - if (dst) { - zlib_deflate(zstream, Z_FINISH); - dst->unused = zstream->avail_out; - } + do { + switch (zlib_deflate(zstream, Z_FINISH)) { + case Z_OK: /* more space requested */ + zstream->next_out = compress_next_page(dst); + if (IS_ERR(zstream->next_out)) + return PTR_ERR(zstream->next_out); + + zstream->avail_out = PAGE_SIZE; + break; + + case Z_STREAM_END: + goto end; + + default: /* any error */ + return -EIO; + } + } while (1); + +end: + memset(zstream->next_out, 0, zstream->avail_out); + dst->unused = zstream->avail_out; + return 0; +} + +static void compress_fini(struct compress *c, + struct drm_i915_error_object *dst) +{ + struct z_stream_s *zstream = &c->zstream; zlib_deflateEnd(zstream); kfree(zstream->workspace); - if (c->tmp) free_page((unsigned long)c->tmp); } @@ -319,6 +351,12 @@ static int compress_page(struct compress *c, return 0; } +static int compress_flush(struct compress *c, + struct drm_i915_error_object *dst) +{ + return 0; +} + static void compress_fini(struct compress *c, struct drm_i915_error_object *dst) { @@ -917,6 +955,7 @@ i915_error_object_create(struct drm_i915_private *i915, unsigned long num_pages; struct sgt_iter iter; dma_addr_t dma; + int ret; if (!vma) return NULL; @@ -930,6 +969,7 @@ i915_error_object_create(struct drm_i915_private *i915, dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size; + dst->num_pages = num_pages; dst->page_count = 0; dst->unused = 0; @@ -938,28 +978,26 @@ i915_error_object_create(struct drm_i915_private *i915, return NULL; } + ret = -EINVAL; for_each_sgt_dma(dma, iter, vma->pages) { void __iomem *s; - int ret; ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); io_mapping_unmap_atomic(s); - if (ret) - goto unwind; + break; } - goto out; -unwind: - while (dst->page_count--) - free_page((unsigned long)dst->pages[dst->page_count]); - kfree(dst); - dst = NULL; + if (ret || compress_flush(&compress, dst)) { + while (dst->page_count--) + free_page((unsigned long)dst->pages[dst->page_count]); + kfree(dst); + dst = NULL; + } -out: compress_fini(&compress, dst); ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); return dst; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index f893a4e8b783..8710fb18ed74 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -135,6 +135,7 @@ struct i915_gpu_state { struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; + int num_pages; int page_count; int unused; u32 *pages[0]; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 90628a47ae17..29877969310d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3091,36 +3091,27 @@ gen11_gt_irq_handler(struct drm_i915_private * const i915, spin_unlock(&i915->irq_lock); } -static void -gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 master_ctl, - u32 *iir) +static u32 +gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 master_ctl) { void __iomem * const regs = dev_priv->regs; + u32 iir; if (!(master_ctl & GEN11_GU_MISC_IRQ)) - return; + return 0; + + iir = raw_reg_read(regs, GEN11_GU_MISC_IIR); + if (likely(iir)) + raw_reg_write(regs, GEN11_GU_MISC_IIR, iir); - *iir = raw_reg_read(regs, GEN11_GU_MISC_IIR); - if (likely(*iir)) - raw_reg_write(regs, GEN11_GU_MISC_IIR, *iir); + return iir; } static void -gen11_gu_misc_irq_handler(struct drm_i915_private *dev_priv, - const u32 master_ctl, const u32 iir) +gen11_gu_misc_irq_handler(struct drm_i915_private *dev_priv, const u32 iir) { - if (!(master_ctl & GEN11_GU_MISC_IRQ)) - return; - - if (unlikely(!iir)) { - DRM_ERROR("GU_MISC iir blank!\n"); - return; - } - if (iir & GEN11_GU_MISC_GSE) intel_opregion_asle_intr(dev_priv); - else - DRM_ERROR("Unexpected GU_MISC interrupt 0x%x\n", iir); } static irqreturn_t gen11_irq_handler(int irq, void *arg) @@ -3157,12 +3148,12 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) enable_rpm_wakeref_asserts(i915); } - gen11_gu_misc_irq_ack(i915, master_ctl, &gu_misc_iir); + gu_misc_iir = gen11_gu_misc_irq_ack(i915, master_ctl); /* Acknowledge and enable interrupts. */ raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ | master_ctl); - gen11_gu_misc_irq_handler(i915, master_ctl, gu_misc_iir); + gen11_gu_misc_irq_handler(i915, gu_misc_iir); return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 6a4d1388ad2d..1df3ce134cd0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -592,7 +592,6 @@ static const struct intel_device_info intel_cannonlake_info = { GEN10_FEATURES, \ GEN(11), \ .ddb_size = 2048, \ - .has_csr = 0, \ .has_logical_ring_elsq = 1 static const struct intel_device_info intel_icelake_11_info = { diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 0b976dfd04df..92ecb9bf982c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -600,7 +600,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, } mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]); - mtk_crtc->planes = devm_kzalloc(dev, mtk_crtc->layer_nr * + mtk_crtc->planes = devm_kcalloc(dev, mtk_crtc->layer_nr, sizeof(struct drm_plane), GFP_KERNEL); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c index 790d39f816dc..b557687b1964 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c @@ -153,8 +153,8 @@ int msm_dss_parse_clock(struct platform_device *pdev, return 0; } - mp->clk_config = devm_kzalloc(&pdev->dev, - sizeof(struct dss_clk) * num_clk, + mp->clk_config = devm_kcalloc(&pdev->dev, + num_clk, sizeof(struct dss_clk), GFP_KERNEL); if (!mp->clk_config) return -ENOMEM; diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 5691dfa1db6f..041e7daf8a33 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -900,9 +900,22 @@ static enum drm_connector_status nv50_mstc_detect(struct drm_connector *connector, bool force) { struct nv50_mstc *mstc = nv50_mstc(connector); + enum drm_connector_status conn_status; + int ret; + if (!mstc->port) return connector_status_disconnected; - return drm_dp_mst_detect_port(connector, mstc->port->mgr, mstc->port); + + ret = pm_runtime_get_sync(connector->dev->dev); + if (ret < 0 && ret != -EACCES) + return connector_status_disconnected; + + conn_status = drm_dp_mst_detect_port(connector, mstc->port->mgr, + mstc->port); + + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + return conn_status; } static void diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5146ee029db4..bc49909aba8e 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -976,7 +976,6 @@ #define USB_DEVICE_ID_SIS817_TOUCH 0x0817 #define USB_DEVICE_ID_SIS_TS 0x1013 #define USB_DEVICE_ID_SIS1030_TOUCH 0x1030 -#define USB_DEVICE_ID_SIS10FB_TOUCH 0x10fb #define USB_VENDOR_ID_SKYCABLE 0x1223 #define USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER 0x3F07 diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index f3076659361a..4e3592e7a3f7 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -47,7 +47,7 @@ /* quirks to control the device */ #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV BIT(0) #define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET BIT(1) -#define I2C_HID_QUIRK_RESEND_REPORT_DESCR BIT(2) +#define I2C_HID_QUIRK_NO_RUNTIME_PM BIT(2) /* flags */ #define I2C_HID_STARTED 0 @@ -169,9 +169,8 @@ static const struct i2c_hid_quirks { { USB_VENDOR_ID_WEIDA, USB_DEVICE_ID_WEIDA_8755, I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV }, { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288, - I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, - { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS10FB_TOUCH, - I2C_HID_QUIRK_RESEND_REPORT_DESCR }, + I2C_HID_QUIRK_NO_IRQ_AFTER_RESET | + I2C_HID_QUIRK_NO_RUNTIME_PM }, { 0, 0 } }; @@ -1105,7 +1104,9 @@ static int i2c_hid_probe(struct i2c_client *client, goto err_mem_free; } - pm_runtime_put(&client->dev); + if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM)) + pm_runtime_put(&client->dev); + return 0; err_mem_free: @@ -1130,7 +1131,8 @@ static int i2c_hid_remove(struct i2c_client *client) struct i2c_hid *ihid = i2c_get_clientdata(client); struct hid_device *hid; - pm_runtime_get_sync(&client->dev); + if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM)) + pm_runtime_get_sync(&client->dev); pm_runtime_disable(&client->dev); pm_runtime_set_suspended(&client->dev); pm_runtime_put_noidle(&client->dev); @@ -1236,22 +1238,13 @@ static int i2c_hid_resume(struct device *dev) /* Instead of resetting device, simply powers the device on. This * solves "incomplete reports" on Raydium devices 2386:3118 and - * 2386:4B33 + * 2386:4B33 and fixes various SIS touchscreens no longer sending + * data after a suspend/resume. */ ret = i2c_hid_set_power(client, I2C_HID_PWR_ON); if (ret) return ret; - /* Some devices need to re-send report descr cmd - * after resume, after this it will be back normal. - * otherwise it issues too many incomplete reports. - */ - if (ihid->quirks & I2C_HID_QUIRK_RESEND_REPORT_DESCR) { - ret = i2c_hid_command(client, &hid_report_descr_cmd, NULL, 0); - if (ret) - return ret; - } - if (hid->driver && hid->driver->reset_resume) { ret = hid->driver->reset_resume(hid); return ret; diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index da133716bed0..08a8327dfd22 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -29,6 +29,7 @@ #define CNL_Ax_DEVICE_ID 0x9DFC #define GLK_Ax_DEVICE_ID 0x31A2 #define CNL_H_DEVICE_ID 0xA37C +#define ICL_MOBILE_DEVICE_ID 0x34FC #define SPT_H_DEVICE_ID 0xA135 #define REVISION_ID_CHT_A0 0x6 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index a1125a5c7965..256b3016116c 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -38,6 +38,7 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, CNL_Ax_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, GLK_Ax_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, CNL_H_DEVICE_ID)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ICL_MOBILE_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, SPT_H_DEVICE_ID)}, {0, } }; diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index ced041899456..f4d08c8ac7f8 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -76,6 +76,7 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, __u32 version) { int ret = 0; + unsigned int cur_cpu; struct vmbus_channel_initiate_contact *msg; unsigned long flags; @@ -118,9 +119,10 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, * the CPU attempting to connect may not be CPU 0. */ if (version >= VERSION_WIN8_1) { - msg->target_vcpu = - hv_cpu_number_to_vp_number(smp_processor_id()); - vmbus_connection.connect_cpu = smp_processor_id(); + cur_cpu = get_cpu(); + msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu); + vmbus_connection.connect_cpu = cur_cpu; + put_cpu(); } else { msg->target_vcpu = 0; vmbus_connection.connect_cpu = 0; diff --git a/drivers/hwmon/npcm750-pwm-fan.c b/drivers/hwmon/npcm750-pwm-fan.c index 8474d601aa63..b998f9fbed41 100644 --- a/drivers/hwmon/npcm750-pwm-fan.c +++ b/drivers/hwmon/npcm750-pwm-fan.c @@ -908,7 +908,7 @@ static int npcm7xx_en_pwm_fan(struct device *dev, if (fan_cnt < 1) return -EINVAL; - fan_ch = devm_kzalloc(dev, sizeof(*fan_ch) * fan_cnt, GFP_KERNEL); + fan_ch = devm_kcalloc(dev, fan_cnt, sizeof(*fan_ch), GFP_KERNEL); if (!fan_ch) return -ENOMEM; diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index 94d94b4a9a0d..18cc324f3ca9 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -34,11 +34,11 @@ static void i2c_dw_configure_fifo_master(struct dw_i2c_dev *dev) static int i2c_dw_set_timings_master(struct dw_i2c_dev *dev) { - u32 ic_clk = i2c_dw_clk_rate(dev); const char *mode_str, *fp_str = ""; u32 comp_param1; u32 sda_falling_time, scl_falling_time; struct i2c_timings *t = &dev->timings; + u32 ic_clk; int ret; ret = i2c_dw_acquire_lock(dev); @@ -53,6 +53,7 @@ static int i2c_dw_set_timings_master(struct dw_i2c_dev *dev) /* Calculate SCL timing parameters for standard mode if not set */ if (!dev->ss_hcnt || !dev->ss_lcnt) { + ic_clk = i2c_dw_clk_rate(dev); dev->ss_hcnt = i2c_dw_scl_hcnt(ic_clk, 4000, /* tHD;STA = tHIGH = 4.0 us */ @@ -89,6 +90,7 @@ static int i2c_dw_set_timings_master(struct dw_i2c_dev *dev) * needed also in high speed mode. */ if (!dev->fs_hcnt || !dev->fs_lcnt) { + ic_clk = i2c_dw_clk_rate(dev); dev->fs_hcnt = i2c_dw_scl_hcnt(ic_clk, 600, /* tHD;STA = tHIGH = 0.6 us */ diff --git a/drivers/i2c/busses/i2c-isch.c b/drivers/i2c/busses/i2c-isch.c index 0cf1379f4e80..5c754bf659e2 100644 --- a/drivers/i2c/busses/i2c-isch.c +++ b/drivers/i2c/busses/i2c-isch.c @@ -164,7 +164,7 @@ static s32 sch_access(struct i2c_adapter *adap, u16 addr, * run ~75 kHz instead which should do no harm. */ dev_notice(&sch_adapter.dev, - "Clock divider unitialized. Setting defaults\n"); + "Clock divider uninitialized. Setting defaults\n"); outw(backbone_speed / (4 * 100), SMBHSTCLK); } diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 36732eb688a4..9f2eb02481d3 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -367,20 +367,26 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, dma_addr_t rx_dma; enum geni_se_xfer_mode mode; unsigned long time_left = XFER_TIMEOUT; + void *dma_buf; gi2c->cur = msg; - mode = msg->len > 32 ? GENI_SE_DMA : GENI_SE_FIFO; + mode = GENI_SE_FIFO; + dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (dma_buf) + mode = GENI_SE_DMA; + geni_se_select_mode(&gi2c->se, mode); writel_relaxed(msg->len, gi2c->se.base + SE_I2C_RX_TRANS_LEN); geni_se_setup_m_cmd(&gi2c->se, I2C_READ, m_param); if (mode == GENI_SE_DMA) { int ret; - ret = geni_se_rx_dma_prep(&gi2c->se, msg->buf, msg->len, + ret = geni_se_rx_dma_prep(&gi2c->se, dma_buf, msg->len, &rx_dma); if (ret) { mode = GENI_SE_FIFO; geni_se_select_mode(&gi2c->se, mode); + i2c_put_dma_safe_msg_buf(dma_buf, msg, false); } } @@ -393,6 +399,7 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, if (gi2c->err) geni_i2c_rx_fsm_rst(gi2c); geni_se_rx_dma_unprep(&gi2c->se, rx_dma, msg->len); + i2c_put_dma_safe_msg_buf(dma_buf, msg, !gi2c->err); } return gi2c->err; } @@ -403,20 +410,26 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, dma_addr_t tx_dma; enum geni_se_xfer_mode mode; unsigned long time_left; + void *dma_buf; gi2c->cur = msg; - mode = msg->len > 32 ? GENI_SE_DMA : GENI_SE_FIFO; + mode = GENI_SE_FIFO; + dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (dma_buf) + mode = GENI_SE_DMA; + geni_se_select_mode(&gi2c->se, mode); writel_relaxed(msg->len, gi2c->se.base + SE_I2C_TX_TRANS_LEN); geni_se_setup_m_cmd(&gi2c->se, I2C_WRITE, m_param); if (mode == GENI_SE_DMA) { int ret; - ret = geni_se_tx_dma_prep(&gi2c->se, msg->buf, msg->len, + ret = geni_se_tx_dma_prep(&gi2c->se, dma_buf, msg->len, &tx_dma); if (ret) { mode = GENI_SE_FIFO; geni_se_select_mode(&gi2c->se, mode); + i2c_put_dma_safe_msg_buf(dma_buf, msg, false); } } @@ -432,6 +445,7 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, if (gi2c->err) geni_i2c_tx_fsm_rst(gi2c); geni_se_tx_dma_unprep(&gi2c->se, tx_dma, msg->len); + i2c_put_dma_safe_msg_buf(dma_buf, msg, !gi2c->err); } return gi2c->err; } diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index a01389b85f13..7e9a2bbf5ddc 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -152,6 +152,7 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags, mt_params[3].type = ACPI_TYPE_INTEGER; mt_params[3].integer.value = len; mt_params[4].type = ACPI_TYPE_BUFFER; + mt_params[4].buffer.length = len; mt_params[4].buffer.pointer = data->block + 1; } break; diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 9ee9a15e7134..9200e349f29e 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2270,7 +2270,7 @@ EXPORT_SYMBOL(i2c_put_adapter); * * Return: NULL if a DMA safe buffer was not obtained. Use msg->buf with PIO. * Or a valid pointer to be used with DMA. After use, release it by - * calling i2c_release_dma_safe_msg_buf(). + * calling i2c_put_dma_safe_msg_buf(). * * This function must only be called from process context! */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9fb1d9cb9401..e22314837645 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -544,6 +544,9 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int shrink = 0; int c; + if (!mr->allocated_from_cache) + return; + c = order2idx(dev, mr->order); if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); @@ -1647,18 +1650,19 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) umem = NULL; } #endif - clean_mr(dev, mr); + /* + * We should unregister the DMA address from the HCA before + * remove the DMA mapping. + */ + mlx5_mr_cache_free(dev, mr); if (umem) { ib_umem_release(umem); atomic_sub(npages, &dev->mdev->priv.reg_pages); } - if (!mr->allocated_from_cache) kfree(mr); - else - mlx5_mr_cache_free(dev, mr); } int mlx5_ib_dereg_mr(struct ib_mr *ibmr) diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 370206f987f9..f48369d6f3a0 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -564,6 +564,7 @@ static ssize_t evdev_write(struct file *file, const char __user *buffer, input_inject_event(&evdev->handle, event.type, event.code, event.value); + cond_resched(); } out: diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index cd620e009bad..d4b9db487b16 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -231,6 +231,7 @@ static const struct xpad_device { { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, @@ -530,6 +531,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init1), XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init2), + XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init1), + XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index eb14ddf69346..8ec483e8688b 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -598,6 +598,7 @@ static ssize_t uinput_inject_events(struct uinput_device *udev, input_event(udev->dev, ev.type, ev.code, ev.value); bytes += input_event_size(); + cond_resched(); } return bytes; diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c index e08228061bcd..412fa71245af 100644 --- a/drivers/input/mousedev.c +++ b/drivers/input/mousedev.c @@ -707,6 +707,7 @@ static ssize_t mousedev_write(struct file *file, const char __user *buffer, mousedev_generate_response(client, c); spin_unlock_irq(&client->packet_lock); + cond_resched(); } kill_fasync(&client->fasync, SIGIO, POLL_IN); diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index b8bc71569349..95a78ccbd847 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1395,15 +1395,26 @@ static void __init i8042_register_ports(void) for (i = 0; i < I8042_NUM_PORTS; i++) { struct serio *serio = i8042_ports[i].serio; - if (serio) { - printk(KERN_INFO "serio: %s at %#lx,%#lx irq %d\n", - serio->name, - (unsigned long) I8042_DATA_REG, - (unsigned long) I8042_COMMAND_REG, - i8042_ports[i].irq); - serio_register_port(serio); - device_set_wakeup_capable(&serio->dev, true); - } + if (!serio) + continue; + + printk(KERN_INFO "serio: %s at %#lx,%#lx irq %d\n", + serio->name, + (unsigned long) I8042_DATA_REG, + (unsigned long) I8042_COMMAND_REG, + i8042_ports[i].irq); + serio_register_port(serio); + device_set_wakeup_capable(&serio->dev, true); + + /* + * On platforms using suspend-to-idle, allow the keyboard to + * wake up the system from sleep by enabling keyboard wakeups + * by default. This is consistent with keyboard wakeup + * behavior on many platforms using suspend-to-RAM (ACPI S3) + * by default. + */ + if (pm_suspend_via_s2idle() && i == I8042_KBD_PORT_NO) + device_set_wakeup_enable(&serio->dev, true); } } diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 73e47d93e7a0..bee0dfb7b93b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3069,7 +3069,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, return 0; offset_mask = pte_pgsize - 1; - __pte = *pte & PM_ADDR_MASK; + __pte = __sme_clr(*pte & PM_ADDR_MASK); return (__pte & ~offset_mask) | (iova & offset_mask); } diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 69dddeab124c..5936de71883f 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1455,8 +1455,8 @@ static int __load_mappings(struct dm_cache_metadata *cmd, if (hints_valid) { r = dm_array_cursor_next(&cmd->hint_cursor); if (r) { - DMERR("dm_array_cursor_next for hint failed"); - goto out; + dm_array_cursor_end(&cmd->hint_cursor); + hints_valid = false; } } diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index a53413371725..b29a8327eed1 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3009,8 +3009,13 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache) static bool can_resize(struct cache *cache, dm_cblock_t new_size) { - if (from_cblock(new_size) > from_cblock(cache->cache_size)) - return true; + if (from_cblock(new_size) > from_cblock(cache->cache_size)) { + if (cache->sized) { + DMERR("%s: unable to extend cache due to missing cache table reload", + cache_device_name(cache)); + return false; + } + } /* * We can't drop a dirty block when shrinking the cache. @@ -3479,14 +3484,13 @@ static int __init dm_cache_init(void) int r; migration_cache = KMEM_CACHE(dm_cache_migration, 0); - if (!migration_cache) { - dm_unregister_target(&cache_target); + if (!migration_cache) return -ENOMEM; - } r = dm_register_target(&cache_target); if (r) { DMERR("cache target registration failed: %d", r); + kmem_cache_destroy(migration_cache); return r; } diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 21d126a5078c..32aabe27b37c 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -467,7 +467,9 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", .version = {1, 5, 0}, +#ifdef CONFIG_BLK_DEV_ZONED .features = DM_TARGET_ZONED_HM, +#endif .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 89ccb64342de..e1fa6baf4e8e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3462,7 +3462,8 @@ try_smaller_buffer: r = -ENOMEM; goto bad; } - ic->recalc_tags = kvmalloc((RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size, GFP_KERNEL); + ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, + ic->tag_size, GFP_KERNEL); if (!ic->recalc_tags) { ti->error = "Cannot allocate tags for recalculating"; r = -ENOMEM; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index d10964d41fd7..2f7c44a006c4 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -102,6 +102,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } +#ifdef CONFIG_BLK_DEV_ZONED static int linear_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) { @@ -112,6 +113,7 @@ static int linear_end_io(struct dm_target *ti, struct bio *bio, return DM_ENDIO_DONE; } +#endif static void linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) @@ -208,12 +210,16 @@ static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, +#ifdef CONFIG_BLK_DEV_ZONED + .end_io = linear_end_io, .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, +#else + .features = DM_TARGET_PASSES_INTEGRITY, +#endif .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, .map = linear_map, - .end_io = linear_end_io, .status = linear_status, .prepare_ioctl = linear_prepare_ioctl, .iterate_devices = linear_iterate_devices, diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d94ba6f72ff5..419362c2d8ac 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -806,19 +806,19 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, } static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, - const char *attached_handler_name, char **error) + const char **attached_handler_name, char **error) { struct request_queue *q = bdev_get_queue(bdev); int r; if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) { retain: - if (attached_handler_name) { + if (*attached_handler_name) { /* * Clear any hw_handler_params associated with a * handler that isn't already attached. */ - if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) { + if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) { kfree(m->hw_handler_params); m->hw_handler_params = NULL; } @@ -830,7 +830,8 @@ retain: * handler instead of the original table passed in. */ kfree(m->hw_handler_name); - m->hw_handler_name = attached_handler_name; + m->hw_handler_name = *attached_handler_name; + *attached_handler_name = NULL; } } @@ -867,7 +868,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps struct pgpath *p; struct multipath *m = ti->private; struct request_queue *q; - const char *attached_handler_name; + const char *attached_handler_name = NULL; /* we need at least a path arg */ if (as->argc < 1) { @@ -890,7 +891,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); if (attached_handler_name || m->hw_handler_name) { INIT_DELAYED_WORK(&p->activate_path, activate_path_work); - r = setup_scsi_dh(p->path.dev->bdev, m, attached_handler_name, &ti->error); + r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error); if (r) { dm_put_device(ti, p->path.dev); goto bad; @@ -905,6 +906,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps return p; bad: + kfree(attached_handler_name); free_pgpath(p); return ERR_PTR(r); } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5ba067fa0c72..c44925e4e481 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3353,7 +3353,7 @@ static const char *sync_str(enum sync_state state) }; /* Return enum sync_state for @mddev derived from @recovery flags */ -static const enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery) +static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery) { if (test_bit(MD_RECOVERY_FROZEN, &recovery)) return st_frozen; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 74f6770c70b1..20b0776e39ef 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -832,10 +832,8 @@ static void __set_metadata_reserve(struct dm_pool_metadata *pmd) if (r) { DMERR("could not get size of metadata device"); pmd->metadata_reserve = max_blocks; - } else { - sector_div(total, 10); - pmd->metadata_reserve = min(max_blocks, total); - } + } else + pmd->metadata_reserve = min(max_blocks, div_u64(total, 10)); } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 20f7e4ef5342..45abb54037fc 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1155,12 +1155,14 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) EXPORT_SYMBOL_GPL(dm_accept_partial_bio); /* - * The zone descriptors obtained with a zone report indicate - * zone positions within the target device. The zone descriptors - * must be remapped to match their position within the dm device. - * A target may call dm_remap_zone_report after completion of a - * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained - * from the target device mapping to the dm device. + * The zone descriptors obtained with a zone report indicate zone positions + * within the target backing device, regardless of that device is a partition + * and regardless of the target mapping start sector on the device or partition. + * The zone descriptors start sector and write pointer position must be adjusted + * to match their relative position within the dm device. + * A target may call dm_remap_zone_report() after completion of a + * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained from the + * backing device. */ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) { @@ -1171,6 +1173,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) struct blk_zone *zone; unsigned int nr_rep = 0; unsigned int ofst; + sector_t part_offset; struct bio_vec bvec; struct bvec_iter iter; void *addr; @@ -1179,6 +1182,15 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) return; /* + * bio sector was incremented by the request size on completion. Taking + * into account the original request sector, the target start offset on + * the backing device and the target mapping offset (ti->begin), the + * start sector of the backing device. The partition offset is always 0 + * if the target uses a whole device. + */ + part_offset = bio->bi_iter.bi_sector + ti->begin - (start + bio_end_sector(report_bio)); + + /* * Remap the start sector of the reported zones. For sequential zones, * also remap the write pointer position. */ @@ -1195,6 +1207,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) /* Set zones start sector */ while (hdr->nr_zones && ofst < bvec.bv_len) { zone = addr + ofst; + zone->start -= part_offset; if (zone->start >= start + ti->len) { hdr->nr_zones = 0; break; @@ -1206,7 +1219,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) else if (zone->cond == BLK_ZONE_COND_EMPTY) zone->wp = zone->start; else - zone->wp = zone->wp + ti->begin - start; + zone->wp = zone->wp + ti->begin - start - part_offset; } ofst += sizeof(struct blk_zone); hdr->nr_zones--; diff --git a/drivers/media/v4l2-core/v4l2-event.c b/drivers/media/v4l2-core/v4l2-event.c index 127fe6eb91d9..a3ef1f50a4b3 100644 --- a/drivers/media/v4l2-core/v4l2-event.c +++ b/drivers/media/v4l2-core/v4l2-event.c @@ -115,14 +115,6 @@ static void __v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *e if (sev == NULL) return; - /* - * If the event has been added to the fh->subscribed list, but its - * add op has not completed yet elems will be 0, treat this as - * not being subscribed. - */ - if (!sev->elems) - return; - /* Increase event sequence number on fh. */ fh->sequence++; @@ -208,6 +200,7 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, struct v4l2_subscribed_event *sev, *found_ev; unsigned long flags; unsigned i; + int ret = 0; if (sub->type == V4L2_EVENT_ALL) return -EINVAL; @@ -225,31 +218,36 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, sev->flags = sub->flags; sev->fh = fh; sev->ops = ops; + sev->elems = elems; + + mutex_lock(&fh->subscribe_lock); spin_lock_irqsave(&fh->vdev->fh_lock, flags); found_ev = v4l2_event_subscribed(fh, sub->type, sub->id); - if (!found_ev) - list_add(&sev->list, &fh->subscribed); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (found_ev) { + /* Already listening */ kvfree(sev); - return 0; /* Already listening */ + goto out_unlock; } if (sev->ops && sev->ops->add) { - int ret = sev->ops->add(sev, elems); + ret = sev->ops->add(sev, elems); if (ret) { - sev->ops = NULL; - v4l2_event_unsubscribe(fh, sub); - return ret; + kvfree(sev); + goto out_unlock; } } - /* Mark as ready for use */ - sev->elems = elems; + spin_lock_irqsave(&fh->vdev->fh_lock, flags); + list_add(&sev->list, &fh->subscribed); + spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); - return 0; +out_unlock: + mutex_unlock(&fh->subscribe_lock); + + return ret; } EXPORT_SYMBOL_GPL(v4l2_event_subscribe); @@ -288,6 +286,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, return 0; } + mutex_lock(&fh->subscribe_lock); + spin_lock_irqsave(&fh->vdev->fh_lock, flags); sev = v4l2_event_subscribed(fh, sub->type, sub->id); @@ -305,6 +305,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, if (sev && sev->ops && sev->ops->del) sev->ops->del(sev); + mutex_unlock(&fh->subscribe_lock); + kvfree(sev); return 0; diff --git a/drivers/media/v4l2-core/v4l2-fh.c b/drivers/media/v4l2-core/v4l2-fh.c index 3895999bf880..c91a7bd3ecfc 100644 --- a/drivers/media/v4l2-core/v4l2-fh.c +++ b/drivers/media/v4l2-core/v4l2-fh.c @@ -45,6 +45,7 @@ void v4l2_fh_init(struct v4l2_fh *fh, struct video_device *vdev) INIT_LIST_HEAD(&fh->available); INIT_LIST_HEAD(&fh->subscribed); fh->sequence = -1; + mutex_init(&fh->subscribe_lock); } EXPORT_SYMBOL_GPL(v4l2_fh_init); @@ -90,6 +91,7 @@ void v4l2_fh_exit(struct v4l2_fh *fh) return; v4l_disable_media_source(fh->vdev); v4l2_event_unsubscribe_all(fh); + mutex_destroy(&fh->subscribe_lock); fh->vdev = NULL; } EXPORT_SYMBOL_GPL(v4l2_fh_exit); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index a0b9102c4c6e..e201ccb3fda4 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1371,6 +1371,16 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, if (brq->data.blocks > 1) { /* + * Some SD cards in SPI mode return a CRC error or even lock up + * completely when trying to read the last block using a + * multiblock read command. + */ + if (mmc_host_is_spi(card->host) && (rq_data_dir(req) == READ) && + (blk_rq_pos(req) + blk_rq_sectors(req) == + get_capacity(md->disk))) + brq->data.blocks--; + + /* * After a read error, we redo the request one sector * at a time in order to accurately determine which * sectors can be read successfully. diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index abf9e884386c..f57f5de54206 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -235,7 +235,7 @@ int mmc_of_parse(struct mmc_host *host) host->caps |= MMC_CAP_NEEDS_POLL; ret = mmc_gpiod_request_cd(host, "cd", 0, true, - cd_debounce_delay_ms, + cd_debounce_delay_ms * 1000, &cd_gpio_invert); if (!ret) dev_info(host->parent, "Got CD GPIO\n"); diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 2a833686784b..86803a3a04dc 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -271,7 +271,7 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, if (debounce) { ret = gpiod_set_debounce(desc, debounce); if (ret < 0) - ctx->cd_debounce_delay_ms = debounce; + ctx->cd_debounce_delay_ms = debounce / 1000; } if (gpio_invert) diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c index 890f192dedbd..5389c4821882 100644 --- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c @@ -498,7 +498,8 @@ static const struct soc_device_attribute gen3_soc_whitelist[] = { static int renesas_sdhi_sys_dmac_probe(struct platform_device *pdev) { - if (of_device_get_match_data(&pdev->dev) == &of_rcar_gen3_compatible && + if ((of_device_get_match_data(&pdev->dev) == &of_rcar_gen3_compatible || + of_device_get_match_data(&pdev->dev) == &of_rcar_r8a7795_compatible) && !soc_device_match(gen3_soc_whitelist)) return -ENODEV; diff --git a/drivers/mux/adgs1408.c b/drivers/mux/adgs1408.c index 0f7cf54e3234..89096f10f4c4 100644 --- a/drivers/mux/adgs1408.c +++ b/drivers/mux/adgs1408.c @@ -128,4 +128,4 @@ module_spi_driver(adgs1408_driver); MODULE_AUTHOR("Mircea Caprioru <mircea.caprioru@analog.com>"); MODULE_DESCRIPTION("Analog Devices ADGS1408 MUX driver"); -MODULE_LICENSE("GPL v2"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0d87e11e7f1d..ee28ec9e0aba 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -210,6 +210,7 @@ static void bond_get_stats(struct net_device *bond_dev, static void bond_slave_arr_handler(struct work_struct *work); static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, int mod); +static void bond_netdev_notify_work(struct work_struct *work); /*---------------------------- General routines -----------------------------*/ @@ -1170,9 +1171,27 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) } } - /* don't change skb->dev for link-local packets */ - if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) + /* Link-local multicast packets should be passed to the + * stack on the link they arrive as well as pass them to the + * bond-master device. These packets are mostly usable when + * stack receives it with the link on which they arrive + * (e.g. LLDP) they also must be available on master. Some of + * the use cases include (but are not limited to): LLDP agents + * that must be able to operate both on enslaved interfaces as + * well as on bonds themselves; linux bridges that must be able + * to process/pass BPDUs from attached bonds when any kind of + * STP version is enabled on the network. + */ + if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (nskb) { + nskb->dev = bond->dev; + nskb->queue_mapping = 0; + netif_rx(nskb); + } return RX_HANDLER_PASS; + } if (bond_should_deliver_exact_match(skb, slave, bond)) return RX_HANDLER_EXACT; @@ -1269,6 +1288,8 @@ static struct slave *bond_alloc_slave(struct bonding *bond) return NULL; } } + INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); + return slave; } @@ -1276,6 +1297,7 @@ static void bond_free_slave(struct slave *slave) { struct bonding *bond = bond_get_bond_by_slave(slave); + cancel_delayed_work_sync(&slave->notify_work); if (BOND_MODE(bond) == BOND_MODE_8023AD) kfree(SLAVE_AD_INFO(slave)); @@ -1297,39 +1319,26 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) info->link_failure_count = slave->link_failure_count; } -static void bond_netdev_notify(struct net_device *dev, - struct netdev_bonding_info *info) -{ - rtnl_lock(); - netdev_bonding_info_change(dev, info); - rtnl_unlock(); -} - static void bond_netdev_notify_work(struct work_struct *_work) { - struct netdev_notify_work *w = - container_of(_work, struct netdev_notify_work, work.work); + struct slave *slave = container_of(_work, struct slave, + notify_work.work); + + if (rtnl_trylock()) { + struct netdev_bonding_info binfo; - bond_netdev_notify(w->dev, &w->bonding_info); - dev_put(w->dev); - kfree(w); + bond_fill_ifslave(slave, &binfo.slave); + bond_fill_ifbond(slave->bond, &binfo.master); + netdev_bonding_info_change(slave->dev, &binfo); + rtnl_unlock(); + } else { + queue_delayed_work(slave->bond->wq, &slave->notify_work, 1); + } } void bond_queue_slave_event(struct slave *slave) { - struct bonding *bond = slave->bond; - struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); - - if (!nnw) - return; - - dev_hold(slave->dev); - nnw->dev = slave->dev; - bond_fill_ifslave(slave, &nnw->bonding_info.slave); - bond_fill_ifbond(bond, &nnw->bonding_info.master); - INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); - - queue_delayed_work(slave->bond->wq, &nnw->work, 0); + queue_delayed_work(slave->bond->wq, &slave->notify_work, 0); } void bond_lower_state_changed(struct slave *slave) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index d93c790bfbe8..ad534b90ef21 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1107,7 +1107,7 @@ void b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); vl->members |= BIT(port); - if (untagged) + if (untagged && !dsa_is_cpu_port(ds, port)) vl->untag |= BIT(port); else vl->untag &= ~BIT(port); @@ -1149,7 +1149,7 @@ int b53_vlan_del(struct dsa_switch *ds, int port, pvid = 0; } - if (untagged) + if (untagged && !dsa_is_cpu_port(ds, port)) vl->untag &= ~(BIT(port)); b53_set_vlan_entry(dev, vid, vl); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index e0066adcd2f3..fc8b48adf38b 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -703,7 +703,6 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) static int bcm_sf2_sw_resume(struct dsa_switch *ds) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - unsigned int port; int ret; ret = bcm_sf2_sw_rst(priv); @@ -715,14 +714,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) if (priv->hw_params.num_gphy == 1) bcm_sf2_gphy_enable_set(ds, true); - for (port = 0; port < DSA_MAX_PORTS; port++) { - if (dsa_is_user_port(ds, port)) - bcm_sf2_port_setup(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) - bcm_sf2_imp_setup(ds, port); - } - - bcm_sf2_enable_acb(ds); + ds->ops->setup(ds); return 0; } @@ -1173,10 +1165,10 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) { struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); - /* Disable all ports and interrupts */ priv->wol_ports_mask = 0; - bcm_sf2_sw_suspend(priv->dev->ds); dsa_unregister_switch(priv->dev->ds); + /* Disable all ports and interrupts */ + bcm_sf2_sw_suspend(priv->dev->ds); bcm_sf2_mdio_unregister(priv); return 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index 1c682b76190f..2b3ff0c20155 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -245,11 +245,11 @@ static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >> ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT; ena_rx_ctx->l3_csum_err = - (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT; + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT); ena_rx_ctx->l4_csum_err = - (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT; + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT); ena_rx_ctx->hash = cdesc->hash; ena_rx_ctx->frag = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 29b5774dd32d..d906293ce07d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1575,8 +1575,6 @@ static int ena_up_complete(struct ena_adapter *adapter) if (rc) return rc; - ena_init_napi(adapter); - ena_change_mtu(adapter->netdev, adapter->netdev->mtu); ena_refill_all_rx_bufs(adapter); @@ -1730,6 +1728,13 @@ static int ena_up(struct ena_adapter *adapter) ena_setup_io_intr(adapter); + /* napi poll functions should be initialized before running + * request_irq(), to handle a rare condition where there is a pending + * interrupt, causing the ISR to fire immediately while the poll + * function wasn't set yet, causing a null dereference + */ + ena_init_napi(adapter); + rc = ena_request_io_irq(adapter); if (rc) goto err_req_irq; @@ -2185,25 +2190,6 @@ error_drop_packet: return NETDEV_TX_OK; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ena_netpoll(struct net_device *netdev) -{ - struct ena_adapter *adapter = netdev_priv(netdev); - int i; - - /* Dont schedule NAPI if the driver is in the middle of reset - * or netdev is down. - */ - - if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) || - test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) - return; - - for (i = 0; i < adapter->num_queues; i++) - napi_schedule(&adapter->ena_napi[i].napi); -} -#endif /* CONFIG_NET_POLL_CONTROLLER */ - static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev, select_queue_fallback_t fallback) @@ -2369,9 +2355,6 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_change_mtu = ena_change_mtu, .ndo_set_mac_address = NULL, .ndo_validate_addr = eth_validate_addr, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ena_netpoll, -#endif /* CONFIG_NET_POLL_CONTROLLER */ }; static int ena_device_validate_params(struct ena_adapter *adapter, @@ -2641,7 +2624,11 @@ err_disable_msix: ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); err_device_destroy: + ena_com_abort_admin_commands(ena_dev); + ena_com_wait_for_abort_completion(ena_dev); ena_com_admin_destroy(ena_dev); + ena_com_mmio_reg_read_request_destroy(ena_dev); + ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE); err: clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); @@ -3121,15 +3108,8 @@ err_rss_init: static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { - int release_bars; - - if (ena_dev->mem_bar) - devm_iounmap(&pdev->dev, ena_dev->mem_bar); - - if (ena_dev->reg_bar) - devm_iounmap(&pdev->dev, ena_dev->reg_bar); + int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; - release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); } diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 116997a8b593..00332a1ea84b 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1031,6 +1031,7 @@ static int dec_lance_probe(struct device *bdev, const int type) int i, ret; unsigned long esar_base; unsigned char *esar; + const char *desc; if (dec_lance_debug && version_printed++ == 0) printk(version); @@ -1216,19 +1217,20 @@ static int dec_lance_probe(struct device *bdev, const int type) */ switch (type) { case ASIC_LANCE: - printk("%s: IOASIC onboard LANCE", name); + desc = "IOASIC onboard LANCE"; break; case PMAD_LANCE: - printk("%s: PMAD-AA", name); + desc = "PMAD-AA"; break; case PMAX_LANCE: - printk("%s: PMAX onboard LANCE", name); + desc = "PMAX onboard LANCE"; break; } for (i = 0; i < 6; i++) dev->dev_addr[i] = esar[i * 4]; - printk(", addr = %pM, irq = %d\n", dev->dev_addr, dev->irq); + printk("%s: %s, addr = %pM, irq = %d\n", + name, desc, dev->dev_addr, dev->irq); dev->netdev_ops = &lance_netdev_ops; dev->watchdog_timeo = 5*HZ; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 147045757b10..c57238fce863 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1069,9 +1069,6 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) { u32 reg; - /* Stop monitoring MPD interrupt */ - intrl2_0_mask_set(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG); - /* Disable RXCHK, active filters and Broadcom tag matching */ reg = rxchk_readl(priv, RXCHK_CONTROL); reg &= ~(RXCHK_BRCM_TAG_MATCH_MASK << @@ -1081,6 +1078,17 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) /* Clear the MagicPacket detection logic */ mpd_enable_set(priv, false); + reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS); + if (reg & INTRL2_0_MPD) + netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); + + if (reg & INTRL2_0_BRCM_MATCH_TAG) { + reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & + RXCHK_BRCM_TAG_MATCH_MASK; + netdev_info(priv->netdev, + "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); + } + netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); } @@ -1105,7 +1113,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) struct bcm_sysport_priv *priv = netdev_priv(dev); struct bcm_sysport_tx_ring *txr; unsigned int ring, ring_bit; - u32 reg; priv->irq0_stat = intrl2_0_readl(priv, INTRL2_CPU_STATUS) & ~intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); @@ -1131,16 +1138,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) if (priv->irq0_stat & INTRL2_0_TX_RING_FULL) bcm_sysport_tx_reclaim_all(priv); - if (priv->irq0_stat & INTRL2_0_MPD) - netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); - - if (priv->irq0_stat & INTRL2_0_BRCM_MATCH_TAG) { - reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & - RXCHK_BRCM_TAG_MATCH_MASK; - netdev_info(priv->netdev, - "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); - } - if (!priv->is_lite) goto out; @@ -2641,9 +2638,6 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) /* UniMAC receive needs to be turned on */ umac_enable_set(priv, CMD_RX_EN, 1); - /* Enable the interrupt wake-up source */ - intrl2_0_mask_clear(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG); - netif_dbg(priv, wol, ndev, "entered WOL mode\n"); return 0; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 61957b0bbd8c..e2d92548226a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1884,8 +1884,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { tx_pkts++; /* return full budget so NAPI will complete. */ - if (unlikely(tx_pkts > bp->tx_wake_thresh)) + if (unlikely(tx_pkts > bp->tx_wake_thresh)) { rx_pkts = budget; + raw_cons = NEXT_RAW_CMP(raw_cons); + break; + } } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { if (likely(budget)) rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); @@ -1913,7 +1916,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) } raw_cons = NEXT_RAW_CMP(raw_cons); - if (rx_pkts == budget) + if (rx_pkts && rx_pkts == budget) break; } @@ -2027,8 +2030,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget) while (1) { work_done += bnxt_poll_work(bp, bnapi, budget - work_done); - if (work_done >= budget) + if (work_done >= budget) { + if (!budget) + BNXT_CP_DB_REARM(cpr->cp_doorbell, + cpr->cp_raw_cons); break; + } if (!bnxt_has_work(bp, cpr)) { if (napi_complete_done(napi, work_done)) @@ -3010,10 +3017,11 @@ static void bnxt_free_hwrm_resources(struct bnxt *bp) { struct pci_dev *pdev = bp->pdev; - dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, - bp->hwrm_cmd_resp_dma_addr); - - bp->hwrm_cmd_resp_addr = NULL; + if (bp->hwrm_cmd_resp_addr) { + dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, + bp->hwrm_cmd_resp_dma_addr); + bp->hwrm_cmd_resp_addr = NULL; + } } static int bnxt_alloc_hwrm_resources(struct bnxt *bp) @@ -4643,7 +4651,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req, FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; enables |= ring_grps ? FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; - enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; + enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0; req->num_rx_rings = cpu_to_le16(rx_rings); req->num_hw_ring_grps = cpu_to_le16(ring_grps); @@ -8614,7 +8622,7 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, *max_tx = hw_resc->max_tx_rings; *max_rx = hw_resc->max_rx_rings; *max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp), - hw_resc->max_irqs); + hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp)); *max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs); max_ring_grps = hw_resc->max_hw_ring_grps; if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) { @@ -9050,6 +9058,7 @@ init_err_cleanup_tc: bnxt_clear_int_mode(bp); init_err_pci_clean: + bnxt_free_hwrm_resources(bp); bnxt_cleanup_pci(bp); init_err_free: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index ddc98c359488..a85d2be986af 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -98,13 +98,13 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets, bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1); for (i = 0; i < max_tc; i++) { - u8 qidx; + u8 qidx = bp->tc_to_qidx[i]; req.enables |= cpu_to_le32( - QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i); + QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << + qidx); memset(&cos2bw, 0, sizeof(cos2bw)); - qidx = bp->tc_to_qidx[i]; cos2bw.queue_id = bp->q_info[qidx].queue_id; if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) { cos2bw.tsa = diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f1a86b422617..58b9744c4058 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2160,6 +2160,7 @@ static void macb_configure_dma(struct macb *bp) else dmacfg &= ~GEM_BIT(TXCOEN); + dmacfg &= ~GEM_BIT(ADDR64); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (bp->hw_dma_cap & HW_DMA_CAP_64B) dmacfg |= GEM_BIT(ADDR64); diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index a19172dbe6be..c34ea385fe4a 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2159,6 +2159,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_QSET_PARAMS) + return -EINVAL; if (t.qset_idx >= SGE_QSETS) return -EINVAL; if (!in_range(t.intr_lat, 0, M_NEWTIMER) || @@ -2258,6 +2260,9 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_QSET_PARAMS) + return -EINVAL; + /* Display qsets for all ports when offload enabled */ if (test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) { q1 = 0; @@ -2303,6 +2308,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&edata, useraddr, sizeof(edata))) return -EFAULT; + if (edata.cmd != CHELSIO_SET_QSET_NUM) + return -EINVAL; if (edata.val < 1 || (edata.val > 1 && !(adapter->flags & USING_MSIX))) return -EINVAL; @@ -2343,6 +2350,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_LOAD_FW) + return -EINVAL; /* Check t.len sanity ? */ fw_data = memdup_user(useraddr + sizeof(t), t.len); if (IS_ERR(fw_data)) @@ -2366,6 +2375,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SETMTUTAB) + return -EINVAL; if (m.nmtus != NMTUS) return -EINVAL; if (m.mtus[0] < 81) /* accommodate SACK */ @@ -2407,6 +2418,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SET_PM) + return -EINVAL; if (!is_power_of_2(m.rx_pg_sz) || !is_power_of_2(m.tx_pg_sz)) return -EINVAL; /* not power of 2 */ @@ -2440,6 +2453,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_MEM) + return -EINVAL; if ((t.addr & 7) || (t.len & 7)) return -EINVAL; if (t.mem_id == MEM_CM) @@ -2492,6 +2507,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EAGAIN; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_TRACE_FILTER) + return -EINVAL; tp = (const struct trace_params *)&t.sip; if (t.config_tx) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 74d122616e76..534787291b44 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4002,8 +4002,6 @@ static int be_enable_vxlan_offloads(struct be_adapter *adapter) netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->features |= NETIF_F_GSO_UDP_TUNNEL; dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n", be16_to_cpu(port)); @@ -4025,8 +4023,6 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter) adapter->vxlan_port = 0; netdev->hw_enc_features = 0; - netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL); - netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL); } static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs, @@ -5320,6 +5316,7 @@ static void be_netdev_init(struct net_device *netdev) struct be_adapter *adapter = netdev_priv(netdev); netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX; if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2708297e7795..bf9b9fd6d2a0 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1158,7 +1158,7 @@ static void fec_enet_timeout_work(struct work_struct *work) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -1273,7 +1273,7 @@ skb_done: /* Since we have freed up a buffer, the ring is no longer full */ - if (netif_queue_stopped(ndev)) { + if (netif_tx_queue_stopped(nq)) { entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free >= txq->tx_wake_threshold) netif_tx_wake_queue(nq); @@ -1746,7 +1746,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -2247,7 +2247,7 @@ static int fec_enet_set_pauseparam(struct net_device *ndev, napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index a051e582d541..79d03f8ee7b1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -84,7 +84,7 @@ static void hnae_unmap_buffer(struct hnae_ring *ring, struct hnae_desc_cb *cb) if (cb->type == DESC_TYPE_SKB) dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); - else + else if (cb->length) dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index f56855e63c96..28e907831b0e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -40,9 +40,9 @@ #define SKB_TMP_LEN(SKB) \ (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) -static void fill_v2_desc(struct hnae_ring *ring, void *priv, - int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu) +static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, + int send_sz, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) { struct hnae_desc *desc = &ring->desc[ring->next_to_use]; struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -64,7 +64,7 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, desc_cb->type = type; desc->addr = cpu_to_le64(dma); - desc->tx.send_size = cpu_to_le16((u16)size); + desc->tx.send_size = cpu_to_le16((u16)send_sz); /* config bd buffer end */ hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); @@ -133,6 +133,14 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, ring_ptr_move_fw(ring, next_to_use); } +static void fill_v2_desc(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) +{ + fill_v2_desc_hw(ring, priv, size, size, dma, frag_end, + buf_num, type, mtu); +} + static const struct acpi_device_id hns_enet_acpi_match[] = { { "HISI00C1", 0 }, { "HISI00C2", 0 }, @@ -289,15 +297,15 @@ static void fill_tso_desc(struct hnae_ring *ring, void *priv, /* when the frag size is bigger than hardware, split this frag */ for (k = 0; k < frag_buf_num; k++) - fill_v2_desc(ring, priv, - (k == frag_buf_num - 1) ? + fill_v2_desc_hw(ring, priv, k == 0 ? size : 0, + (k == frag_buf_num - 1) ? sizeoflast : BD_MAX_SEND_SIZE, - dma + BD_MAX_SEND_SIZE * k, - frag_end && (k == frag_buf_num - 1) ? 1 : 0, - buf_num, - (type == DESC_TYPE_SKB && !k) ? + dma + BD_MAX_SEND_SIZE * k, + frag_end && (k == frag_buf_num - 1) ? 1 : 0, + buf_num, + (type == DESC_TYPE_SKB && !k) ? DESC_TYPE_SKB : DESC_TYPE_PAGE, - mtu); + mtu); } netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, @@ -1495,21 +1503,6 @@ static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, return phy_mii_ioctl(phy_dev, ifr, cmd); } -/* use only for netconsole to poll with the device without interrupt */ -#ifdef CONFIG_NET_POLL_CONTROLLER -static void hns_nic_poll_controller(struct net_device *ndev) -{ - struct hns_nic_priv *priv = netdev_priv(ndev); - unsigned long flags; - int i; - - local_irq_save(flags); - for (i = 0; i < priv->ae_handle->q_num * 2; i++) - napi_schedule(&priv->ring_data[i].napi); - local_irq_restore(flags); -} -#endif - static netdev_tx_t hns_nic_net_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -1962,9 +1955,6 @@ static const struct net_device_ops hns_nic_netdev_ops = { .ndo_set_features = hns_nic_set_features, .ndo_fix_features = hns_nic_fix_features, .ndo_get_stats64 = hns_nic_get_stats64, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = hns_nic_poll_controller, -#endif .ndo_set_rx_mode = hns_nic_set_rx_mode, .ndo_select_queue = hns_nic_select_queue, }; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 09e9da10b786..4a8f82938ed5 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -789,23 +789,6 @@ static void hinic_get_stats64(struct net_device *netdev, stats->tx_errors = nic_tx_stats->tx_dropped; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void hinic_netpoll(struct net_device *netdev) -{ - struct hinic_dev *nic_dev = netdev_priv(netdev); - int i, num_qps; - - num_qps = hinic_hwdev_num_qps(nic_dev->hwdev); - for (i = 0; i < num_qps; i++) { - struct hinic_txq *txq = &nic_dev->txqs[i]; - struct hinic_rxq *rxq = &nic_dev->rxqs[i]; - - napi_schedule(&txq->napi); - napi_schedule(&rxq->napi); - } -} -#endif - static const struct net_device_ops hinic_netdev_ops = { .ndo_open = hinic_open, .ndo_stop = hinic_close, @@ -818,9 +801,6 @@ static const struct net_device_ops hinic_netdev_ops = { .ndo_start_xmit = hinic_xmit_frame, .ndo_tx_timeout = hinic_tx_timeout, .ndo_get_stats64 = hinic_get_stats64, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = hinic_netpoll, -#endif }; static void netdev_features_init(struct net_device *netdev) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index ba580bfae512..03f64f40b2a3 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -921,17 +921,6 @@ static int ehea_poll(struct napi_struct *napi, int budget) return rx; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ehea_netpoll(struct net_device *dev) -{ - struct ehea_port *port = netdev_priv(dev); - int i; - - for (i = 0; i < port->num_def_qps; i++) - napi_schedule(&port->port_res[i].napi); -} -#endif - static irqreturn_t ehea_recv_irq_handler(int irq, void *param) { struct ehea_port_res *pr = param; @@ -2953,9 +2942,6 @@ static const struct net_device_ops ehea_netdev_ops = { .ndo_open = ehea_open, .ndo_stop = ehea_stop, .ndo_start_xmit = ehea_start_xmit, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ehea_netpoll, -#endif .ndo_get_stats64 = ehea_get_stats64, .ndo_set_mac_address = ehea_set_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4f0daf67b18d..699ef942b615 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2207,19 +2207,6 @@ restart_poll: return frames_processed; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ibmvnic_netpoll_controller(struct net_device *dev) -{ - struct ibmvnic_adapter *adapter = netdev_priv(dev); - int i; - - replenish_pools(netdev_priv(dev)); - for (i = 0; i < adapter->req_rx_queues; i++) - ibmvnic_interrupt_rx(adapter->rx_scrq[i]->irq, - adapter->rx_scrq[i]); -} -#endif - static int wait_for_reset(struct ibmvnic_adapter *adapter) { int rc, ret; @@ -2292,9 +2279,6 @@ static const struct net_device_ops ibmvnic_netdev_ops = { .ndo_set_mac_address = ibmvnic_set_mac, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = ibmvnic_tx_timeout, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ibmvnic_netpoll_controller, -#endif .ndo_change_mtu = ibmvnic_change_mtu, .ndo_features_check = ibmvnic_features_check, }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f27d73a7bf16..6cdd58d9d461 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3196,11 +3196,13 @@ int ixgbe_poll(struct napi_struct *napi, int budget) return budget; /* all work done, exit the polling mode */ - napi_complete_done(napi, work_done); - if (adapter->rx_itr_setting & 1) - ixgbe_set_itr(q_vector); - if (!test_bit(__IXGBE_DOWN, &adapter->state)) - ixgbe_irq_enable_queues(adapter, BIT_ULL(q_vector->v_idx)); + if (likely(napi_complete_done(napi, work_done))) { + if (adapter->rx_itr_setting & 1) + ixgbe_set_itr(q_vector); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) + ixgbe_irq_enable_queues(adapter, + BIT_ULL(q_vector->v_idx)); + } return min(work_done, budget - 1); } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 38cc01beea79..a74002b43b51 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1725,7 +1725,7 @@ static void mvpp2_txq_desc_put(struct mvpp2_tx_queue *txq) } /* Set Tx descriptors fields relevant for CSUM calculation */ -static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto, +static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto, int ip_hdr_len, int l4_proto) { u32 command; @@ -2600,14 +2600,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { int ip_hdr_len = 0; u8 l4_proto; + __be16 l3_proto = vlan_get_protocol(skb); - if (skb->protocol == htons(ETH_P_IP)) { + if (l3_proto == htons(ETH_P_IP)) { struct iphdr *ip4h = ip_hdr(skb); /* Calculate IPv4 checksum and L4 checksum */ ip_hdr_len = ip4h->ihl; l4_proto = ip4h->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (l3_proto == htons(ETH_P_IPV6)) { struct ipv6hdr *ip6h = ipv6_hdr(skb); /* Read l4_protocol from one of IPv6 extra headers */ @@ -2619,7 +2620,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb) } return mvpp2_txq_desc_csum(skb_network_offset(skb), - skb->protocol, ip_hdr_len, l4_proto); + l3_proto, ip_hdr_len, l4_proto); } return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index d2d59444f562..6a046030e873 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -260,47 +260,34 @@ static const struct devlink_param mlx4_devlink_params[] = { NULL, NULL, NULL), }; -static void mlx4_devlink_set_init_value(struct devlink *devlink, u32 param_id, - union devlink_param_value init_val) -{ - struct mlx4_priv *priv = devlink_priv(devlink); - struct mlx4_dev *dev = &priv->dev; - int err; - - err = devlink_param_driverinit_value_set(devlink, param_id, init_val); - if (err) - mlx4_warn(dev, - "devlink set parameter %u value failed (err = %d)", - param_id, err); -} - static void mlx4_devlink_set_params_init_values(struct devlink *devlink) { union devlink_param_value value; value.vbool = !!mlx4_internal_err_reset; - mlx4_devlink_set_init_value(devlink, - DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, - value); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, + value); value.vu32 = 1UL << log_num_mac; - mlx4_devlink_set_init_value(devlink, - DEVLINK_PARAM_GENERIC_ID_MAX_MACS, value); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); value.vbool = enable_64b_cqe_eqe; - mlx4_devlink_set_init_value(devlink, - MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, - value); + devlink_param_driverinit_value_set(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, + value); value.vbool = enable_4k_uar; - mlx4_devlink_set_init_value(devlink, - MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, - value); + devlink_param_driverinit_value_set(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, + value); value.vbool = false; - mlx4_devlink_set_init_value(devlink, - DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, - value); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, + value); } static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index db2cfcd21d43..0f189f873859 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -54,6 +54,7 @@ #include "en_stats.h" #include "en/fs.h" +extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index bbf69e859b78..1431232c9a09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -16,6 +16,8 @@ struct mlx5e_tc_table { DECLARE_HASHTABLE(mod_hdr_tbl, 8); DECLARE_HASHTABLE(hairpin_tbl, 8); + + struct notifier_block netdevice_nb; }; struct mlx5e_flow_table { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 54118b77dc1f..f291d1bf1558 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4315,7 +4315,7 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -static const struct net_device_ops mlx5e_netdev_ops = { +const struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9fed54017659..85796727093e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1368,6 +1368,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); } if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) { @@ -2946,14 +2949,71 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, return 0; } +static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, + struct mlx5e_priv *peer_priv) +{ + struct mlx5_core_dev *peer_mdev = peer_priv->mdev; + struct mlx5e_hairpin_entry *hpe; + u16 peer_vhca_id; + int bkt; + + if (!same_hw_devs(priv, peer_priv)) + return; + + peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + + hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) { + if (hpe->peer_vhca_id == peer_vhca_id) + hpe->hp->pair->peer_gone = true; + } +} + +static int mlx5e_tc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_flow_steering *fs; + struct mlx5e_priv *peer_priv; + struct mlx5e_tc_table *tc; + struct mlx5e_priv *priv; + + if (ndev->netdev_ops != &mlx5e_netdev_ops || + event != NETDEV_UNREGISTER || + ndev->reg_state == NETREG_REGISTERED) + return NOTIFY_DONE; + + tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); + fs = container_of(tc, struct mlx5e_flow_steering, tc); + priv = container_of(fs, struct mlx5e_priv, fs); + peer_priv = netdev_priv(ndev); + if (priv == peer_priv || + !(priv->netdev->features & NETIF_F_HW_TC)) + return NOTIFY_DONE; + + mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv); + + return NOTIFY_DONE; +} + int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; + int err; hash_init(tc->mod_hdr_tbl); hash_init(tc->hairpin_tbl); - return rhashtable_init(&tc->ht, &tc_ht_params); + err = rhashtable_init(&tc->ht, &tc_ht_params); + if (err) + return err; + + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; + if (register_netdevice_notifier(&tc->netdevice_nb)) { + tc->netdevice_nb.notifier_call = NULL; + mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); + } + + return err; } static void _mlx5e_tc_del_flow(void *ptr, void *arg) @@ -2969,6 +3029,9 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; + if (tc->netdevice_nb.notifier_call) + unregister_netdevice_notifier(&tc->netdevice_nb); + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); if (!IS_ERR_OR_NULL(tc->t)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2b252cde5cc2..ea7dedc2d5ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2000,7 +2000,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) u32 max_guarantee = 0; int i; - for (i = 0; i <= esw->total_vports; i++) { + for (i = 0; i < esw->total_vports; i++) { evport = &esw->vports[i]; if (!evport->enabled || evport->info.min_rate < max_guarantee) continue; @@ -2020,7 +2020,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int err; int i; - for (i = 0; i <= esw->total_vports; i++) { + for (i = 0; i < esw->total_vports; i++) { evport = &esw->vports[i]; if (!evport->enabled) continue; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index d2f76070ea7c..a1ee9a8a769e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -475,7 +475,8 @@ static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp) for (i = 0; i < hp->num_channels; i++) { mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]); - mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + if (!hp->peer_gone) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); } } @@ -567,6 +568,8 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) MLX5_RQC_STATE_RST, 0, 0); /* unset peer SQs */ + if (hp->peer_gone) + return; for (i = 0; i < hp->num_channels; i++) mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_RST, 0, 0); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 4d271fb3de3d..5890fdfd62c3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -718,14 +718,17 @@ static void mlxsw_pci_eq_tasklet(unsigned long data) memset(&active_cqns, 0, sizeof(active_cqns)); while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) { - u8 event_type = mlxsw_pci_eqe_event_type_get(eqe); - switch (event_type) { - case MLXSW_PCI_EQE_EVENT_TYPE_CMD: + /* Command interface completion events are always received on + * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events + * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1). + */ + switch (q->num) { + case MLXSW_PCI_EQ_ASYNC_NUM: mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe); q->u.eq.ev_cmd_count++; break; - case MLXSW_PCI_EQE_EVENT_TYPE_COMP: + case MLXSW_PCI_EQ_COMP_NUM: cqn = mlxsw_pci_eqe_cqn_get(eqe); set_bit(cqn, active_cqns); cq_handle = true; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index b492152c8881..30bb2c533cec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4845,6 +4845,8 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, upper_dev = info->upper_dev; if (info->linking) break; + if (is_vlan_dev(upper_dev)) + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, upper_dev); if (netif_is_macvlan(upper_dev)) mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); break; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 8ed38fd5a852..c6d29fdbb880 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2077,14 +2077,17 @@ nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, return true; } -static void nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) +static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) { struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; struct nfp_net *nn = r_vec->nfp_net; struct nfp_net_dp *dp = &nn->dp; + unsigned int budget = 512; - while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring)) + while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) continue; + + return budget; } static void nfp_ctrl_poll(unsigned long arg) @@ -2096,9 +2099,13 @@ static void nfp_ctrl_poll(unsigned long arg) __nfp_ctrl_tx_queued(r_vec); spin_unlock_bh(&r_vec->lock); - nfp_ctrl_rx(r_vec); - - nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + if (nfp_ctrl_rx(r_vec)) { + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } else { + tasklet_schedule(&r_vec->tasklet); + nn_dp_warn(&r_vec->nfp_net->dp, + "control message budget exceeded!\n"); + } } /* Setup and Configuration diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 69aa7fc392c5..59c70be22a84 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -72,9 +72,6 @@ static void netxen_schedule_work(struct netxen_adapter *adapter, work_func_t func, int delay); static void netxen_cancel_fw_work(struct netxen_adapter *adapter); static int netxen_nic_poll(struct napi_struct *napi, int budget); -#ifdef CONFIG_NET_POLL_CONTROLLER -static void netxen_nic_poll_controller(struct net_device *netdev); -#endif static void netxen_create_sysfs_entries(struct netxen_adapter *adapter); static void netxen_remove_sysfs_entries(struct netxen_adapter *adapter); @@ -581,9 +578,6 @@ static const struct net_device_ops netxen_netdev_ops = { .ndo_tx_timeout = netxen_tx_timeout, .ndo_fix_features = netxen_fix_features, .ndo_set_features = netxen_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = netxen_nic_poll_controller, -#endif }; static inline bool netxen_function_zero(struct pci_dev *pdev) @@ -2402,23 +2396,6 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget) return work_done; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void netxen_nic_poll_controller(struct net_device *netdev) -{ - int ring; - struct nx_host_sds_ring *sds_ring; - struct netxen_adapter *adapter = netdev_priv(netdev); - struct netxen_recv_context *recv_ctx = &adapter->recv_ctx; - - disable_irq(adapter->irq); - for (ring = 0; ring < adapter->max_sds_rings; ring++) { - sds_ring = &recv_ctx->sds_rings[ring]; - netxen_intr(adapter->irq, sds_ring); - } - enable_irq(adapter->irq); -} -#endif - static int nx_incr_dev_ref_cnt(struct netxen_adapter *adapter) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 9b3ef00e5782..a71382687ef2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -11987,6 +11987,7 @@ struct public_global { u32 running_bundle_id; s32 external_temperature; u32 mdump_reason; + u64 reserved; u32 data_ptr; u32 data_size; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 17f3dfa2cc94..e860bdf0f752 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1710,7 +1710,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->local_ip[0] = ntohl(iph->daddr); cm_info->remote_ip[0] = ntohl(iph->saddr); - cm_info->ip_version = TCP_IPV4; + cm_info->ip_version = QED_TCP_IPV4; ip_hlen = (iph->ihl) * sizeof(u32); *payload_len = ntohs(iph->tot_len) - ip_hlen; @@ -1730,7 +1730,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->remote_ip[i] = ntohl(ip6h->saddr.in6_u.u6_addr32[i]); } - cm_info->ip_version = TCP_IPV6; + cm_info->ip_version = QED_TCP_IPV6; ip_hlen = sizeof(*ip6h); *payload_len = ntohs(ip6h->payload_len); diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index be941cfaa2d4..c71391b9c757 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -228,7 +228,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, num_cons, "Toggle"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, - "Failed to allocate toogle bits, rc = %d\n", rc); + "Failed to allocate toggle bits, rc = %d\n", rc); goto free_cq_map; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 7d7a64c55ff1..f9167d1354bb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -140,23 +140,16 @@ static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) { - enum roce_flavor flavor; - switch (roce_mode) { case ROCE_V1: - flavor = PLAIN_ROCE; - break; + return PLAIN_ROCE; case ROCE_V2_IPV4: - flavor = RROCE_IPV4; - break; + return RROCE_IPV4; case ROCE_V2_IPV6: - flavor = ROCE_V2_IPV6; - break; + return RROCE_IPV6; default: - flavor = MAX_ROCE_MODE; - break; + return MAX_ROCE_FLAVOR; } - return flavor; } static void qed_roce_free_cid_pair(struct qed_hwfn *p_hwfn, u16 cid) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 8de644b4721e..77b6248ad3b9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -154,7 +154,7 @@ qed_set_pf_update_tunn_mode(struct qed_tunnel_info *p_tun, static void qed_set_tunn_cls_info(struct qed_tunnel_info *p_tun, struct qed_tunnel_info *p_src) { - enum tunnel_clss type; + int type; p_tun->b_update_rx_cls = p_src->b_update_rx_cls; p_tun->b_update_tx_cls = p_src->b_update_tx_cls; diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 3d4269659820..be118d057b92 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -413,7 +413,6 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) } if (!p_iov->b_pre_fp_hsi && - ETH_HSI_VER_MINOR && (resp->pfdev_info.minor_fp_hsi < ETH_HSI_VER_MINOR)) { DP_INFO(p_hwfn, "PF is using older fastpath HSI; %02x.%02x is configured\n", @@ -572,7 +571,7 @@ free_p_iov: static void __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, u8 *p_cls) + enum qed_tunn_mode mask, u8 *p_cls) { if (p_src->b_update_mode) { p_req->tun_mode_update_mask |= BIT(mask); @@ -587,7 +586,7 @@ __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, static void qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, + enum qed_tunn_mode mask, u8 *p_cls, struct qed_tunn_update_udp_port *p_port, u8 *p_update_port, u16 *p_udp_port) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 81312924df14..0c443ea98479 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1800,7 +1800,8 @@ struct qlcnic_hardware_ops { int (*config_loopback) (struct qlcnic_adapter *, u8); int (*clear_loopback) (struct qlcnic_adapter *, u8); int (*config_promisc_mode) (struct qlcnic_adapter *, u32); - void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16); + void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *tx_ring); int (*get_board_info) (struct qlcnic_adapter *); void (*set_mac_filter_count) (struct qlcnic_adapter *); void (*free_mac_list) (struct qlcnic_adapter *); @@ -2064,9 +2065,10 @@ static inline int qlcnic_nic_set_promisc(struct qlcnic_adapter *adapter, } static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter, - u64 *addr, u16 id) + u64 *addr, u16 vlan, + struct qlcnic_host_tx_ring *tx_ring) { - adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id); + adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring); } static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 569d54ededec..a79d84f99102 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2135,7 +2135,8 @@ out: } void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, - u16 vlan_id) + u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring) { u8 mac[ETH_ALEN]; memcpy(&mac, addr, ETH_ALEN); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index b75a81246856..73fe2f64491d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct qlcnic_adapter *, ulong, u32); int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32); int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int); int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int); -void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16); +void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *ring); int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *); int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h index 4bb33af8e2b3..56a3bd9e37dc 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h @@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, struct net_device *netdev); void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *); void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, - u64 *uaddr, u16 vlan_id); + u64 *uaddr, u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring); int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *, struct ethtool_coalesce *); int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 84dd83031a1b..9647578cbe6a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -268,13 +268,12 @@ static void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, } void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, - u16 vlan_id) + u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring) { struct cmd_desc_type0 *hwdesc; struct qlcnic_nic_req *req; struct qlcnic_mac_req *mac_req; struct qlcnic_vlan_req *vlan_req; - struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring; u32 producer; u64 word; @@ -301,7 +300,8 @@ void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, static void qlcnic_send_filter(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *first_desc, - struct sk_buff *skb) + struct sk_buff *skb, + struct qlcnic_host_tx_ring *tx_ring) { struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data); struct ethhdr *phdr = (struct ethhdr *)(skb->data); @@ -335,7 +335,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, tmp_fil->vlan_id == vlan_id) { if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) qlcnic_change_filter(adapter, &src_addr, - vlan_id); + vlan_id, tx_ring); tmp_fil->ftime = jiffies; return; } @@ -350,7 +350,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, if (!fil) return; - qlcnic_change_filter(adapter, &src_addr, vlan_id); + qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); fil->ftime = jiffies; fil->vlan_id = vlan_id; memcpy(fil->faddr, &src_addr, ETH_ALEN); @@ -766,7 +766,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } if (adapter->drv_mac_learn) - qlcnic_send_filter(adapter, first_desc, skb); + qlcnic_send_filter(adapter, first_desc, skb, tx_ring); tx_ring->tx_stats.tx_bytes += skb->len; tx_ring->tx_stats.xmit_called++; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 2d38d1ac2aae..dbd48012224f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -59,9 +59,6 @@ static int qlcnic_close(struct net_device *netdev); static void qlcnic_tx_timeout(struct net_device *netdev); static void qlcnic_attach_work(struct work_struct *work); static void qlcnic_fwinit_work(struct work_struct *work); -#ifdef CONFIG_NET_POLL_CONTROLLER -static void qlcnic_poll_controller(struct net_device *netdev); -#endif static void qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding); static int qlcnic_can_start_firmware(struct qlcnic_adapter *adapter); @@ -545,9 +542,6 @@ static const struct net_device_ops qlcnic_netdev_ops = { .ndo_udp_tunnel_add = qlcnic_add_vxlan_port, .ndo_udp_tunnel_del = qlcnic_del_vxlan_port, .ndo_features_check = qlcnic_features_check, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = qlcnic_poll_controller, -#endif #ifdef CONFIG_QLCNIC_SRIOV .ndo_set_vf_mac = qlcnic_sriov_set_vf_mac, .ndo_set_vf_rate = qlcnic_sriov_set_vf_tx_rate, @@ -3200,45 +3194,6 @@ static irqreturn_t qlcnic_msix_tx_intr(int irq, void *data) return IRQ_HANDLED; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void qlcnic_poll_controller(struct net_device *netdev) -{ - struct qlcnic_adapter *adapter = netdev_priv(netdev); - struct qlcnic_host_sds_ring *sds_ring; - struct qlcnic_recv_context *recv_ctx; - struct qlcnic_host_tx_ring *tx_ring; - int ring; - - if (!test_bit(__QLCNIC_DEV_UP, &adapter->state)) - return; - - recv_ctx = adapter->recv_ctx; - - for (ring = 0; ring < adapter->drv_sds_rings; ring++) { - sds_ring = &recv_ctx->sds_rings[ring]; - qlcnic_disable_sds_intr(adapter, sds_ring); - napi_schedule(&sds_ring->napi); - } - - if (adapter->flags & QLCNIC_MSIX_ENABLED) { - /* Only Multi-Tx queue capable devices need to - * schedule NAPI for TX rings - */ - if ((qlcnic_83xx_check(adapter) && - (adapter->flags & QLCNIC_TX_INTR_SHARED)) || - (qlcnic_82xx_check(adapter) && - !qlcnic_check_multi_tx(adapter))) - return; - - for (ring = 0; ring < adapter->drv_tx_rings; ring++) { - tx_ring = &adapter->tx_ring[ring]; - qlcnic_disable_tx_intr(adapter, tx_ring); - napi_schedule(&tx_ring->napi); - } - } -} -#endif - static void qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding) { diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 7fd86d40a337..11167abe5934 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -113,7 +113,7 @@ rmnet_map_ingress_handler(struct sk_buff *skb, struct sk_buff *skbn; if (skb->dev->type == ARPHRD_ETHER) { - if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_KERNEL)) { + if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_ATOMIC)) { kfree_skb(skb); return; } @@ -147,7 +147,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, } if (skb_headroom(skb) < required_headroom) { - if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL)) + if (pskb_expand_head(skb, required_headroom, 0, GFP_ATOMIC)) return -ENOMEM; } @@ -189,6 +189,9 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) if (!skb) goto done; + if (skb->pkt_type == PACKET_LOOPBACK) + return RX_HANDLER_PASS; + dev = skb->dev; port = rmnet_get_port(dev); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index ab30aaeac6d3..3a5e6160bf0d 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4072,13 +4072,12 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) genphy_soft_reset(dev->phydev); - /* It was reported that chip version 33 ends up with 10MBit/Half on a + /* It was reported that several chips end up with 10MBit/Half on a * 1GBit link after resuming from S3. For whatever reason the PHY on - * this chip doesn't properly start a renegotiation when soft-reset. + * these chips doesn't properly start a renegotiation when soft-reset. * Explicitly requesting a renegotiation fixes this. */ - if (tp->mac_version == RTL_GIGA_MAC_VER_33 && - dev->phydev->autoneg == AUTONEG_ENABLE) + if (dev->phydev->autoneg == AUTONEG_ENABLE) phy_restart_aneg(dev->phydev); } @@ -4283,8 +4282,8 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24: - case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_35: + case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_36: + case RTL_GIGA_MAC_VER_38: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: @@ -4536,9 +4535,14 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp) static void rtl_set_tx_config_registers(struct rtl8169_private *tp) { - /* Set DMA burst size and Interframe Gap Time */ - RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) | - (InterFrameGap << TxInterFrameGapShift)); + u32 val = TX_DMA_BURST << TxDMAShift | + InterFrameGap << TxInterFrameGapShift; + + if (tp->mac_version >= RTL_GIGA_MAC_VER_34 && + tp->mac_version != RTL_GIGA_MAC_VER_39) + val |= TXCFG_AUTO_FIFO; + + RTL_W32(tp, TxConfig, val); } static void rtl_set_rx_max_size(struct rtl8169_private *tp) @@ -5033,7 +5037,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) rtl_disable_clock_request(tp); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); /* Adjust EEE LED frequency */ @@ -5067,7 +5070,6 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) rtl_disable_clock_request(tp); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); @@ -5112,8 +5114,6 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp) static void rtl_hw_start_8168g(struct rtl8169_private *tp) { - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); - rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC); @@ -5211,8 +5211,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_hw_aspm_clkreq_enable(tp, false); rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1)); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); - rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC); @@ -5295,8 +5293,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) { rtl8168ep_stop_cmac(tp); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); - rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x5f, ERIAR_EXGMAC); @@ -5618,7 +5614,6 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) /* Force LAN exit from ASPM if Rx/Tx are not idle */ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402)); @@ -6869,8 +6864,10 @@ static int rtl8169_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); + struct rtl8169_private *tp = netdev_priv(dev); rtl8169_net_suspend(dev); + clk_disable_unprepare(tp->clk); return 0; } @@ -6898,6 +6895,9 @@ static int rtl8169_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); + struct rtl8169_private *tp = netdev_priv(dev); + + clk_prepare_enable(tp->clk); if (netif_running(dev)) __rtl8169_resume(dev); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 330233286e78..3d0dd39c289e 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2208,29 +2208,6 @@ static void efx_fini_napi(struct efx_nic *efx) /************************************************************************** * - * Kernel netpoll interface - * - *************************************************************************/ - -#ifdef CONFIG_NET_POLL_CONTROLLER - -/* Although in the common case interrupts will be disabled, this is not - * guaranteed. However, all our work happens inside the NAPI callback, - * so no locking is required. - */ -static void efx_netpoll(struct net_device *net_dev) -{ - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_schedule_channel(channel); -} - -#endif - -/************************************************************************** - * * Kernel net device interface * *************************************************************************/ @@ -2509,9 +2486,6 @@ static const struct net_device_ops efx_netdev_ops = { #endif .ndo_get_phys_port_id = efx_get_phys_port_id, .ndo_get_phys_port_name = efx_get_phys_port_name, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = efx_netpoll, -#endif .ndo_setup_tc = efx_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = efx_filter_rfs, diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index dd5530a4f8c8..03e2455c502e 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2054,29 +2054,6 @@ static void ef4_fini_napi(struct ef4_nic *efx) /************************************************************************** * - * Kernel netpoll interface - * - *************************************************************************/ - -#ifdef CONFIG_NET_POLL_CONTROLLER - -/* Although in the common case interrupts will be disabled, this is not - * guaranteed. However, all our work happens inside the NAPI callback, - * so no locking is required. - */ -static void ef4_netpoll(struct net_device *net_dev) -{ - struct ef4_nic *efx = netdev_priv(net_dev); - struct ef4_channel *channel; - - ef4_for_each_channel(channel, efx) - ef4_schedule_channel(channel); -} - -#endif - -/************************************************************************** - * * Kernel net device interface * *************************************************************************/ @@ -2250,9 +2227,6 @@ static const struct net_device_ops ef4_netdev_ops = { .ndo_set_mac_address = ef4_set_mac_address, .ndo_set_rx_mode = ef4_set_rx_mode, .ndo_set_features = ef4_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ef4_netpoll, -#endif .ndo_setup_tc = ef4_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = ef4_filter_rfs, diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 7aa5ebb6766c..4289ccb26e4e 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -735,8 +735,11 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) u16 idx = dring->tail; struct netsec_de *de = dring->vaddr + (DESC_SZ * idx); - if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) + if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) { + /* reading the register clears the irq */ + netsec_read(priv, NETSEC_REG_NRM_RX_PKTCNT); break; + } /* This barrier is needed to keep us from reading * any other fields out of the netsec_de until we have diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 16ec7af6ab7b..ba9df430fca6 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -966,6 +966,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); + if (ym->cmd != SIOCYAMSMCS) + return -EINVAL; if (ym->bitrate > YAM_MAXBITRATE) { kfree(ym); return -EINVAL; @@ -981,6 +983,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg))) return -EFAULT; + if (yi.cmd != SIOCYAMSCFG) + return -EINVAL; if ((yi.cfg.mask & YAM_IOBASE) && netif_running(dev)) return -EINVAL; /* Cannot change this parameter when up */ if ((yi.cfg.mask & YAM_IRQ) && netif_running(dev)) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 23a52b9293f3..cd1d8faccca5 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1308,8 +1308,7 @@ static int adf7242_remove(struct spi_device *spi) { struct adf7242_local *lp = spi_get_drvdata(spi); - if (!IS_ERR_OR_NULL(lp->debugfs_root)) - debugfs_remove_recursive(lp->debugfs_root); + debugfs_remove_recursive(lp->debugfs_root); cancel_delayed_work_sync(&lp->work); destroy_workqueue(lp->wqueue); diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 58299fb666ed..0ff5a403a8dc 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -634,10 +634,9 @@ static int ca8210_test_int_driver_write( for (i = 0; i < len; i++) dev_dbg(&priv->spi->dev, "%#03x\n", buf[i]); - fifo_buffer = kmalloc(len, GFP_KERNEL); + fifo_buffer = kmemdup(buf, len, GFP_KERNEL); if (!fifo_buffer) return -ENOMEM; - memcpy(fifo_buffer, buf, len); kfifo_in(&test->up_fifo, &fifo_buffer, 4); wake_up_interruptible(&priv->test.readq); @@ -3044,8 +3043,7 @@ static void ca8210_test_interface_clear(struct ca8210_priv *priv) { struct ca8210_test *test = &priv->test; - if (!IS_ERR(test->ca8210_dfs_spi_int)) - debugfs_remove(test->ca8210_dfs_spi_int); + debugfs_remove(test->ca8210_dfs_spi_int); kfifo_free(&test->up_fifo); dev_info(&priv->spi->dev, "Test interface removed\n"); } diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index e428277781ac..04891429a554 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -903,19 +903,19 @@ mcr20a_irq_clean_complete(void *context) switch (seq_state) { /* TX IRQ, RX IRQ and SEQ IRQ */ - case (0x03): + case (DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { lp->is_tx = 0; dev_dbg(printdev(lp), "TX is done. No ACK\n"); mcr20a_handle_tx_complete(lp); } break; - case (0x05): + case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_SEQIRQ): /* rx is starting */ dev_dbg(printdev(lp), "RX is starting\n"); mcr20a_handle_rx(lp); break; - case (0x07): + case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { /* tx is done */ lp->is_tx = 0; @@ -927,7 +927,7 @@ mcr20a_irq_clean_complete(void *context) mcr20a_handle_rx(lp); } break; - case (0x01): + case (DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { dev_dbg(printdev(lp), "TX is starting\n"); mcr20a_handle_tx(lp); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index db1172db1e7c..19ab8a7d1e48 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -93,7 +93,12 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) return !phydev->suspended; - /* Don't suspend PHY if the attached netdev parent may wakeup. + if (netdev->wol_enabled) + return false; + + /* As long as not all affected network drivers support the + * wol_enabled flag, let's check for hints that WoL is enabled. + * Don't suspend PHY if the attached netdev parent may wake up. * The parent may point to a PCI device, as in tg3 driver. */ if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) @@ -1132,9 +1137,9 @@ void phy_detach(struct phy_device *phydev) sysfs_remove_link(&dev->dev.kobj, "phydev"); sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev"); } + phy_suspend(phydev); phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; - phy_suspend(phydev); phydev->phylink = NULL; phy_led_triggers_unregister(phydev); @@ -1168,12 +1173,13 @@ EXPORT_SYMBOL(phy_detach); int phy_suspend(struct phy_device *phydev) { struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); + struct net_device *netdev = phydev->attached_dev; struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; int ret = 0; /* If the device has WOL enabled, we cannot suspend the PHY */ phy_ethtool_get_wol(phydev, &wol); - if (wol.wolopts) + if (wol.wolopts || (netdev && netdev->wol_enabled)) return -EBUSY; if (phydev->drv && phydrv->suspend) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 3ba5cf2a8a5f..7abca86c3aa9 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -717,6 +717,30 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) return 0; } +static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy, + phy_interface_t interface) +{ + int ret; + + if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || + (pl->link_an_mode == MLO_AN_INBAND && + phy_interface_mode_is_8023z(interface)))) + return -EINVAL; + + if (pl->phydev) + return -EBUSY; + + ret = phy_attach_direct(pl->netdev, phy, 0, interface); + if (ret) + return ret; + + ret = phylink_bringup_phy(pl, phy); + if (ret) + phy_detach(phy); + + return ret; +} + /** * phylink_connect_phy() - connect a PHY to the phylink instance * @pl: a pointer to a &struct phylink returned from phylink_create() @@ -734,31 +758,13 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) */ int phylink_connect_phy(struct phylink *pl, struct phy_device *phy) { - int ret; - - if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || - (pl->link_an_mode == MLO_AN_INBAND && - phy_interface_mode_is_8023z(pl->link_interface)))) - return -EINVAL; - - if (pl->phydev) - return -EBUSY; - /* Use PHY device/driver interface */ if (pl->link_interface == PHY_INTERFACE_MODE_NA) { pl->link_interface = phy->interface; pl->link_config.interface = pl->link_interface; } - ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface); - if (ret) - return ret; - - ret = phylink_bringup_phy(pl, phy); - if (ret) - phy_detach(phy); - - return ret; + return __phylink_connect_phy(pl, phy, pl->link_interface); } EXPORT_SYMBOL_GPL(phylink_connect_phy); @@ -1672,7 +1678,9 @@ static void phylink_sfp_link_up(void *upstream) static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) { - return phylink_connect_phy(upstream, phy); + struct phylink *pl = upstream; + + return __phylink_connect_phy(upstream, phy, pl->link_config.interface); } static void phylink_sfp_disconnect_phy(void *upstream) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 52fffb98fde9..fd8bb998ae52 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -163,8 +163,6 @@ static const enum gpiod_flags gpio_flags[] = { /* Give this long for the PHY to reset. */ #define T_PHY_RESET_MS 50 -static DEFINE_MUTEX(sfp_mutex); - struct sff_data { unsigned int gpios; bool (*module_supported)(const struct sfp_eeprom_id *id); @@ -1098,8 +1096,11 @@ static int sfp_hwmon_insert(struct sfp *sfp) static void sfp_hwmon_remove(struct sfp *sfp) { - hwmon_device_unregister(sfp->hwmon_dev); - kfree(sfp->hwmon_name); + if (!IS_ERR_OR_NULL(sfp->hwmon_dev)) { + hwmon_device_unregister(sfp->hwmon_dev); + sfp->hwmon_dev = NULL; + kfree(sfp->hwmon_name); + } } #else static int sfp_hwmon_insert(struct sfp *sfp) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6a047d30e8c6..d887016e54b6 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1167,6 +1167,12 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return -EBUSY; } + if (dev == port_dev) { + NL_SET_ERR_MSG(extack, "Cannot enslave team device to itself"); + netdev_err(dev, "Cannot enslave team device to itself\n"); + return -EINVAL; + } + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up"); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e2648b5a3861..50e9cc19023a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -181,6 +181,7 @@ struct tun_file { }; struct napi_struct napi; bool napi_enabled; + bool napi_frags_enabled; struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; @@ -313,32 +314,32 @@ static int tun_napi_poll(struct napi_struct *napi, int budget) } static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, - bool napi_en) + bool napi_en, bool napi_frags) { tfile->napi_enabled = napi_en; + tfile->napi_frags_enabled = napi_en && napi_frags; if (napi_en) { netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, NAPI_POLL_WEIGHT); napi_enable(&tfile->napi); - mutex_init(&tfile->napi_mutex); } } -static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile) +static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) napi_disable(&tfile->napi); } -static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) +static void tun_napi_del(struct tun_file *tfile) { if (tfile->napi_enabled) netif_napi_del(&tfile->napi); } -static bool tun_napi_frags_enabled(const struct tun_struct *tun) +static bool tun_napi_frags_enabled(const struct tun_file *tfile) { - return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS; + return tfile->napi_frags_enabled; } #ifdef CONFIG_TUN_VNET_CROSS_LE @@ -690,8 +691,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); if (tun && clean) { - tun_napi_disable(tun, tfile); - tun_napi_del(tun, tfile); + tun_napi_disable(tfile); + tun_napi_del(tfile); } if (tun && !tfile->detached) { @@ -758,7 +759,7 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); - tun_napi_disable(tun, tfile); + tun_napi_disable(tfile); tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); @@ -774,7 +775,7 @@ static void tun_detach_all(struct net_device *dev) synchronize_net(); for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - tun_napi_del(tun, tfile); + tun_napi_del(tfile); /* Drop read queue */ tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); @@ -793,7 +794,7 @@ static void tun_detach_all(struct net_device *dev) } static int tun_attach(struct tun_struct *tun, struct file *file, - bool skip_filter, bool napi) + bool skip_filter, bool napi, bool napi_frags) { struct tun_file *tfile = file->private_data; struct net_device *dev = tun->dev; @@ -866,7 +867,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, tun_enable_queue(tfile); } else { sock_hold(&tfile->sk); - tun_napi_init(tun, tfile, napi); + tun_napi_init(tun, tfile, napi, napi_frags); } tun_set_real_num_queues(tun); @@ -1709,7 +1710,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, int err; u32 rxhash = 0; int skb_xdp = 1; - bool frags = tun_napi_frags_enabled(tun); + bool frags = tun_napi_frags_enabled(tfile); if (!(tun->dev->flags & IFF_UP)) return -EIO; @@ -2534,7 +2535,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) return err; err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, - ifr->ifr_flags & IFF_NAPI); + ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS); if (err < 0) return err; @@ -2632,7 +2634,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) (ifr->ifr_flags & TUN_FEATURES); INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI); + err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS); if (err < 0) goto err_free_flow; @@ -2781,7 +2784,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) ret = security_tun_dev_attach_queue(tun->security); if (ret < 0) goto unlock; - ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI); + ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI, + tun->flags & IFF_NAPI_FRAGS); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) @@ -3199,6 +3203,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) return -ENOMEM; } + mutex_init(&tfile->napi_mutex); RCU_INIT_POINTER(tfile->tun, NULL); tfile->flags = 0; tfile->ifindex = 0; diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index e95dd12edec4..023b8d0bf175 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -607,6 +607,9 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 9e8ad372f419..2207f7a7d1ff 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -566,6 +566,9 @@ ax88179_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_MODE_RWLC; if (wolinfo->wolopts & WAKE_MAGIC) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a9991c5f4736..c3c9ba44e2a1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1401,19 +1401,10 @@ static int lan78xx_set_wol(struct net_device *netdev, if (ret < 0) return ret; - pdata->wol = 0; - if (wol->wolopts & WAKE_UCAST) - pdata->wol |= WAKE_UCAST; - if (wol->wolopts & WAKE_MCAST) - pdata->wol |= WAKE_MCAST; - if (wol->wolopts & WAKE_BCAST) - pdata->wol |= WAKE_BCAST; - if (wol->wolopts & WAKE_MAGIC) - pdata->wol |= WAKE_MAGIC; - if (wol->wolopts & WAKE_PHY) - pdata->wol |= WAKE_PHY; - if (wol->wolopts & WAKE_ARP) - pdata->wol |= WAKE_ARP; + if (wol->wolopts & ~WAKE_ALL) + return -EINVAL; + + pdata->wol = wol->wolopts; device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 533b6fb8d923..72a55b6b4211 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1241,6 +1241,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2cd71bdb6484..f1b5201cc320 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4506,6 +4506,9 @@ static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (!rtl_can_wakeup(tp)) return -EOPNOTSUPP; + if (wol->wolopts & ~WAKE_ANY) + return -EINVAL; + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out_set_wol; diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 05553d252446..ec287c9741e8 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -731,6 +731,9 @@ static int smsc75xx_ethtool_set_wol(struct net_device *net, struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); @@ -1517,6 +1520,7 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); if (pdata) { + cancel_work_sync(&pdata->set_multicast); netif_dbg(dev, ifdown, dev->net, "free pdata\n"); kfree(pdata); pdata = NULL; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 06b4d290784d..262e7a3c23cb 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -774,6 +774,9 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 9277a0f228df..35f39f23d881 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -421,6 +421,9 @@ sr_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= SR_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 765920905226..dab504ec5e50 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1699,17 +1699,6 @@ static void virtnet_stats(struct net_device *dev, tot->rx_frame_errors = dev->stats.rx_frame_errors; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void virtnet_netpoll(struct net_device *dev) -{ - struct virtnet_info *vi = netdev_priv(dev); - int i; - - for (i = 0; i < vi->curr_queue_pairs; i++) - napi_schedule(&vi->rq[i].napi); -} -#endif - static void virtnet_ack_link_announce(struct virtnet_info *vi) { rtnl_lock(); @@ -2447,9 +2436,6 @@ static const struct net_device_ops virtnet_netdev = { .ndo_get_stats64 = virtnet_stats, .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = virtnet_netpoll, -#endif .ndo_bpf = virtnet_xdp, .ndo_xdp_xmit = virtnet_xdp_xmit, .ndo_features_check = passthru_features_check, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ababba37d735..2b8da2b7e721 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3539,6 +3539,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ @@ -3603,6 +3604,8 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) } if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) || + nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT, + !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) || nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || nla_put_u8(skb, IFLA_VXLAN_LEARNING, diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c index 094cea775d0c..ef298d8525c5 100644 --- a/drivers/net/wimax/i2400m/control.c +++ b/drivers/net/wimax/i2400m/control.c @@ -257,7 +257,7 @@ static const struct [I2400M_MS_ACCESSIBILITY_ERROR] = { "accesibility error", -EIO }, [I2400M_MS_BUSY] = { "busy", -EBUSY }, [I2400M_MS_CORRUPTED_TLV] = { "corrupted TLV", -EILSEQ }, - [I2400M_MS_UNINITIALIZED] = { "not unitialized", -EILSEQ }, + [I2400M_MS_UNINITIALIZED] = { "uninitialized", -EILSEQ }, [I2400M_MS_UNKNOWN_ERROR] = { "unknown error", -EIO }, [I2400M_MS_PRODUCTION_ERROR] = { "production error", -EIO }, [I2400M_MS_NO_RF] = { "no RF", -EIO }, diff --git a/drivers/net/wireless/broadcom/b43/dma.c b/drivers/net/wireless/broadcom/b43/dma.c index 6b0e1ec346cb..d46d57b989ae 100644 --- a/drivers/net/wireless/broadcom/b43/dma.c +++ b/drivers/net/wireless/broadcom/b43/dma.c @@ -1518,13 +1518,15 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev, } } else { /* More than a single header/data pair were missed. - * Report this error, and reset the controller to + * Report this error. If running with open-source + * firmware, then reset the controller to * revive operation. */ b43dbg(dev->wl, "Out of order TX status report on DMA ring %d. Expected %d, but got %d\n", ring->index, firstused, slot); - b43_controller_restart(dev, "Out of order TX"); + if (dev->fw.opensource) + b43_controller_restart(dev, "Out of order TX"); return; } } diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/1000.c b/drivers/net/wireless/intel/iwlwifi/cfg/1000.c index 591687984962..497fd766d87c 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/1000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/1000.c @@ -51,6 +51,7 @@ static const struct iwl_base_params iwl1000_base_params = { .num_of_queues = IWLAGN_NUM_QUEUES, + .max_tfd_queue_size = 256, .eeprom_size = OTP_LOW_IMAGE_SIZE, .pll_cfg = true, .max_ll_items = OTP_MAX_LL_ITEMS_1000, diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 1068757ec42e..07442ada6dd0 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -520,7 +520,6 @@ struct mac80211_hwsim_data { int channels, idx; bool use_chanctx; bool destroy_on_close; - struct work_struct destroy_work; u32 portid; char alpha2[2]; const struct ieee80211_regdomain *regd; @@ -2935,8 +2934,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, hwsim_radios_generation++; spin_unlock_bh(&hwsim_radio_lock); - if (idx > 0) - hwsim_mcast_new_radio(idx, info, param); + hwsim_mcast_new_radio(idx, info, param); return idx; @@ -3565,30 +3563,27 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; -static void destroy_radio(struct work_struct *work) -{ - struct mac80211_hwsim_data *data = - container_of(work, struct mac80211_hwsim_data, destroy_work); - - hwsim_radios_generation++; - mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL); -} - static void remove_user_radios(u32 portid) { struct mac80211_hwsim_data *entry, *tmp; + LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(entry, tmp, &hwsim_radios, list) { if (entry->destroy_on_close && entry->portid == portid) { - list_del(&entry->list); + list_move(&entry->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &entry->rht, hwsim_rht_params); - INIT_WORK(&entry->destroy_work, destroy_radio); - queue_work(hwsim_wq, &entry->destroy_work); + hwsim_radios_generation++; } } spin_unlock_bh(&hwsim_radio_lock); + + list_for_each_entry_safe(entry, tmp, &list, list) { + list_del(&entry->list); + mac80211_hwsim_del_radio(entry, wiphy_name(entry->hw->wiphy), + NULL); + } } static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, @@ -3646,6 +3641,7 @@ static __net_init int hwsim_init_net(struct net *net) static void __net_exit hwsim_exit_net(struct net *net) { struct mac80211_hwsim_data *data, *tmp; + LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(data, tmp, &hwsim_radios, list) { @@ -3656,17 +3652,19 @@ static void __net_exit hwsim_exit_net(struct net *net) if (data->netgroup == hwsim_net_get_netgroup(&init_net)) continue; - list_del(&data->list); + list_move(&data->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &data->rht, hwsim_rht_params); hwsim_radios_generation++; - spin_unlock_bh(&hwsim_radio_lock); + } + spin_unlock_bh(&hwsim_radio_lock); + + list_for_each_entry_safe(data, tmp, &list, list) { + list_del(&data->list); mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL); - spin_lock_bh(&hwsim_radio_lock); } - spin_unlock_bh(&hwsim_radio_lock); ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net)); } diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c index 43743c26c071..39bf85d0ade0 100644 --- a/drivers/net/wireless/marvell/libertas/if_sdio.c +++ b/drivers/net/wireless/marvell/libertas/if_sdio.c @@ -1317,6 +1317,10 @@ static int if_sdio_suspend(struct device *dev) if (priv->wol_criteria == EHS_REMOVE_WAKEUP) { dev_info(dev, "Suspend without wake params -- powering down card\n"); if (priv->fw_ready) { + ret = lbs_suspend(priv); + if (ret) + return ret; + priv->power_up_on_resume = true; if_sdio_power_off(card); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/main.c b/drivers/net/wireless/mediatek/mt76/mt76x0/main.c index cf6ffb1ba4a2..22bc9d368728 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/main.c @@ -77,9 +77,8 @@ static void mt76x0_remove_interface(struct ieee80211_hw *hw, { struct mt76x0_dev *dev = hw->priv; struct mt76_vif *mvif = (struct mt76_vif *) vif->drv_priv; - unsigned int wcid = mvif->group_wcid.idx; - dev->wcid_mask[wcid / BITS_PER_LONG] &= ~BIT(wcid % BITS_PER_LONG); + dev->vif_mask &= ~BIT(mvif->idx); } static int mt76x0_config(struct ieee80211_hw *hw, u32 changed) diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index 7780b07543bb..79e59f2379a2 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -258,7 +258,7 @@ int mt76u_buf_alloc(struct mt76_dev *dev, struct mt76u_buf *buf, if (!buf->urb) return -ENOMEM; - buf->urb->sg = devm_kzalloc(dev->dev, nsgs * sizeof(*buf->urb->sg), + buf->urb->sg = devm_kcalloc(dev->dev, nsgs, sizeof(*buf->urb->sg), gfp); if (!buf->urb->sg) return -ENOMEM; @@ -464,8 +464,8 @@ static int mt76u_alloc_rx(struct mt76_dev *dev) int i, err, nsgs; spin_lock_init(&q->lock); - q->entry = devm_kzalloc(dev->dev, - MT_NUM_RX_ENTRIES * sizeof(*q->entry), + q->entry = devm_kcalloc(dev->dev, + MT_NUM_RX_ENTRIES, sizeof(*q->entry), GFP_KERNEL); if (!q->entry) return -ENOMEM; @@ -717,8 +717,8 @@ static int mt76u_alloc_tx(struct mt76_dev *dev) INIT_LIST_HEAD(&q->swq); q->hw_idx = q2hwq(i); - q->entry = devm_kzalloc(dev->dev, - MT_NUM_TX_ENTRIES * sizeof(*q->entry), + q->entry = devm_kcalloc(dev->dev, + MT_NUM_TX_ENTRIES, sizeof(*q->entry), GFP_KERNEL); if (!q->entry) return -ENOMEM; diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index a46a1e94505d..936c0b3e0ba2 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -241,8 +241,9 @@ struct xenvif_hash_cache { struct xenvif_hash { unsigned int alg; u32 flags; + bool mapping_sel; u8 key[XEN_NETBK_MAX_HASH_KEY_SIZE]; - u32 mapping[XEN_NETBK_MAX_HASH_MAPPING_SIZE]; + u32 mapping[2][XEN_NETBK_MAX_HASH_MAPPING_SIZE]; unsigned int size; struct xenvif_hash_cache cache; }; diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 3c4c58b9fe76..0ccb021f1e78 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -324,7 +324,8 @@ u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; vif->hash.size = size; - memset(vif->hash.mapping, 0, sizeof(u32) * size); + memset(vif->hash.mapping[vif->hash.mapping_sel], 0, + sizeof(u32) * size); return XEN_NETIF_CTRL_STATUS_SUCCESS; } @@ -332,31 +333,49 @@ u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, u32 off) { - u32 *mapping = &vif->hash.mapping[off]; - struct gnttab_copy copy_op = { + u32 *mapping = vif->hash.mapping[!vif->hash.mapping_sel]; + unsigned int nr = 1; + struct gnttab_copy copy_op[2] = {{ .source.u.ref = gref, .source.domid = vif->domid, - .dest.u.gmfn = virt_to_gfn(mapping), .dest.domid = DOMID_SELF, - .dest.offset = xen_offset_in_page(mapping), - .len = len * sizeof(u32), + .len = len * sizeof(*mapping), .flags = GNTCOPY_source_gref - }; + }}; - if ((off + len > vif->hash.size) || copy_op.len > XEN_PAGE_SIZE) + if ((off + len < off) || (off + len > vif->hash.size) || + len > XEN_PAGE_SIZE / sizeof(*mapping)) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; - while (len-- != 0) - if (mapping[off++] >= vif->num_queues) - return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + copy_op[0].dest.u.gmfn = virt_to_gfn(mapping + off); + copy_op[0].dest.offset = xen_offset_in_page(mapping + off); + if (copy_op[0].dest.offset + copy_op[0].len > XEN_PAGE_SIZE) { + copy_op[1] = copy_op[0]; + copy_op[1].source.offset = XEN_PAGE_SIZE - copy_op[0].dest.offset; + copy_op[1].dest.u.gmfn = virt_to_gfn(mapping + off + len); + copy_op[1].dest.offset = 0; + copy_op[1].len = copy_op[0].len - copy_op[1].source.offset; + copy_op[0].len = copy_op[1].source.offset; + nr = 2; + } - if (copy_op.len != 0) { - gnttab_batch_copy(©_op, 1); + memcpy(mapping, vif->hash.mapping[vif->hash.mapping_sel], + vif->hash.size * sizeof(*mapping)); - if (copy_op.status != GNTST_okay) + if (copy_op[0].len != 0) { + gnttab_batch_copy(copy_op, nr); + + if (copy_op[0].status != GNTST_okay || + copy_op[nr - 1].status != GNTST_okay) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; } + while (len-- != 0) + if (mapping[off++] >= vif->num_queues) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + + vif->hash.mapping_sel = !vif->hash.mapping_sel; + return XEN_NETIF_CTRL_STATUS_SUCCESS; } @@ -408,6 +427,8 @@ void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) } if (vif->hash.size != 0) { + const u32 *mapping = vif->hash.mapping[vif->hash.mapping_sel]; + seq_puts(m, "\nHash Mapping:\n"); for (i = 0; i < vif->hash.size; ) { @@ -420,7 +441,7 @@ void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) seq_printf(m, "[%4u - %4u]: ", i, i + n - 1); for (j = 0; j < n; j++, i++) - seq_printf(m, "%4u ", vif->hash.mapping[i]); + seq_printf(m, "%4u ", mapping[i]); seq_puts(m, "\n"); } diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 92274c237200..f6ae23fc3f6b 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -162,7 +162,8 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, if (size == 0) return skb_get_hash_raw(skb) % dev->real_num_tx_queues; - return vif->hash.mapping[skb_get_hash_raw(skb) % size]; + return vif->hash.mapping[vif->hash.mapping_sel] + [skb_get_hash_raw(skb) % size]; } static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 722537e14848..41b49716ac75 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -771,6 +771,9 @@ static void __init of_unittest_parse_interrupts(void) struct of_phandle_args args; int i, rc; + if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) + return; + np = of_find_node_by_path("/testcase-data/interrupts/interrupts0"); if (!np) { pr_err("missing testcase data\n"); @@ -845,6 +848,9 @@ static void __init of_unittest_parse_interrupts_extended(void) struct of_phandle_args args; int i, rc; + if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) + return; + np = of_find_node_by_path("/testcase-data/interrupts/interrupts-extended0"); if (!np) { pr_err("missing testcase data\n"); @@ -1001,15 +1007,19 @@ static void __init of_unittest_platform_populate(void) pdev = of_find_device_by_node(np); unittest(pdev, "device 1 creation failed\n"); - irq = platform_get_irq(pdev, 0); - unittest(irq == -EPROBE_DEFER, "device deferred probe failed - %d\n", irq); + if (!(of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)) { + irq = platform_get_irq(pdev, 0); + unittest(irq == -EPROBE_DEFER, + "device deferred probe failed - %d\n", irq); - /* Test that a parsing failure does not return -EPROBE_DEFER */ - np = of_find_node_by_path("/testcase-data/testcase-device2"); - pdev = of_find_device_by_node(np); - unittest(pdev, "device 2 creation failed\n"); - irq = platform_get_irq(pdev, 0); - unittest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq); + /* Test that a parsing failure does not return -EPROBE_DEFER */ + np = of_find_node_by_path("/testcase-data/testcase-device2"); + pdev = of_find_device_by_node(np); + unittest(pdev, "device 2 creation failed\n"); + irq = platform_get_irq(pdev, 0); + unittest(irq < 0 && irq != -EPROBE_DEFER, + "device parsing error failed - %d\n", irq); + } np = of_find_node_by_path("/testcase-data/platform-tests"); unittest(np, "No testcase data in device tree\n"); diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 50eb0729385b..a41d79b8d46a 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -1145,7 +1145,6 @@ static int mvebu_pcie_parse_request_resources(struct mvebu_pcie *pcie) { struct device *dev = &pcie->pdev->dev; struct device_node *np = dev->of_node; - unsigned int i; int ret; INIT_LIST_HEAD(&pcie->resources); @@ -1179,13 +1178,58 @@ static int mvebu_pcie_parse_request_resources(struct mvebu_pcie *pcie) resource_size(&pcie->io) - 1); pcie->realio.name = "PCI I/O"; + pci_add_resource(&pcie->resources, &pcie->realio); + } + + return devm_request_pci_bus_resources(dev, &pcie->resources); +} + +/* + * This is a copy of pci_host_probe(), except that it does the I/O + * remap as the last step, once we are sure we won't fail. + * + * It should be removed once the I/O remap error handling issue has + * been sorted out. + */ +static int mvebu_pci_host_probe(struct pci_host_bridge *bridge) +{ + struct mvebu_pcie *pcie; + struct pci_bus *bus, *child; + int ret; + + ret = pci_scan_root_bus_bridge(bridge); + if (ret < 0) { + dev_err(bridge->dev.parent, "Scanning root bridge failed"); + return ret; + } + + pcie = pci_host_bridge_priv(bridge); + if (resource_size(&pcie->io) != 0) { + unsigned int i; + for (i = 0; i < resource_size(&pcie->realio); i += SZ_64K) pci_ioremap_io(i, pcie->io.start + i); + } - pci_add_resource(&pcie->resources, &pcie->realio); + bus = bridge->bus; + + /* + * We insert PCI resources into the iomem_resource and + * ioport_resource trees in either pci_bus_claim_resources() + * or pci_bus_assign_resources(). + */ + if (pci_has_flag(PCI_PROBE_ONLY)) { + pci_bus_claim_resources(bus); + } else { + pci_bus_size_bridges(bus); + pci_bus_assign_resources(bus); + + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); } - return devm_request_pci_bus_resources(dev, &pcie->resources); + pci_bus_add_devices(bus); + return 0; } static int mvebu_pcie_probe(struct platform_device *pdev) @@ -1268,7 +1312,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev) bridge->align_resource = mvebu_pcie_align_resource; bridge->msi = pcie->msi; - return pci_host_probe(bridge); + return mvebu_pci_host_probe(bridge); } static const struct of_device_id mvebu_pcie_of_match_table[] = { diff --git a/drivers/pci/controller/pcie-cadence.c b/drivers/pci/controller/pcie-cadence.c index 86f1b002c846..975bcdd6b5c0 100644 --- a/drivers/pci/controller/pcie-cadence.c +++ b/drivers/pci/controller/pcie-cadence.c @@ -180,11 +180,11 @@ int cdns_pcie_init_phy(struct device *dev, struct cdns_pcie *pcie) return 0; } - phy = devm_kzalloc(dev, sizeof(*phy) * phy_count, GFP_KERNEL); + phy = devm_kcalloc(dev, phy_count, sizeof(*phy), GFP_KERNEL); if (!phy) return -ENOMEM; - link = devm_kzalloc(dev, sizeof(*link) * phy_count, GFP_KERNEL); + link = devm_kcalloc(dev, phy_count, sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1835f3a7aa8d..51b6c81671c1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1289,12 +1289,12 @@ int pci_save_state(struct pci_dev *dev) EXPORT_SYMBOL(pci_save_state); static void pci_restore_config_dword(struct pci_dev *pdev, int offset, - u32 saved_val, int retry) + u32 saved_val, int retry, bool force) { u32 val; pci_read_config_dword(pdev, offset, &val); - if (val == saved_val) + if (!force && val == saved_val) return; for (;;) { @@ -1313,25 +1313,36 @@ static void pci_restore_config_dword(struct pci_dev *pdev, int offset, } static void pci_restore_config_space_range(struct pci_dev *pdev, - int start, int end, int retry) + int start, int end, int retry, + bool force) { int index; for (index = end; index >= start; index--) pci_restore_config_dword(pdev, 4 * index, pdev->saved_config_space[index], - retry); + retry, force); } static void pci_restore_config_space(struct pci_dev *pdev) { if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) { - pci_restore_config_space_range(pdev, 10, 15, 0); + pci_restore_config_space_range(pdev, 10, 15, 0, false); /* Restore BARs before the command register. */ - pci_restore_config_space_range(pdev, 4, 9, 10); - pci_restore_config_space_range(pdev, 0, 3, 0); + pci_restore_config_space_range(pdev, 4, 9, 10, false); + pci_restore_config_space_range(pdev, 0, 3, 0, false); + } else if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + pci_restore_config_space_range(pdev, 12, 15, 0, false); + + /* + * Force rewriting of prefetch registers to avoid S3 resume + * issues on Intel PCI bridges that occur when these + * registers are not explicitly written. + */ + pci_restore_config_space_range(pdev, 9, 11, 0, true); + pci_restore_config_space_range(pdev, 0, 8, 0, false); } else { - pci_restore_config_space_range(pdev, 0, 15, 0); + pci_restore_config_space_range(pdev, 0, 15, 0, false); } } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 7f01f6f60b87..d0b7dd8fb184 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -485,7 +485,13 @@ static int armpmu_filter_match(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(cpu, &armpmu->supported_cpus); + int ret; + + ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus); + if (ret && armpmu->filter_match) + return armpmu->filter_match(event); + + return ret; } static ssize_t armpmu_cpumask_show(struct device *dev, diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index 4a8a8efadefa..cf73a403d22d 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -636,6 +636,14 @@ static int mcp23s08_irq_setup(struct mcp23s08 *mcp) return err; } + return 0; +} + +static int mcp23s08_irqchip_setup(struct mcp23s08 *mcp) +{ + struct gpio_chip *chip = &mcp->chip; + int err; + err = gpiochip_irqchip_add_nested(chip, &mcp23s08_irq_chip, 0, @@ -912,7 +920,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, } if (mcp->irq && mcp->irq_controller) { - ret = mcp23s08_irq_setup(mcp); + ret = mcp23s08_irqchip_setup(mcp); if (ret) goto fail; } @@ -944,6 +952,9 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, goto fail; } + if (mcp->irq) + ret = mcp23s08_irq_setup(mcp); + fail: if (ret < 0) dev_dbg(dev, "can't setup chip %d, --> %d\n", addr, ret); diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 398393ab5df8..b6fd4838f60f 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -520,7 +520,7 @@ static int get_next_event_xfer(struct cros_ec_device *ec_dev, ret = cros_ec_cmd_xfer(ec_dev, msg); if (ret > 0) { ec_dev->event_size = ret - 1; - memcpy(&ec_dev->event_data, msg->data, ec_dev->event_size); + memcpy(&ec_dev->event_data, msg->data, ret); } return ret; diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index eceba3858cef..2f61f5579aa5 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -210,11 +210,11 @@ static int sclp_early_setup(int disable, int *have_linemode, int *have_vt220) * Output one or more lines of text on the SCLP console (VT220 and / * or line-mode). */ -void __sclp_early_printk(const char *str, unsigned int len) +void __sclp_early_printk(const char *str, unsigned int len, unsigned int force) { int have_linemode, have_vt220; - if (sclp_init_state != sclp_init_state_uninitialized) + if (!force && sclp_init_state != sclp_init_state_uninitialized) return; if (sclp_early_setup(0, &have_linemode, &have_vt220) != 0) return; @@ -227,5 +227,10 @@ void __sclp_early_printk(const char *str, unsigned int len) void sclp_early_printk(const char *str) { - __sclp_early_printk(str, strlen(str)); + __sclp_early_printk(str, strlen(str), 0); +} + +void sclp_early_printk_force(const char *str) +{ + __sclp_early_printk(str, strlen(str), 1); } diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index dbe7c7ac9ac8..fd77e46eb3b2 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -163,7 +163,7 @@ static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat, for (i = 0; i < pat->pat_nr; i++, pa++) for (j = 0; j < pa->pa_nr; j++) - if (pa->pa_iova_pfn[i] == iova_pfn) + if (pa->pa_iova_pfn[j] == iova_pfn) return true; return false; diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 770fa9cfc310..f47d16b5810b 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -22,6 +22,7 @@ #include "vfio_ccw_private.h" struct workqueue_struct *vfio_ccw_work_q; +struct kmem_cache *vfio_ccw_io_region; /* * Helpers @@ -79,7 +80,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) cp_update_scsw(&private->cp, &irb->scsw); cp_free(&private->cp); } - memcpy(private->io_region.irb_area, irb, sizeof(*irb)); + memcpy(private->io_region->irb_area, irb, sizeof(*irb)); if (private->io_trigger) eventfd_signal(private->io_trigger, 1); @@ -114,6 +115,14 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA); if (!private) return -ENOMEM; + + private->io_region = kmem_cache_zalloc(vfio_ccw_io_region, + GFP_KERNEL | GFP_DMA); + if (!private->io_region) { + kfree(private); + return -ENOMEM; + } + private->sch = sch; dev_set_drvdata(&sch->dev, private); @@ -139,6 +148,7 @@ out_disable: cio_disable_subchannel(sch); out_free: dev_set_drvdata(&sch->dev, NULL); + kmem_cache_free(vfio_ccw_io_region, private->io_region); kfree(private); return ret; } @@ -153,6 +163,7 @@ static int vfio_ccw_sch_remove(struct subchannel *sch) dev_set_drvdata(&sch->dev, NULL); + kmem_cache_free(vfio_ccw_io_region, private->io_region); kfree(private); return 0; @@ -232,10 +243,20 @@ static int __init vfio_ccw_sch_init(void) if (!vfio_ccw_work_q) return -ENOMEM; + vfio_ccw_io_region = kmem_cache_create_usercopy("vfio_ccw_io_region", + sizeof(struct ccw_io_region), 0, + SLAB_ACCOUNT, 0, + sizeof(struct ccw_io_region), NULL); + if (!vfio_ccw_io_region) { + destroy_workqueue(vfio_ccw_work_q); + return -ENOMEM; + } + isc_register(VFIO_CCW_ISC); ret = css_driver_register(&vfio_ccw_sch_driver); if (ret) { isc_unregister(VFIO_CCW_ISC); + kmem_cache_destroy(vfio_ccw_io_region); destroy_workqueue(vfio_ccw_work_q); } @@ -246,6 +267,7 @@ static void __exit vfio_ccw_sch_exit(void) { css_driver_unregister(&vfio_ccw_sch_driver); isc_unregister(VFIO_CCW_ISC); + kmem_cache_destroy(vfio_ccw_io_region); destroy_workqueue(vfio_ccw_work_q); } module_init(vfio_ccw_sch_init); diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 797a82731159..f94aa01f9c36 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -93,13 +93,13 @@ static void fsm_io_error(struct vfio_ccw_private *private, enum vfio_ccw_event event) { pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state); - private->io_region.ret_code = -EIO; + private->io_region->ret_code = -EIO; } static void fsm_io_busy(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - private->io_region.ret_code = -EBUSY; + private->io_region->ret_code = -EBUSY; } static void fsm_disabled_irq(struct vfio_ccw_private *private, @@ -126,7 +126,7 @@ static void fsm_io_request(struct vfio_ccw_private *private, { union orb *orb; union scsw *scsw = &private->scsw; - struct ccw_io_region *io_region = &private->io_region; + struct ccw_io_region *io_region = private->io_region; struct mdev_device *mdev = private->mdev; char *errstr = "request"; diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 41eeb57d68a3..f673e106c041 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -174,7 +174,7 @@ static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev, return -EINVAL; private = dev_get_drvdata(mdev_parent_dev(mdev)); - region = &private->io_region; + region = private->io_region; if (copy_to_user(buf, (void *)region + *ppos, count)) return -EFAULT; @@ -196,7 +196,7 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev, if (private->state != VFIO_CCW_STATE_IDLE) return -EACCES; - region = &private->io_region; + region = private->io_region; if (copy_from_user((void *)region + *ppos, buf, count)) return -EFAULT; diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 78a66d96756b..078e46f9623d 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -41,7 +41,7 @@ struct vfio_ccw_private { atomic_t avail; struct mdev_device *mdev; struct notifier_block nb; - struct ccw_io_region io_region; + struct ccw_io_region *io_region; struct channel_program cp; struct irb irb; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index de8282420f96..ffce6f39828a 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -610,7 +610,7 @@ static void qeth_put_reply(struct qeth_reply *reply) static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc, struct qeth_card *card) { - char *ipa_name; + const char *ipa_name; int com = cmd->hdr.command; ipa_name = qeth_get_ipa_cmd_name(com); if (rc) diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 5bcb8dafc3ee..e891c0b52f4c 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -148,10 +148,10 @@ EXPORT_SYMBOL_GPL(IPA_PDU_HEADER); struct ipa_rc_msg { enum qeth_ipa_return_codes rc; - char *msg; + const char *msg; }; -static struct ipa_rc_msg qeth_ipa_rc_msg[] = { +static const struct ipa_rc_msg qeth_ipa_rc_msg[] = { {IPA_RC_SUCCESS, "success"}, {IPA_RC_NOTSUPP, "Command not supported"}, {IPA_RC_IP_TABLE_FULL, "Add Addr IP Table Full - ipv6"}, @@ -219,23 +219,23 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = { -char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) +const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) { - int x = 0; - qeth_ipa_rc_msg[sizeof(qeth_ipa_rc_msg) / - sizeof(struct ipa_rc_msg) - 1].rc = rc; - while (qeth_ipa_rc_msg[x].rc != rc) - x++; + int x; + + for (x = 0; x < ARRAY_SIZE(qeth_ipa_rc_msg) - 1; x++) + if (qeth_ipa_rc_msg[x].rc == rc) + return qeth_ipa_rc_msg[x].msg; return qeth_ipa_rc_msg[x].msg; } struct ipa_cmd_names { enum qeth_ipa_cmds cmd; - char *name; + const char *name; }; -static struct ipa_cmd_names qeth_ipa_cmd_names[] = { +static const struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_STARTLAN, "startlan"}, {IPA_CMD_STOPLAN, "stoplan"}, {IPA_CMD_SETVMAC, "setvmac"}, @@ -267,13 +267,12 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_UNKNOWN, "unknown"}, }; -char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd) +const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd) { - int x = 0; - qeth_ipa_cmd_names[ - sizeof(qeth_ipa_cmd_names) / - sizeof(struct ipa_cmd_names)-1].cmd = cmd; - while (qeth_ipa_cmd_names[x].cmd != cmd) - x++; + int x; + + for (x = 0; x < ARRAY_SIZE(qeth_ipa_cmd_names) - 1; x++) + if (qeth_ipa_cmd_names[x].cmd == cmd) + return qeth_ipa_cmd_names[x].name; return qeth_ipa_cmd_names[x].name; } diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index aa8b9196b089..aa5de1fe01e1 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -797,8 +797,8 @@ enum qeth_ipa_arp_return_codes { QETH_IPA_ARP_RC_Q_NO_DATA = 0x0008, }; -extern char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); -extern char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); +extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); +extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); #define QETH_SETASS_BASE_LEN (sizeof(struct qeth_ipacmd_hdr) + \ sizeof(struct qeth_ipacmd_setassparms_hdr)) diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c index 7b31f19ade83..050879a2ddef 100644 --- a/drivers/sbus/char/openprom.c +++ b/drivers/sbus/char/openprom.c @@ -715,22 +715,13 @@ static struct miscdevice openprom_dev = { static int __init openprom_init(void) { - struct device_node *dp; int err; err = misc_register(&openprom_dev); if (err) return err; - dp = of_find_node_by_path("/"); - dp = dp->child; - while (dp) { - if (!strcmp(dp->name, "options")) - break; - dp = dp->sibling; - } - options_node = dp; - + options_node = of_get_child_by_name(of_find_node_by_path("/"), "options"); if (!options_node) { misc_deregister(&openprom_dev); return -EIO; diff --git a/drivers/sbus/char/oradax.c b/drivers/sbus/char/oradax.c index 524f9ea62e52..6516bc3cb58b 100644 --- a/drivers/sbus/char/oradax.c +++ b/drivers/sbus/char/oradax.c @@ -689,8 +689,7 @@ static int dax_open(struct inode *inode, struct file *f) alloc_error: kfree(ctx->ccb_buf); done: - if (ctx != NULL) - kfree(ctx); + kfree(ctx); return -ENOMEM; } diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index cc8e64dc65ad..e5bd035ebad0 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2472,6 +2472,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) /* start qedi context */ spin_lock_init(&qedi->hba_lock); spin_lock_init(&qedi->task_idx_lock); + mutex_init(&qedi->stats_lock); } qedi_ops->ll2->register_cb_ops(qedi->cdev, &qedi_ll2_cb_ops, qedi); qedi_ops->ll2->start(qedi->cdev, ¶ms); diff --git a/drivers/soc/fsl/qbman/bman_ccsr.c b/drivers/soc/fsl/qbman/bman_ccsr.c index 05c42235dd41..7c3cc968053c 100644 --- a/drivers/soc/fsl/qbman/bman_ccsr.c +++ b/drivers/soc/fsl/qbman/bman_ccsr.c @@ -120,6 +120,7 @@ static void bm_set_memory(u64 ba, u32 size) */ static dma_addr_t fbpr_a; static size_t fbpr_sz; +static int __bman_probed; static int bman_fbpr(struct reserved_mem *rmem) { @@ -166,6 +167,12 @@ static irqreturn_t bman_isr(int irq, void *ptr) return IRQ_HANDLED; } +int bman_is_probed(void) +{ + return __bman_probed; +} +EXPORT_SYMBOL_GPL(bman_is_probed); + static int fsl_bman_probe(struct platform_device *pdev) { int ret, err_irq; @@ -175,6 +182,8 @@ static int fsl_bman_probe(struct platform_device *pdev) u16 id, bm_pool_cnt; u8 major, minor; + __bman_probed = -1; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(dev, "Can't get %pOF property 'IORESOURCE_MEM'\n", @@ -255,6 +264,8 @@ static int fsl_bman_probe(struct platform_device *pdev) return ret; } + __bman_probed = 1; + return 0; }; diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index ecb22749df0b..8cc015183043 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2729,6 +2729,9 @@ static int qman_alloc_range(struct gen_pool *p, u32 *result, u32 cnt) { unsigned long addr; + if (!p) + return -ENODEV; + addr = gen_pool_alloc(p, cnt); if (!addr) return -ENOMEM; diff --git a/drivers/soc/fsl/qbman/qman_ccsr.c b/drivers/soc/fsl/qbman/qman_ccsr.c index 79cba58387a5..6fd5fef5f39b 100644 --- a/drivers/soc/fsl/qbman/qman_ccsr.c +++ b/drivers/soc/fsl/qbman/qman_ccsr.c @@ -273,6 +273,7 @@ static const struct qman_error_info_mdata error_mdata[] = { static u32 __iomem *qm_ccsr_start; /* A SDQCR mask comprising all the available/visible pool channels */ static u32 qm_pools_sdqcr; +static int __qman_probed; static inline u32 qm_ccsr_in(u32 offset) { @@ -686,6 +687,12 @@ static int qman_resource_init(struct device *dev) return 0; } +int qman_is_probed(void) +{ + return __qman_probed; +} +EXPORT_SYMBOL_GPL(qman_is_probed); + static int fsl_qman_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -695,6 +702,8 @@ static int fsl_qman_probe(struct platform_device *pdev) u16 id; u8 major, minor; + __qman_probed = -1; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(dev, "Can't get %pOF property 'IORESOURCE_MEM'\n", @@ -828,6 +837,8 @@ static int fsl_qman_probe(struct platform_device *pdev) if (ret) return ret; + __qman_probed = 1; + return 0; } diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c index a120002b630e..3e9391d117c5 100644 --- a/drivers/soc/fsl/qbman/qman_portal.c +++ b/drivers/soc/fsl/qbman/qman_portal.c @@ -227,6 +227,14 @@ static int qman_portal_probe(struct platform_device *pdev) int irq, cpu, err; u32 val; + err = qman_is_probed(); + if (!err) + return -EPROBE_DEFER; + if (err < 0) { + dev_err(&pdev->dev, "failing probe due to qman probe error\n"); + return -ENODEV; + } + pcfg = devm_kmalloc(dev, sizeof(*pcfg), GFP_KERNEL); if (!pcfg) return -ENOMEM; diff --git a/drivers/soc/fsl/qe/ucc.c b/drivers/soc/fsl/qe/ucc.c index c646d8713861..681f7d4b7724 100644 --- a/drivers/soc/fsl/qe/ucc.c +++ b/drivers/soc/fsl/qe/ucc.c @@ -626,7 +626,7 @@ static u32 ucc_get_tdm_sync_shift(enum comm_dir mode, u32 tdm_num) { u32 shift; - shift = (mode == COMM_DIR_RX) ? RX_SYNC_SHIFT_BASE : RX_SYNC_SHIFT_BASE; + shift = (mode == COMM_DIR_RX) ? RX_SYNC_SHIFT_BASE : TX_SYNC_SHIFT_BASE; shift -= tdm_num * 2; return shift; diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index e1e264a9a4c7..28fc4ce75edb 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -738,14 +738,6 @@ icm_fr_xdomain_connected(struct tb *tb, const struct icm_pkg_header *hdr) u8 link, depth; u64 route; - /* - * After NVM upgrade adding root switch device fails because we - * initiated reset. During that time ICM might still send - * XDomain connected message which we ignore here. - */ - if (!tb->root_switch) - return; - link = pkg->link_info & ICM_LINK_INFO_LINK_MASK; depth = (pkg->link_info & ICM_LINK_INFO_DEPTH_MASK) >> ICM_LINK_INFO_DEPTH_SHIFT; @@ -1037,14 +1029,6 @@ icm_tr_device_connected(struct tb *tb, const struct icm_pkg_header *hdr) if (pkg->hdr.packet_id) return; - /* - * After NVM upgrade adding root switch device fails because we - * initiated reset. During that time ICM might still send device - * connected message which we ignore here. - */ - if (!tb->root_switch) - return; - route = get_route(pkg->route_hi, pkg->route_lo); authorized = pkg->link_info & ICM_LINK_INFO_APPROVED; security_level = (pkg->hdr.flags & ICM_FLAGS_SLEVEL_MASK) >> @@ -1408,19 +1392,26 @@ static void icm_handle_notification(struct work_struct *work) mutex_lock(&tb->lock); - switch (n->pkg->code) { - case ICM_EVENT_DEVICE_CONNECTED: - icm->device_connected(tb, n->pkg); - break; - case ICM_EVENT_DEVICE_DISCONNECTED: - icm->device_disconnected(tb, n->pkg); - break; - case ICM_EVENT_XDOMAIN_CONNECTED: - icm->xdomain_connected(tb, n->pkg); - break; - case ICM_EVENT_XDOMAIN_DISCONNECTED: - icm->xdomain_disconnected(tb, n->pkg); - break; + /* + * When the domain is stopped we flush its workqueue but before + * that the root switch is removed. In that case we should treat + * the queued events as being canceled. + */ + if (tb->root_switch) { + switch (n->pkg->code) { + case ICM_EVENT_DEVICE_CONNECTED: + icm->device_connected(tb, n->pkg); + break; + case ICM_EVENT_DEVICE_DISCONNECTED: + icm->device_disconnected(tb, n->pkg); + break; + case ICM_EVENT_XDOMAIN_CONNECTED: + icm->xdomain_connected(tb, n->pkg); + break; + case ICM_EVENT_XDOMAIN_DISCONNECTED: + icm->xdomain_disconnected(tb, n->pkg); + break; + } } mutex_unlock(&tb->lock); diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 88cff05a1808..5cd6bdfa068f 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -1191,5 +1191,5 @@ static void __exit nhi_unload(void) tb_domain_exit(); } -fs_initcall(nhi_init); +rootfs_initcall(nhi_init); module_exit(nhi_unload); diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index fa8dcb470640..d31b975dd3fd 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -630,10 +630,6 @@ static int dw8250_probe(struct platform_device *pdev) if (!data->skip_autocfg) dw8250_setup_port(p); -#ifdef CONFIG_PM - uart.capabilities |= UART_CAP_RPM; -#endif - /* If we have a valid fifosize, try hooking up DMA */ if (p->fifosize) { data->dma.rxconf.src_maxburst = p->fifosize / 4; diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 29ec34387246..1515074e18fb 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -868,8 +868,8 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport) geni_se_init(&port->se, port->rx_wm, port->rx_rfr); geni_se_select_mode(&port->se, port->xfer_mode); if (!uart_console(uport)) { - port->rx_fifo = devm_kzalloc(uport->dev, - port->rx_fifo_depth * sizeof(u32), GFP_KERNEL); + port->rx_fifo = devm_kcalloc(uport->dev, + port->rx_fifo_depth, sizeof(u32), GFP_KERNEL); if (!port->rx_fifo) return -ENOMEM; } diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index ac4424bf6b13..ab3f6e91853d 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -292,6 +292,33 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { }, /* + * The "SCIFA" that is in RZ/T and RZ/A2. + * It looks like a normal SCIF with FIFO data, but with a + * compressed address space. Also, the break out of interrupts + * are different: ERI/BRI, RXI, TXI, TEI, DRI. + */ + [SCIx_RZ_SCIFA_REGTYPE] = { + .regs = { + [SCSMR] = { 0x00, 16 }, + [SCBRR] = { 0x02, 8 }, + [SCSCR] = { 0x04, 16 }, + [SCxTDR] = { 0x06, 8 }, + [SCxSR] = { 0x08, 16 }, + [SCxRDR] = { 0x0A, 8 }, + [SCFCR] = { 0x0C, 16 }, + [SCFDR] = { 0x0E, 16 }, + [SCSPTR] = { 0x10, 16 }, + [SCLSR] = { 0x12, 16 }, + }, + .fifosize = 16, + .overrun_reg = SCLSR, + .overrun_mask = SCLSR_ORER, + .sampling_rate_mask = SCI_SR(32), + .error_mask = SCIF_DEFAULT_ERROR_MASK, + .error_clear = SCIF_ERROR_CLEAR, + }, + + /* * Common SH-3 SCIF definitions. */ [SCIx_SH3_SCIF_REGTYPE] = { @@ -319,15 +346,15 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { [SCIx_SH4_SCIF_REGTYPE] = { .regs = { [SCSMR] = { 0x00, 16 }, - [SCBRR] = { 0x02, 8 }, - [SCSCR] = { 0x04, 16 }, - [SCxTDR] = { 0x06, 8 }, - [SCxSR] = { 0x08, 16 }, - [SCxRDR] = { 0x0a, 8 }, - [SCFCR] = { 0x0c, 16 }, - [SCFDR] = { 0x0e, 16 }, - [SCSPTR] = { 0x10, 16 }, - [SCLSR] = { 0x12, 16 }, + [SCBRR] = { 0x04, 8 }, + [SCSCR] = { 0x08, 16 }, + [SCxTDR] = { 0x0c, 8 }, + [SCxSR] = { 0x10, 16 }, + [SCxRDR] = { 0x14, 8 }, + [SCFCR] = { 0x18, 16 }, + [SCFDR] = { 0x1c, 16 }, + [SCSPTR] = { 0x20, 16 }, + [SCLSR] = { 0x24, 16 }, }, .fifosize = 16, .overrun_reg = SCLSR, @@ -2810,7 +2837,7 @@ static int sci_init_single(struct platform_device *dev, { struct uart_port *port = &sci_port->port; const struct resource *res; - unsigned int i, regtype; + unsigned int i; int ret; sci_port->cfg = p; @@ -2847,7 +2874,6 @@ static int sci_init_single(struct platform_device *dev, if (unlikely(sci_port->params == NULL)) return -EINVAL; - regtype = sci_port->params - sci_port_params; switch (p->type) { case PORT_SCIFB: sci_port->rx_trigger = 48; @@ -2902,10 +2928,6 @@ static int sci_init_single(struct platform_device *dev, port->regshift = 1; } - if (regtype == SCIx_SH4_SCIF_REGTYPE) - if (sci_port->reg_size >= 0x20) - port->regshift = 1; - /* * The UART port needs an IRQ value, so we peg this to the RX IRQ * for the multi-IRQ ports, which is where we are primarily @@ -3110,6 +3132,10 @@ static const struct of_device_id of_sci_match[] = { .compatible = "renesas,scif-r7s72100", .data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE), }, + { + .compatible = "renesas,scif-r7s9210", + .data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE), + }, /* Family-specific types */ { .compatible = "renesas,rcar-gen1-scif", diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f9b40a9dc4d3..bc03b0a690b4 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1514,6 +1514,7 @@ static void acm_disconnect(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); struct tty_struct *tty; + int i; /* sibling interface is already cleaning up */ if (!acm) @@ -1544,6 +1545,11 @@ static void acm_disconnect(struct usb_interface *intf) tty_unregister_device(acm_tty_driver, acm->minor); + usb_free_urb(acm->ctrlurb); + for (i = 0; i < ACM_NW; i++) + usb_free_urb(acm->wb[i].urb); + for (i = 0; i < acm->rx_buflimit; i++) + usb_free_urb(acm->read_urbs[i]); acm_write_buffers_free(acm); usb_free_coherent(acm->dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); acm_read_buffers_free(acm); diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 7334da9e9779..71d0d33c3286 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -642,10 +642,10 @@ static int __maybe_unused xhci_mtk_resume(struct device *dev) xhci_mtk_host_enable(mtk); xhci_dbg(xhci, "%s: restart port polling\n", __func__); - set_bit(HCD_FLAG_POLL_RH, &hcd->flags); - usb_hcd_poll_rh_status(hcd); set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); usb_hcd_poll_rh_status(xhci->shared_hcd); + set_bit(HCD_FLAG_POLL_RH, &hcd->flags); + usb_hcd_poll_rh_status(hcd); return 0; } diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 6372edf339d9..722860eb5a91 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -185,6 +185,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) xhci->quirks |= XHCI_MISSING_CAS; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0215b70c4efc..e72ad9f81c73 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -561,6 +561,9 @@ static void option_instat_callback(struct urb *urb); /* Interface is reserved */ #define RSVD(ifnum) ((BIT(ifnum) & 0xff) << 0) +/* Interface must have two endpoints */ +#define NUMEP2 BIT(16) + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, @@ -1081,8 +1084,9 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = RSVD(4) }, - { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06), - .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), + .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), @@ -1999,6 +2003,13 @@ static int option_probe(struct usb_serial *serial, if (device_flags & RSVD(iface_desc->bInterfaceNumber)) return -ENODEV; + /* + * Allow matching on bNumEndpoints for devices whose interface numbers + * can change (e.g. Quectel EP06). + */ + if (device_flags & NUMEP2 && iface_desc->bNumEndpoints != 2) + return -ENODEV; + /* Store the device flags so we can use them during attach. */ usb_set_serial_data(serial, (void *)device_flags); diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 40864c2bd9dc..4d0273508043 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -84,7 +84,8 @@ DEVICE(moto_modem, MOTO_IDS); /* Motorola Tetra driver */ #define MOTOROLA_TETRA_IDS() \ - { USB_DEVICE(0x0cad, 0x9011) } /* Motorola Solutions TETRA PEI */ + { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ + { USB_DEVICE(0x0cad, 0x9012) } /* MTP6550 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); /* Novatel Wireless GPS driver */ diff --git a/drivers/video/fbdev/aty/atyfb.h b/drivers/video/fbdev/aty/atyfb.h index 8235b285dbb2..d09bab3bf224 100644 --- a/drivers/video/fbdev/aty/atyfb.h +++ b/drivers/video/fbdev/aty/atyfb.h @@ -333,6 +333,8 @@ extern const struct aty_pll_ops aty_pll_ct; /* Integrated */ extern void aty_set_pll_ct(const struct fb_info *info, const union aty_pll *pll); extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par); +extern const u8 aty_postdividers[8]; + /* * Hardware cursor support @@ -359,7 +361,6 @@ static inline void wait_for_idle(struct atyfb_par *par) extern void aty_reset_engine(const struct atyfb_par *par); extern void aty_init_engine(struct atyfb_par *par, struct fb_info *info); -extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par); void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area); void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index a9a8272f7a6e..05111e90f168 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3087,17 +3087,18 @@ static int atyfb_setup_sparc(struct pci_dev *pdev, struct fb_info *info, /* * PLL Reference Divider M: */ - M = pll_regs[2]; + M = pll_regs[PLL_REF_DIV]; /* * PLL Feedback Divider N (Dependent on CLOCK_CNTL): */ - N = pll_regs[7 + (clock_cntl & 3)]; + N = pll_regs[VCLK0_FB_DIV + (clock_cntl & 3)]; /* * PLL Post Divider P (Dependent on CLOCK_CNTL): */ - P = 1 << (pll_regs[6] >> ((clock_cntl & 3) << 1)); + P = aty_postdividers[((pll_regs[VCLK_POST_DIV] >> ((clock_cntl & 3) << 1)) & 3) | + ((pll_regs[PLL_EXT_CNTL] >> (2 + (clock_cntl & 3))) & 4)]; /* * PLL Divider Q: diff --git a/drivers/video/fbdev/aty/mach64_ct.c b/drivers/video/fbdev/aty/mach64_ct.c index 74a62aa193c0..f87cc81f4fa2 100644 --- a/drivers/video/fbdev/aty/mach64_ct.c +++ b/drivers/video/fbdev/aty/mach64_ct.c @@ -115,7 +115,7 @@ static void aty_st_pll_ct(int offset, u8 val, const struct atyfb_par *par) */ #define Maximum_DSP_PRECISION 7 -static u8 postdividers[] = {1,2,4,8,3}; +const u8 aty_postdividers[8] = {1,2,4,8,3,5,6,12}; static int aty_dsp_gt(const struct fb_info *info, u32 bpp, struct pll_ct *pll) { @@ -222,7 +222,7 @@ static int aty_valid_pll_ct(const struct fb_info *info, u32 vclk_per, struct pll pll->vclk_post_div += (q < 64*8); pll->vclk_post_div += (q < 32*8); } - pll->vclk_post_div_real = postdividers[pll->vclk_post_div]; + pll->vclk_post_div_real = aty_postdividers[pll->vclk_post_div]; // pll->vclk_post_div <<= 6; pll->vclk_fb_div = q * pll->vclk_post_div_real / 8; pllvclk = (1000000 * 2 * pll->vclk_fb_div) / @@ -513,7 +513,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) u8 mclk_fb_div, pll_ext_cntl; pll->ct.pll_ref_div = aty_ld_pll_ct(PLL_REF_DIV, par); pll_ext_cntl = aty_ld_pll_ct(PLL_EXT_CNTL, par); - pll->ct.xclk_post_div_real = postdividers[pll_ext_cntl & 0x07]; + pll->ct.xclk_post_div_real = aty_postdividers[pll_ext_cntl & 0x07]; mclk_fb_div = aty_ld_pll_ct(MCLK_FB_DIV, par); if (pll_ext_cntl & PLL_MFB_TIMES_4_2B) mclk_fb_div <<= 1; @@ -535,7 +535,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) xpost_div += (q < 64*8); xpost_div += (q < 32*8); } - pll->ct.xclk_post_div_real = postdividers[xpost_div]; + pll->ct.xclk_post_div_real = aty_postdividers[xpost_div]; pll->ct.mclk_fb_div = q * pll->ct.xclk_post_div_real / 8; #ifdef CONFIG_PPC @@ -584,7 +584,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) mpost_div += (q < 64*8); mpost_div += (q < 32*8); } - sclk_post_div_real = postdividers[mpost_div]; + sclk_post_div_real = aty_postdividers[mpost_div]; pll->ct.sclk_fb_div = q * sclk_post_div_real / 8; pll->ct.spll_cntl2 = mpost_div << 4; #ifdef DEBUG diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index 3946649b85c8..ba906876cc45 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -42,6 +42,7 @@ struct bmp_dib_header { u32 colors_important; } __packed; +static bool use_bgrt = true; static bool request_mem_succeeded = false; static u64 mem_flags = EFI_MEMORY_WC | EFI_MEMORY_UC; @@ -160,6 +161,9 @@ static void efifb_show_boot_graphics(struct fb_info *info) void *bgrt_image = NULL; u8 *dst = info->screen_base; + if (!use_bgrt) + return; + if (!bgrt_tab.image_address) { pr_info("efifb: No BGRT, not showing boot graphics\n"); return; @@ -290,6 +294,8 @@ static int efifb_setup(char *options) screen_info.lfb_width = simple_strtoul(this_opt+6, NULL, 0); else if (!strcmp(this_opt, "nowc")) mem_flags &= ~EFI_MEMORY_WC; + else if (!strcmp(this_opt, "nobgrt")) + use_bgrt = false; } } diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c index ef69273074ba..a3edb20ea4c3 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c @@ -496,6 +496,9 @@ static int omapfb_memory_read(struct fb_info *fbi, if (!access_ok(VERIFY_WRITE, mr->buffer, mr->buffer_size)) return -EFAULT; + if (mr->w > 4096 || mr->h > 4096) + return -EINVAL; + if (mr->w * mr->h * 3 > mr->buffer_size) return -EINVAL; @@ -509,7 +512,7 @@ static int omapfb_memory_read(struct fb_info *fbi, mr->x, mr->y, mr->w, mr->h); if (r > 0) { - if (copy_to_user(mr->buffer, buf, mr->buffer_size)) + if (copy_to_user(mr->buffer, buf, r)) r = -EFAULT; } diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index def3a501acd6..d059d04c63ac 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -712,7 +712,7 @@ static int pxa168fb_probe(struct platform_device *pdev) /* * enable controller clock */ - clk_enable(fbi->clk); + clk_prepare_enable(fbi->clk); pxa168fb_set_par(info); @@ -767,7 +767,7 @@ static int pxa168fb_probe(struct platform_device *pdev) failed_free_cmap: fb_dealloc_cmap(&info->cmap); failed_free_clk: - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); failed_free_fbmem: dma_free_coherent(fbi->dev, info->fix.smem_len, info->screen_base, fbi->fb_start_dma); @@ -807,7 +807,7 @@ static int pxa168fb_remove(struct platform_device *pdev) dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len), info->screen_base, info->fix.smem_start); - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); framebuffer_release(info); diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 045e8afe398b..9e88e3f594c2 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -1157,7 +1157,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) dev_name); goto out_err0; } - /* fall though */ + /* fall through */ case S9000_ID_ARTIST: case S9000_ID_HCRX: case S9000_ID_TIMBER: diff --git a/fs/afs/cell.c b/fs/afs/cell.c index f3d0bef16d78..6127f0fcd62c 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -514,6 +514,8 @@ static int afs_alloc_anon_key(struct afs_cell *cell) */ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) { + struct hlist_node **p; + struct afs_cell *pcell; int ret; if (!cell->anonymous_key) { @@ -534,7 +536,18 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) return ret; mutex_lock(&net->proc_cells_lock); - list_add_tail(&cell->proc_link, &net->proc_cells); + for (p = &net->proc_cells.first; *p; p = &(*p)->next) { + pcell = hlist_entry(*p, struct afs_cell, proc_link); + if (strcmp(cell->name, pcell->name) < 0) + break; + } + + cell->proc_link.pprev = p; + cell->proc_link.next = *p; + rcu_assign_pointer(*p, &cell->proc_link.next); + if (cell->proc_link.next) + cell->proc_link.next->pprev = &cell->proc_link.next; + afs_dynroot_mkdir(net, cell); mutex_unlock(&net->proc_cells_lock); return 0; @@ -550,7 +563,7 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) afs_proc_cell_remove(cell); mutex_lock(&net->proc_cells_lock); - list_del_init(&cell->proc_link); + hlist_del_rcu(&cell->proc_link); afs_dynroot_rmdir(net, cell); mutex_unlock(&net->proc_cells_lock); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 1cde710a8013..f29c6dade7f6 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -265,7 +265,7 @@ int afs_dynroot_populate(struct super_block *sb) return -ERESTARTSYS; net->dynroot_sb = sb; - list_for_each_entry(cell, &net->proc_cells, proc_link) { + hlist_for_each_entry(cell, &net->proc_cells, proc_link) { ret = afs_dynroot_mkdir(net, cell); if (ret < 0) goto error; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 871a228d7f37..34c02fdcc25f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -242,7 +242,7 @@ struct afs_net { seqlock_t cells_lock; struct mutex proc_cells_lock; - struct list_head proc_cells; + struct hlist_head proc_cells; /* Known servers. Theoretically each fileserver can only be in one * cell, but in practice, people create aliases and subsets and there's @@ -320,7 +320,7 @@ struct afs_cell { struct afs_net *net; struct key *anonymous_key; /* anonymous user key for this cell */ struct work_struct manager; /* Manager for init/deinit/dns */ - struct list_head proc_link; /* /proc cell list link */ + struct hlist_node proc_link; /* /proc cell list link */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif diff --git a/fs/afs/main.c b/fs/afs/main.c index e84fe822a960..107427688edd 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -87,7 +87,7 @@ static int __net_init afs_net_init(struct net *net_ns) timer_setup(&net->cells_timer, afs_cells_timer, 0); mutex_init(&net->proc_cells_lock); - INIT_LIST_HEAD(&net->proc_cells); + INIT_HLIST_HEAD(&net->proc_cells); seqlock_init(&net->fs_lock); net->fs_servers = RB_ROOT; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 476dcbb79713..9101f62707af 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -33,9 +33,8 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m) static int afs_proc_cells_show(struct seq_file *m, void *v) { struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); - struct afs_net *net = afs_seq2net(m); - if (v == &net->proc_cells) { + if (v == SEQ_START_TOKEN) { /* display header on line 1 */ seq_puts(m, "USE NAME\n"); return 0; @@ -50,12 +49,12 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { rcu_read_lock(); - return seq_list_start_head(&afs_seq2net(m)->proc_cells, *_pos); + return seq_hlist_start_head_rcu(&afs_seq2net(m)->proc_cells, *_pos); } static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &afs_seq2net(m)->proc_cells, pos); + return seq_hlist_next_rcu(v, &afs_seq2net(m)->proc_cells, pos); } static void afs_proc_cells_stop(struct seq_file *m, void *v) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 35f2ae30f31f..77a83790a31f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -690,8 +690,6 @@ static void afs_process_async_call(struct work_struct *work) } if (call->state == AFS_CALL_COMPLETE) { - call->reply[0] = NULL; - /* We have two refs to release - one from the alloc and one * queued with the work item - and we can't just deallocate the * call because the work item may be queued again. diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0c9ab62c3df4..9dcaed031843 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1553,6 +1553,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, /* Flags */ #define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ +#define MID_DELETED 2 /* Mid has been dequeued/deleted */ /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7aa08dba4719..52d71b64c0c6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -659,7 +659,15 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) mid->mid_state = MID_RESPONSE_RECEIVED; else mid->mid_state = MID_RESPONSE_MALFORMED; - list_del_init(&mid->qhead); + /* + * Trying to handle/dequeue a mid after the send_recv() + * function has finished processing it is a bug. + */ + if (mid->mid_flags & MID_DELETED) + printk_once(KERN_WARNING + "trying to dequeue a deleted mid\n"); + else + list_del_init(&mid->qhead); spin_unlock(&GlobalMid_Lock); } @@ -938,8 +946,7 @@ next_pdu: } else { mids[0] = server->ops->find_mid(server, buf); bufs[0] = buf; - if (mids[0]) - num_mids = 1; + num_mids = 1; if (!mids[0] || !mids[0]->receive) length = standard_receive3(server, mids[0]); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d954ce36b473..89985a0a6819 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1477,7 +1477,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, } srch_inf->entries_in_buffer = 0; - srch_inf->index_of_last_entry = 0; + srch_inf->index_of_last_entry = 2; rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, fid->volatile_fid, 0, srch_inf); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 78f96fa3d7d9..b48f43963da6 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -142,7 +142,8 @@ void cifs_delete_mid(struct mid_q_entry *mid) { spin_lock(&GlobalMid_Lock); - list_del(&mid->qhead); + list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(mid); @@ -772,6 +773,11 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) return mid; } +static void +cifs_noop_callback(struct mid_q_entry *mid) +{ +} + int compound_send_recv(const unsigned int xid, struct cifs_ses *ses, const int flags, const int num_rqst, struct smb_rqst *rqst, @@ -826,8 +832,13 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, } midQ[i]->mid_state = MID_REQUEST_SUBMITTED; + /* + * We don't invoke the callback compounds unless it is the last + * request. + */ + if (i < num_rqst - 1) + midQ[i]->callback = cifs_noop_callback; } - cifs_in_send_inc(ses->server); rc = smb_send_rqst(ses->server, num_rqst, rqst, flags); cifs_in_send_dec(ses->server); @@ -908,6 +919,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, midQ[i]->resp_buf = NULL; } out: + /* + * This will dequeue all mids. After this it is important that the + * demultiplex_thread will not process any of these mids any futher. + * This is prevented above by using a noop callback that will not + * wake this thread except for the very last PDU. + */ for (i = 0; i < num_rqst; i++) cifs_delete_mid(midQ[i]); add_credits(ses->server, credits, optype); @@ -447,6 +447,7 @@ bool dax_lock_mapping_entry(struct page *page) xa_unlock_irq(&mapping->i_pages); break; } else if (IS_ERR(entry)) { + xa_unlock_irq(&mapping->i_pages); WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN); continue; } @@ -665,6 +666,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping) while (index < end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { + pgoff_t nr_pages = 1; + for (i = 0; i < pagevec_count(&pvec); i++) { struct page *pvec_ent = pvec.pages[i]; void *entry; @@ -679,8 +682,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping) xa_lock_irq(&mapping->i_pages); entry = get_unlocked_mapping_entry(mapping, index, NULL); - if (entry) + if (entry) { page = dax_busy_page(entry); + /* + * Account for multi-order entries at + * the end of the pagevec. + */ + if (i + 1 >= pagevec_count(&pvec)) + nr_pages = 1UL << dax_radix_order(entry); + } put_unlocked_mapping_entry(mapping, index, entry); xa_unlock_irq(&mapping->i_pages); if (page) @@ -695,7 +705,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) */ pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - index++; + index += nr_pages; if (page) break; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index defc2168de91..f58c0cacc531 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -682,6 +682,7 @@ int fat_count_free_clusters(struct super_block *sb) if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); + cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 03128ed1f34e..84544a4f012d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1057,7 +1057,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, } } release_metapath(&mp); - if (gfs2_is_jdata(ip)) + if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip)) iomap->page_done = gfs2_iomap_journaled_page_done; return 0; diff --git a/fs/ioctl.c b/fs/ioctl.c index 3212c29235ce..2005529af560 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -230,7 +230,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; - ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen); + ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); fdput: fdput(src_file); return ret; diff --git a/fs/iomap.c b/fs/iomap.c index 74762b1ec233..ec15cf2ec696 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1051,6 +1051,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, } else { WARN_ON_ONCE(!PageUptodate(page)); iomap_page_create(inode, page); + set_page_dirty(page); } return length; @@ -1090,7 +1091,6 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops) length -= ret; } - set_page_dirty(page); wait_for_stable_page(page); return VM_FAULT_LOCKED; out_unlock: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 55a099e47ba2..b53e76391e52 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -541,7 +541,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count)); + return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, + count)); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index aaca0949fe53..826f0567ec43 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -584,9 +584,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, res->last_used = 0; - spin_lock(&dlm->spinlock); + spin_lock(&dlm->track_lock); list_add_tail(&res->tracking, &dlm->tracking_list); - spin_unlock(&dlm->spinlock); + spin_unlock(&dlm->track_lock); memset(res->lvb, 0, DLM_LVB_LEN); memset(res->refmap, 0, sizeof(res->refmap)); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8e712b614e6e..933aac5da193 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -96,7 +96,9 @@ struct ocfs2_unblock_ctl { }; /* Lockdep class keys */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; +#endif static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 7869622af22a..7a5ee145c733 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2946,6 +2946,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (map_end & (PAGE_SIZE - 1)) to = map_end & (PAGE_SIZE - 1); +retry: page = find_or_create_page(mapping, page_index, GFP_NOFS); if (!page) { ret = -ENOMEM; @@ -2954,11 +2955,18 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, } /* - * In case PAGE_SIZE <= CLUSTER_SIZE, This page - * can't be dirtied before we CoW it out. + * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty + * page, so write it back. */ - if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) - BUG_ON(PageDirty(page)); + if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) { + if (PageDirty(page)) { + /* + * write_on_page will unlock the page on return + */ + ret = write_one_page(page); + goto retry; + } + } if (!PageUptodate(page)) { ret = block_read_full_page(page, ocfs2_get_block); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 296037afecdb..1cc797a08a5b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -141,7 +141,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) } /* Try to use clone_file_range to clone up within the same fs */ - error = vfs_clone_file_range(old_file, 0, new_file, 0, len); + error = do_clone_file_range(old_file, 0, new_file, 0, len); if (!error) goto out; /* Couldn't clone, so now we try to copy the data */ diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index aeaefd2a551b..986313da0c88 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -240,8 +240,10 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) goto out_unlock; old_cred = ovl_override_creds(file_inode(file)->i_sb); + file_start_write(real.file); ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, ovl_iocb_to_rwf(iocb)); + file_end_write(real.file); revert_creds(old_cred); /* Update size */ diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b6ac545b5a32..3b7ed5d2279c 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -504,7 +504,7 @@ static const struct inode_operations ovl_special_inode_operations = { .update_time = ovl_update_time, }; -const struct address_space_operations ovl_aops = { +static const struct address_space_operations ovl_aops = { /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ .direct_IO = noop_direct_IO, }; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index f28711846dd6..9c0ca6a7becf 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -686,7 +686,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, index = NULL; goto out; } - pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" + pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, err); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f61839e1054c..a3c0d9584312 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -152,8 +152,8 @@ static inline int ovl_do_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { int err = vfs_setxattr(dentry, name, value, size, flags); - pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n", - dentry, name, (int) size, (char *) value, flags, err); + pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n", + dentry, name, min((int)size, 48), value, size, flags, err); return err; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 8cfb62cc8672..ace4fe4c39a9 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -683,7 +683,7 @@ static void ovl_cleanup_index(struct dentry *dentry) struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *index = NULL; struct inode *inode; - struct qstr name; + struct qstr name = { }; int err; err = ovl_get_index_name(lowerdentry, &name); @@ -726,6 +726,7 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; out: + kfree(name.name); dput(index); return; diff --git a/fs/proc/base.c b/fs/proc/base.c index ccf86f16d9f0..7e9f07bf260d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -407,6 +407,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, unsigned long *entries; int err; + /* + * The ability to racily run the kernel stack unwinder on a running task + * and then observe the unwinder output is scary; while it is useful for + * debugging kernel issues, it can also allow an attacker to leak kernel + * stack contents. + * Doing this in a manner that is at least safe from races would require + * some work to ensure that the remote task can not be scheduled; and + * even then, this would still expose the unwinder as local attack + * surface. + * Therefore, this interface is restricted to root. + */ + if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) + return -EACCES; + entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), GFP_KERNEL); if (!entries) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index bbd1e357c23d..f4fd2e72add4 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -898,8 +898,22 @@ static struct platform_driver ramoops_driver = { }, }; -static void ramoops_register_dummy(void) +static inline void ramoops_unregister_dummy(void) { + platform_device_unregister(dummy); + dummy = NULL; + + kfree(dummy_data); + dummy_data = NULL; +} + +static void __init ramoops_register_dummy(void) +{ + /* + * Prepare a dummy platform data structure to carry the module + * parameters. If mem_size isn't set, then there are no module + * parameters, and we can skip this. + */ if (!mem_size) return; @@ -932,21 +946,28 @@ static void ramoops_register_dummy(void) if (IS_ERR(dummy)) { pr_info("could not create platform device: %ld\n", PTR_ERR(dummy)); + dummy = NULL; + ramoops_unregister_dummy(); } } static int __init ramoops_init(void) { + int ret; + ramoops_register_dummy(); - return platform_driver_register(&ramoops_driver); + ret = platform_driver_register(&ramoops_driver); + if (ret != 0) + ramoops_unregister_dummy(); + + return ret; } late_initcall(ramoops_init); static void __exit ramoops_exit(void) { platform_driver_unregister(&ramoops_driver); - platform_device_unregister(dummy); - kfree(dummy_data); + ramoops_unregister_dummy(); } module_exit(ramoops_exit); diff --git a/fs/read_write.c b/fs/read_write.c index 39b4a21dd933..8a2737f0d61d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1818,8 +1818,8 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, } EXPORT_SYMBOL(vfs_clone_file_prep_inodes); -int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1866,6 +1866,19 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, return ret; } +EXPORT_SYMBOL(do_clone_file_range); + +int vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) +{ + int ret; + + file_start_write(file_out); + ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} EXPORT_SYMBOL(vfs_clone_file_range); /* diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index bf000c8aeffb..fec62e9dfbe6 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2337,8 +2337,8 @@ late_initcall(ubifs_init); static void __exit ubifs_exit(void) { - WARN_ON(list_empty(&ubifs_infos)); - WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) == 0); + WARN_ON(!list_empty(&ubifs_infos)); + WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0); dbg_debugfs_exit(); ubifs_compressors_exit(); diff --git a/fs/xattr.c b/fs/xattr.c index daa732550088..0d6a6a4af861 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -948,17 +948,19 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, int err = 0; #ifdef CONFIG_FS_POSIX_ACL - if (inode->i_acl) { - err = xattr_list_one(&buffer, &remaining_size, - XATTR_NAME_POSIX_ACL_ACCESS); - if (err) - return err; - } - if (inode->i_default_acl) { - err = xattr_list_one(&buffer, &remaining_size, - XATTR_NAME_POSIX_ACL_DEFAULT); - if (err) - return err; + if (IS_POSIXACL(inode)) { + if (inode->i_acl) { + err = xattr_list_one(&buffer, &remaining_size, + XATTR_NAME_POSIX_ACL_ACCESS); + if (err) + return err; + } + if (inode->i_default_acl) { + err = xattr_list_one(&buffer, &remaining_size, + XATTR_NAME_POSIX_ACL_DEFAULT); + if (err) + return err; + } } #endif diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 1e671d4eb6fa..c6299f82a6e4 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -587,7 +587,7 @@ xfs_attr_leaf_addname( */ error = xfs_attr3_leaf_to_node(args); if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -675,7 +675,7 @@ xfs_attr_leaf_addname( error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -693,9 +693,6 @@ xfs_attr_leaf_addname( error = xfs_attr3_leaf_clearflag(args); } return error; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } /* @@ -738,15 +735,12 @@ xfs_attr_leaf_removename( error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; } return 0; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } /* @@ -864,7 +858,7 @@ restart: state = NULL; error = xfs_attr3_leaf_to_node(args); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -888,7 +882,7 @@ restart: */ error = xfs_da3_split(state); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -984,7 +978,7 @@ restart: if (retval && (state->path.active > 1)) { error = xfs_da3_join(state); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -1013,9 +1007,6 @@ out: if (error) return error; return retval; -out_defer_cancel: - xfs_defer_cancel(args->trans); - goto out; } /* @@ -1107,7 +1098,7 @@ xfs_attr_node_removename( if (retval && (state->path.active > 1)) { error = xfs_da3_join(state); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -1138,7 +1129,7 @@ xfs_attr_node_removename( error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -1150,9 +1141,6 @@ xfs_attr_node_removename( out: xfs_da_state_free(state); return error; -out_defer_cancel: - xfs_defer_cancel(args->trans); - goto out; } /* diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index af094063e402..d89363c6b523 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -485,7 +485,7 @@ xfs_attr_rmtval_set( blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, &nmap); if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -553,9 +553,6 @@ xfs_attr_rmtval_set( } ASSERT(valuelen == 0); return 0; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } /* @@ -625,7 +622,7 @@ xfs_attr_rmtval_remove( error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, XFS_BMAPI_ATTRFORK, 1, &done); if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -638,7 +635,4 @@ xfs_attr_rmtval_remove( return error; } return 0; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2760314fdf7f..a47670332326 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -673,7 +673,8 @@ xfs_bmap_extents_to_btree( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); /* - * Make space in the inode incore. + * Make space in the inode incore. This needs to be undone if we fail + * to expand the root. */ xfs_iroot_realloc(ip, 1, whichfork); ifp->if_flags |= XFS_IFBROOT; @@ -711,16 +712,15 @@ xfs_bmap_extents_to_btree( args.minlen = args.maxlen = args.prod = 1; args.wasdel = wasdel; *logflagsp = 0; - if ((error = xfs_alloc_vextent(&args))) { - ASSERT(ifp->if_broot == NULL); - goto err1; - } + error = xfs_alloc_vextent(&args); + if (error) + goto out_root_realloc; if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { - ASSERT(ifp->if_broot == NULL); error = -ENOSPC; - goto err1; + goto out_root_realloc; } + /* * Allocation can't fail, the space was reserved. */ @@ -732,9 +732,10 @@ xfs_bmap_extents_to_btree( xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); if (!abp) { - error = -ENOSPC; - goto err2; + error = -EFSCORRUPTED; + goto out_unreserve_dquot; } + /* * Fill in the child block. */ @@ -775,11 +776,12 @@ xfs_bmap_extents_to_btree( *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); return 0; -err2: +out_unreserve_dquot: xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); -err1: +out_root_realloc: xfs_iroot_realloc(ip, -1, whichfork); XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + ASSERT(ifp->if_broot == NULL); xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 059bc44c27e8..afbe336600e1 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1016,6 +1016,8 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ #define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */ +/* Do not use bit 15, di_flags is legacy and unchanging now */ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 30d1d60f1d46..09d9c8cfa4a0 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -415,6 +415,31 @@ xfs_dinode_verify_fork( return NULL; } +static xfs_failaddr_t +xfs_dinode_verify_forkoff( + struct xfs_dinode *dip, + struct xfs_mount *mp) +{ + if (!XFS_DFORK_Q(dip)) + return NULL; + + switch (dip->di_format) { + case XFS_DINODE_FMT_DEV: + if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) + return __this_address; + break; + case XFS_DINODE_FMT_LOCAL: /* fall through ... */ + case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ + case XFS_DINODE_FMT_BTREE: + if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3)) + return __this_address; + break; + default: + return __this_address; + } + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -470,6 +495,11 @@ xfs_dinode_verify( if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) return __this_address; + /* check for illegal values of forkoff */ + fa = xfs_dinode_verify_forkoff(dip, mp); + if (fa) + return fa; + /* Do we have appropriate data fork formats for the mode? */ switch (mode & S_IFMT) { case S_IFIFO: diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 036b5c7021eb..376bcb585ae6 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -17,7 +17,6 @@ #include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_rmap.h" -#include "xfs_alloc.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 5b3b177c0fc9..e386c9b0b4ab 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -126,6 +126,7 @@ xchk_inode_flags( { struct xfs_mount *mp = sc->mp; + /* di_flags are all taken, last bit cannot be used */ if (flags & ~XFS_DIFLAG_ANY) goto bad; @@ -172,8 +173,9 @@ xchk_inode_flags2( { struct xfs_mount *mp = sc->mp; + /* Unknown di_flags2 could be from a future kernel */ if (flags2 & ~XFS_DIFLAG2_ANY) - goto bad; + xchk_ino_set_warning(sc, ino); /* reflink flag requires reflink feature */ if ((flags2 & XFS_DIFLAG2_REFLINK) && diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index addbd74ecd8e..6de8d90041ff 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -702,13 +702,9 @@ xfs_bmap_punch_delalloc_range( struct xfs_iext_cursor icur; int error = 0; - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); - if (error) - goto out_unlock; - } + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) goto out_unlock; @@ -1584,7 +1580,7 @@ xfs_swap_extent_rmap( tirec.br_blockcount, &irec, &nimaps, 0); if (error) - goto out_defer; + goto out; ASSERT(nimaps == 1); ASSERT(tirec.br_startoff == irec.br_startoff); trace_xfs_swap_extent_rmap_remap_piece(ip, &irec); @@ -1599,22 +1595,22 @@ xfs_swap_extent_rmap( /* Remove the mapping from the donor file. */ error = xfs_bmap_unmap_extent(tp, tip, &uirec); if (error) - goto out_defer; + goto out; /* Remove the mapping from the source file. */ error = xfs_bmap_unmap_extent(tp, ip, &irec); if (error) - goto out_defer; + goto out; /* Map the donor file's blocks into the source file. */ error = xfs_bmap_map_extent(tp, ip, &uirec); if (error) - goto out_defer; + goto out; /* Map the source file's blocks into the donor file. */ error = xfs_bmap_map_extent(tp, tip, &irec); if (error) - goto out_defer; + goto out; error = xfs_defer_finish(tpp); tp = *tpp; @@ -1636,8 +1632,6 @@ xfs_swap_extent_rmap( tip->i_d.di_flags2 = tip_flags2; return 0; -out_defer: - xfs_defer_cancel(tp); out: trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_); tip->i_d.di_flags2 = tip_flags2; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 1c9d1398980b..12d8455bfbb2 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -532,6 +532,49 @@ xfs_buf_item_push( } /* + * Drop the buffer log item refcount and take appropriate action. This helper + * determines whether the bli must be freed or not, since a decrement to zero + * does not necessarily mean the bli is unused. + * + * Return true if the bli is freed, false otherwise. + */ +bool +xfs_buf_item_put( + struct xfs_buf_log_item *bip) +{ + struct xfs_log_item *lip = &bip->bli_item; + bool aborted; + bool dirty; + + /* drop the bli ref and return if it wasn't the last one */ + if (!atomic_dec_and_test(&bip->bli_refcount)) + return false; + + /* + * We dropped the last ref and must free the item if clean or aborted. + * If the bli is dirty and non-aborted, the buffer was clean in the + * transaction but still awaiting writeback from previous changes. In + * that case, the bli is freed on buffer writeback completion. + */ + aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || + XFS_FORCED_SHUTDOWN(lip->li_mountp); + dirty = bip->bli_flags & XFS_BLI_DIRTY; + if (dirty && !aborted) + return false; + + /* + * The bli is aborted or clean. An aborted item may be in the AIL + * regardless of dirty state. For example, consider an aborted + * transaction that invalidated a dirty bli and cleared the dirty + * state. + */ + if (aborted) + xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bip->bli_buf); + return true; +} + +/* * Release the buffer associated with the buf log item. If there is no dirty * logged data associated with the buffer recorded in the buf log item, then * free the buf log item and remove the reference to it in the buffer. @@ -556,76 +599,42 @@ xfs_buf_item_unlock( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - bool aborted; - bool hold = !!(bip->bli_flags & XFS_BLI_HOLD); - bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY); + bool released; + bool hold = bip->bli_flags & XFS_BLI_HOLD; + bool stale = bip->bli_flags & XFS_BLI_STALE; #if defined(DEBUG) || defined(XFS_WARN) - bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED); + bool ordered = bip->bli_flags & XFS_BLI_ORDERED; + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; #endif - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); - - /* Clear the buffer's association with this transaction. */ - bp->b_transp = NULL; - - /* - * The per-transaction state has been copied above so clear it from the - * bli. - */ - bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); - - /* - * If the buf item is marked stale, then don't do anything. We'll - * unlock the buffer and free the buf item when the buffer is unpinned - * for the last time. - */ - if (bip->bli_flags & XFS_BLI_STALE) { - trace_xfs_buf_item_unlock_stale(bip); - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); - if (!aborted) { - atomic_dec(&bip->bli_refcount); - return; - } - } - trace_xfs_buf_item_unlock(bip); /* - * If the buf item isn't tracking any data, free it, otherwise drop the - * reference we hold to it. If we are aborting the transaction, this may - * be the only reference to the buf item, so we free it anyway - * regardless of whether it is dirty or not. A dirty abort implies a - * shutdown, anyway. - * * The bli dirty state should match whether the blf has logged segments * except for ordered buffers, where only the bli should be dirty. */ ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) || (ordered && dirty && !xfs_buf_item_dirty_format(bip))); + ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); /* - * Clean buffers, by definition, cannot be in the AIL. However, aborted - * buffers may be in the AIL regardless of dirty state. An aborted - * transaction that invalidates a buffer already in the AIL may have - * marked it stale and cleared the dirty state, for example. - * - * Therefore if we are aborting a buffer and we've just taken the last - * reference away, we have to check if it is in the AIL before freeing - * it. We need to free it in this case, because an aborted transaction - * has already shut the filesystem down and this is the last chance we - * will have to do so. + * Clear the buffer's association with this transaction and + * per-transaction state from the bli, which has been copied above. */ - if (atomic_dec_and_test(&bip->bli_refcount)) { - if (aborted) { - ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); - xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - } else if (!dirty) - xfs_buf_item_relse(bp); - } + bp->b_transp = NULL; + bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); - if (!hold) - xfs_buf_relse(bp); + /* + * Unref the item and unlock the buffer unless held or stale. Stale + * buffers remain locked until final unpin unless the bli is freed by + * the unref call. The latter implies shutdown because buffer + * invalidation dirties the bli and transaction. + */ + released = xfs_buf_item_put(bip); + if (hold || (stale && !released)) + return; + ASSERT(!stale || test_bit(XFS_LI_ABORTED, &lip->li_flags)); + xfs_buf_relse(bp); } /* diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 3f7d7b72e7e6..90f65f891fab 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -51,6 +51,7 @@ struct xfs_buf_log_item { int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_relse(struct xfs_buf *); +bool xfs_buf_item_put(struct xfs_buf_log_item *); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_attach_iodone(struct xfs_buf *, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d957a46dc1cb..05db9540e459 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1563,7 +1563,7 @@ xfs_itruncate_extents_flags( error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags, XFS_ITRUNC_MAX_EXTENTS, &done); if (error) - goto out_bmap_cancel; + goto out; /* * Duplicate the transaction that has the permanent @@ -1599,14 +1599,6 @@ xfs_itruncate_extents_flags( out: *tpp = tp; return error; -out_bmap_cancel: - /* - * If the bunmapi call encounters an error, return to the caller where - * the transaction can be properly aborted. We just need to make sure - * we're not holding any resources that we were not when we came in. - */ - xfs_defer_cancel(tp); - goto out; } int diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index c3e74f9128e8..f48ffd7a8d3e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -471,8 +471,18 @@ xfs_vn_get_link_inline( struct inode *inode, struct delayed_call *done) { + char *link; + ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); - return XFS_I(inode)->i_df.if_u1.if_data; + + /* + * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if + * if_data is junk. + */ + link = XFS_I(inode)->i_df.if_u1.if_data; + if (!link) + return ERR_PTR(-EFSCORRUPTED); + return link; } STATIC int diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a21dc61ec09e..1fc9e9042e0e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1570,16 +1570,6 @@ xlog_find_zeroed( if (last_cycle != 0) { /* log completely written to */ xlog_put_bp(bp); return 0; - } else if (first_cycle != 1) { - /* - * If the cycle of the last block is zero, the cycle of - * the first block must be 1. If it's not, maybe we're - * not looking at a log... Bail out. - */ - xfs_warn(log->l_mp, - "Log inconsistent or not a log (last==0, first!=1)"); - error = -EINVAL; - goto bp_err; } /* we have a partially zeroed log */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 38f405415b88..42ea7bab9144 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -352,6 +352,47 @@ xfs_reflink_convert_cow( return error; } +/* + * Find the extent that maps the given range in the COW fork. Even if the extent + * is not shared we might have a preallocation for it in the COW fork. If so we + * use it that rather than trigger a new allocation. + */ +static int +xfs_find_trim_cow_extent( + struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + bool *shared, + bool *found) +{ + xfs_fileoff_t offset_fsb = imap->br_startoff; + xfs_filblks_t count_fsb = imap->br_blockcount; + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec got; + bool trimmed; + + *found = false; + + /* + * If we don't find an overlapping extent, trim the range we need to + * allocate to fit the hole we found. + */ + if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) || + got.br_startoff > offset_fsb) + return xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); + + *shared = true; + if (isnullstartblock(got.br_startblock)) { + xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); + return 0; + } + + /* real extent found - no need to allocate */ + xfs_trim_extent(&got, offset_fsb, count_fsb); + *imap = got; + *found = true; + return 0; +} + /* Allocate all CoW reservations covering a range of blocks in a file. */ int xfs_reflink_allocate_cow( @@ -363,78 +404,64 @@ xfs_reflink_allocate_cow( struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = imap->br_startoff; xfs_filblks_t count_fsb = imap->br_blockcount; - struct xfs_bmbt_irec got; - struct xfs_trans *tp = NULL; + struct xfs_trans *tp; int nimaps, error = 0; - bool trimmed; + bool found; xfs_filblks_t resaligned; xfs_extlen_t resblks = 0; - struct xfs_iext_cursor icur; -retry: - ASSERT(xfs_is_reflink_inode(ip)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(xfs_is_reflink_inode(ip)); - /* - * Even if the extent is not shared we might have a preallocation for - * it in the COW fork. If so use it. - */ - if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) && - got.br_startoff <= offset_fsb) { - *shared = true; - - /* If we have a real allocation in the COW fork we're done. */ - if (!isnullstartblock(got.br_startblock)) { - xfs_trim_extent(&got, offset_fsb, count_fsb); - *imap = got; - goto convert; - } + error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + if (error || !*shared) + return error; + if (found) + goto convert; - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); - } else { - error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); - if (error || !*shared) - goto out; - } + resaligned = xfs_aligned_fsb_count(imap->br_startoff, + imap->br_blockcount, xfs_get_cowextsz_hint(ip)); + resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); - if (!tp) { - resaligned = xfs_aligned_fsb_count(imap->br_startoff, - imap->br_blockcount, xfs_get_cowextsz_hint(ip)); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); + xfs_iunlock(ip, *lockmode); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); + *lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, *lockmode); - xfs_iunlock(ip, *lockmode); - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); - *lockmode = XFS_ILOCK_EXCL; - xfs_ilock(ip, *lockmode); + if (error) + return error; - if (error) - return error; + error = xfs_qm_dqattach_locked(ip, false); + if (error) + goto out_trans_cancel; - error = xfs_qm_dqattach_locked(ip, false); - if (error) - goto out; - goto retry; + /* + * Check for an overlapping extent again now that we dropped the ilock. + */ + error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + if (error || !*shared) + goto out_trans_cancel; + if (found) { + xfs_trans_cancel(tp); + goto convert; } error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) - goto out; + goto out_trans_cancel; xfs_trans_ijoin(tp, ip, 0); - nimaps = 1; - /* Allocate the entire reservation as unwritten blocks. */ + nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, resblks, imap, &nimaps); if (error) - goto out_trans_cancel; + goto out_unreserve; xfs_inode_set_cowblocks_tag(ip); - - /* Finish up. */ error = xfs_trans_commit(tp); if (error) return error; @@ -447,12 +474,12 @@ retry: return -ENOSPC; convert: return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb); -out_trans_cancel: + +out_unreserve: xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, XFS_QMOPT_RES_REGBLKS); -out: - if (tp) - xfs_trans_cancel(tp); +out_trans_cancel: + xfs_trans_cancel(tp); return error; } @@ -666,14 +693,12 @@ xfs_reflink_end_cow( if (!del.br_blockcount) goto prev_extent; - ASSERT(!isnullstartblock(got.br_startblock)); - /* - * Don't remap unwritten extents; these are - * speculatively preallocated CoW extents that have been - * allocated but have not yet been involved in a write. + * Only remap real extent that contain data. With AIO + * speculatively preallocations can leak into the range we + * are called upon, and we need to skip them. */ - if (got.br_state == XFS_EXT_UNWRITTEN) + if (!xfs_bmap_is_real_extent(&got)) goto prev_extent; /* Unmap the old blocks in the data fork. */ @@ -1195,35 +1220,92 @@ retry: return 0; } +/* Unlock both inodes after they've been prepped for a range clone. */ +STATIC void +xfs_reflink_remap_unlock( + struct file *file_in, + struct file *file_out) +{ + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); + bool same_inode = (inode_in == inode_out); + + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); + if (!same_inode) + xfs_iunlock(src, XFS_MMAPLOCK_SHARED); + inode_unlock(inode_out); + if (!same_inode) + inode_unlock_shared(inode_in); +} + /* - * Link a range of blocks from one file to another. + * If we're reflinking to a point past the destination file's EOF, we must + * zero any speculative post-EOF preallocations that sit between the old EOF + * and the destination file offset. */ -int -xfs_reflink_remap_range( +static int +xfs_reflink_zero_posteof( + struct xfs_inode *ip, + loff_t pos) +{ + loff_t isize = i_size_read(VFS_I(ip)); + + if (pos <= isize) + return 0; + + trace_xfs_zero_eof(ip, isize, pos - isize); + return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, + &xfs_iomap_ops); +} + +/* + * Prepare two files for range cloning. Upon a successful return both inodes + * will have the iolock and mmaplock held, the page cache of the out file will + * be truncated, and any leases on the out file will have been broken. This + * function borrows heavily from xfs_file_aio_write_checks. + * + * The VFS allows partial EOF blocks to "match" for dedupe even though it hasn't + * checked that the bytes beyond EOF physically match. Hence we cannot use the + * EOF block in the source dedupe range because it's not a complete block match, + * hence can introduce a corruption into the file that has it's block replaced. + * + * In similar fashion, the VFS file cloning also allows partial EOF blocks to be + * "block aligned" for the purposes of cloning entire files. However, if the + * source file range includes the EOF block and it lands within the existing EOF + * of the destination file, then we can expose stale data from beyond the source + * file EOF in the destination file. + * + * XFS doesn't support partial block sharing, so in both cases we have check + * these cases ourselves. For dedupe, we can simply round the length to dedupe + * down to the previous whole block and ignore the partial EOF block. While this + * means we can't dedupe the last block of a file, this is an acceptible + * tradeoff for simplicity on implementation. + * + * For cloning, we want to share the partial EOF block if it is also the new EOF + * block of the destination file. If the partial EOF block lies inside the + * existing destination EOF, then we have to abort the clone to avoid exposing + * stale data in the destination file. Hence we reject these clone attempts with + * -EINVAL in this case. + */ +STATIC int +xfs_reflink_remap_prep( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + u64 *len, bool is_dedupe) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); - struct xfs_mount *mp = src->i_mount; bool same_inode = (inode_in == inode_out); - xfs_fileoff_t sfsbno, dfsbno; - xfs_filblks_t fsblen; - xfs_extlen_t cowextsize; + u64 blkmask = i_blocksize(inode_in) - 1; ssize_t ret; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return -EOPNOTSUPP; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - /* Lock both files against IO */ ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); if (ret) @@ -1245,33 +1327,115 @@ xfs_reflink_remap_range( goto out_unlock; ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, - &len, is_dedupe); + len, is_dedupe); if (ret <= 0) goto out_unlock; + /* + * If the dedupe data matches, chop off the partial EOF block + * from the source file so we don't try to dedupe the partial + * EOF block. + */ + if (is_dedupe) { + *len &= ~blkmask; + } else if (*len & blkmask) { + /* + * The user is attempting to share a partial EOF block, + * if it's inside the destination EOF then reject it. + */ + if (pos_out + *len < i_size_read(inode_out)) { + ret = -EINVAL; + goto out_unlock; + } + } + /* Attach dquots to dest inode before changing block map */ ret = xfs_qm_dqattach(dest); if (ret) goto out_unlock; - trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); - /* - * Clear out post-eof preallocations because we don't have page cache - * backing the delayed allocations and they'll never get freed on - * their own. + * Zero existing post-eof speculative preallocations in the destination + * file. */ - if (xfs_can_free_eofblocks(dest, true)) { - ret = xfs_free_eofblocks(dest); - if (ret) - goto out_unlock; - } + ret = xfs_reflink_zero_posteof(dest, pos_out); + if (ret) + goto out_unlock; /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) goto out_unlock; + /* Zap any page cache for the destination file's range. */ + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + *len) - 1); + + /* If we're altering the file contents... */ + if (!is_dedupe) { + /* + * ...update the timestamps (which will grab the ilock again + * from xfs_fs_dirty_inode, so we have to call it before we + * take the ilock). + */ + if (!(file_out->f_mode & FMODE_NOCMTIME)) { + ret = file_update_time(file_out); + if (ret) + goto out_unlock; + } + + /* + * ...clear the security bits if the process is not being run + * by root. This keeps people from modifying setuid and setgid + * binaries. + */ + ret = file_remove_privs(file_out); + if (ret) + goto out_unlock; + } + + return 1; +out_unlock: + xfs_reflink_remap_unlock(file_in, file_out); + return ret; +} + +/* + * Link a range of blocks from one file to another. + */ +int +xfs_reflink_remap_range( + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe) +{ + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); + struct xfs_mount *mp = src->i_mount; + xfs_fileoff_t sfsbno, dfsbno; + xfs_filblks_t fsblen; + xfs_extlen_t cowextsize; + ssize_t ret; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return -EOPNOTSUPP; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + /* Prepare and then clone file data. */ + ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, + &len, is_dedupe); + if (ret <= 0) + return ret; + + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + dfsbno = XFS_B_TO_FSBT(mp, pos_out); sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); @@ -1280,10 +1444,6 @@ xfs_reflink_remap_range( if (ret) goto out_unlock; - /* Zap any page cache for the destination file's range. */ - truncate_inode_pages_range(&inode_out->i_data, pos_out, - PAGE_ALIGN(pos_out + len) - 1); - /* * Carry the cowextsize hint from src to dest if we're sharing the * entire source file to the entire destination file, the source file @@ -1300,12 +1460,7 @@ xfs_reflink_remap_range( is_dedupe); out_unlock: - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); - if (!same_inode) - xfs_iunlock(src, XFS_MMAPLOCK_SHARED); - inode_unlock(inode_out); - if (!same_inode) - inode_unlock_shared(inode_in); + xfs_reflink_remap_unlock(file_in, file_out); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return ret; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ad315e83bc02..3043e5ed6495 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -473,7 +473,6 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index bedc5a5133a5..912b42f5fe4a 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -259,6 +259,14 @@ xfs_trans_alloc( struct xfs_trans *tp; int error; + /* + * Allocate the handle before we do our freeze accounting and setting up + * GFP_NOFS allocation context so that we avoid lockdep false positives + * by doing GFP_KERNEL allocations inside sb_start_intwrite(). + */ + tp = kmem_zone_zalloc(xfs_trans_zone, + (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); + if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); @@ -270,8 +278,6 @@ xfs_trans_alloc( mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); atomic_inc(&mp->m_active_trans); - tp = kmem_zone_zalloc(xfs_trans_zone, - (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); tp->t_magic = XFS_TRANS_HEADER_MAGIC; tp->t_flags = flags; tp->t_mountp = mp; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 15919f67a88f..286a287ac57a 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -322,49 +322,38 @@ xfs_trans_read_buf_map( } /* - * Release the buffer bp which was previously acquired with one of the - * xfs_trans_... buffer allocation routines if the buffer has not - * been modified within this transaction. If the buffer is modified - * within this transaction, do decrement the recursion count but do - * not release the buffer even if the count goes to 0. If the buffer is not - * modified within the transaction, decrement the recursion count and - * release the buffer if the recursion count goes to 0. + * Release a buffer previously joined to the transaction. If the buffer is + * modified within this transaction, decrement the recursion count but do not + * release the buffer even if the count goes to 0. If the buffer is not modified + * within the transaction, decrement the recursion count and release the buffer + * if the recursion count goes to 0. * - * If the buffer is to be released and it was not modified before - * this transaction began, then free the buf_log_item associated with it. + * If the buffer is to be released and it was not already dirty before this + * transaction began, then also free the buf_log_item associated with it. * - * If the transaction pointer is NULL, make this just a normal - * brelse() call. + * If the transaction pointer is NULL, this is a normal xfs_buf_relse() call. */ void xfs_trans_brelse( - xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_trans *tp, + struct xfs_buf *bp) { - struct xfs_buf_log_item *bip; - int freed; + struct xfs_buf_log_item *bip = bp->b_log_item; - /* - * Default to a normal brelse() call if the tp is NULL. - */ - if (tp == NULL) { - ASSERT(bp->b_transp == NULL); + ASSERT(bp->b_transp == tp); + + if (!tp) { xfs_buf_relse(bp); return; } - ASSERT(bp->b_transp == tp); - bip = bp->b_log_item; + trace_xfs_trans_brelse(bip); ASSERT(bip->bli_item.li_type == XFS_LI_BUF); - ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); - ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); - trace_xfs_trans_brelse(bip); - /* - * If the release is just for a recursive lock, - * then decrement the count and return. + * If the release is for a recursive lookup, then decrement the count + * and return. */ if (bip->bli_recur > 0) { bip->bli_recur--; @@ -372,64 +361,24 @@ xfs_trans_brelse( } /* - * If the buffer is dirty within this transaction, we can't + * If the buffer is invalidated or dirty in this transaction, we can't * release it until we commit. */ if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags)) return; - - /* - * If the buffer has been invalidated, then we can't release - * it until the transaction commits to disk unless it is re-dirtied - * as part of this transaction. This prevents us from pulling - * the item from the AIL before we should. - */ if (bip->bli_flags & XFS_BLI_STALE) return; - ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); - /* - * Free up the log item descriptor tracking the released item. + * Unlink the log item from the transaction and clear the hold flag, if + * set. We wouldn't want the next user of the buffer to get confused. */ + ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); xfs_trans_del_item(&bip->bli_item); + bip->bli_flags &= ~XFS_BLI_HOLD; - /* - * Clear the hold flag in the buf log item if it is set. - * We wouldn't want the next user of the buffer to - * get confused. - */ - if (bip->bli_flags & XFS_BLI_HOLD) { - bip->bli_flags &= ~XFS_BLI_HOLD; - } - - /* - * Drop our reference to the buf log item. - */ - freed = atomic_dec_and_test(&bip->bli_refcount); - - /* - * If the buf item is not tracking data in the log, then we must free it - * before releasing the buffer back to the free pool. - * - * If the fs has shutdown and we dropped the last reference, it may fall - * on us to release a (possibly dirty) bli if it never made it to the - * AIL (e.g., the aborted unpin already happened and didn't release it - * due to our reference). Since we're already shutdown and need - * ail_lock, just force remove from the AIL and release the bli here. - */ - if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) { - xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - } else if (!(bip->bli_flags & XFS_BLI_DIRTY)) { -/*** - ASSERT(bp->b_pincount == 0); -***/ - ASSERT(atomic_read(&bip->bli_refcount) == 0); - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); - ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); - xfs_buf_item_relse(bp); - } + /* drop the reference to the bli */ + xfs_buf_item_put(bip); bp->b_transp = NULL; xfs_buf_relse(bp); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7b75ff6e2fce..d7701d466b60 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -68,7 +68,7 @@ */ #ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* @@ -613,8 +613,8 @@ #define EXIT_DATA \ *(.exit.data .exit.data.*) \ - *(.fini_array) \ - *(.dtors) \ + *(.fini_array .fini_array.*) \ + *(.dtors .dtors.*) \ MEM_DISCARD(exit.data*) \ MEM_DISCARD(exit.rodata*) diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index 989f8e52864d..971bb7853776 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -87,9 +87,10 @@ struct drm_client_dev { struct drm_file *file; }; -int drm_client_new(struct drm_device *dev, struct drm_client_dev *client, - const char *name, const struct drm_client_funcs *funcs); +int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, + const char *name, const struct drm_client_funcs *funcs); void drm_client_release(struct drm_client_dev *client); +void drm_client_add(struct drm_client_dev *client); void drm_client_dev_unregister(struct drm_device *dev); void drm_client_dev_hotplug(struct drm_device *dev); diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ff20b677fb9f..22254c1fe1c5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -412,6 +412,7 @@ struct cgroup { * specific task are charged to the dom_cgrp. */ struct cgroup *dom_cgrp; + struct cgroup *old_dom_cgrp; /* used while enabling threaded */ /* per-cpu recursive resource statistics */ struct cgroup_rstat_cpu __percpu *rstat_cpu; diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 8942e61f0028..8ab5df769923 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -53,12 +53,20 @@ enum fpga_mgr_states { FPGA_MGR_STATE_OPERATING, }; -/* - * FPGA Manager flags - * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported - * FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting - * FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first - * FPGA_MGR_COMPRESSED_BITSTREAM: FPGA bitstream is compressed +/** + * DOC: FPGA Manager flags + * + * Flags used in the &fpga_image_info->flags field + * + * %FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported + * + * %FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting + * + * %FPGA_MGR_ENCRYPTED_BITSTREAM: indicates bitstream is encrypted + * + * %FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first + * + * %FPGA_MGR_COMPRESSED_BITSTREAM: FPGA bitstream is compressed */ #define FPGA_MGR_PARTIAL_RECONFIG BIT(0) #define FPGA_MGR_EXTERNAL_CONFIG BIT(1) diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c0b4a1c22ff..897eae8faee1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1828,8 +1828,10 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, struct inode *inode_out, loff_t pos_out, u64 *len, bool is_dedupe); +extern int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); + struct file *file_out, loff_t pos_out, u64 len); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same); @@ -2773,19 +2775,6 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - file_start_write(file_out); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - file_end_write(file_out); - - return ret; -} - /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 0ea328e71ec9..a4d5eb37744a 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -95,6 +95,13 @@ struct gpio_irq_chip { unsigned int num_parents; /** + * @parent_irq: + * + * For use by gpiochip_set_cascaded_irqchip() + */ + unsigned int parent_irq; + + /** * @parents: * * A list of interrupt parents of a GPIO chip. This is owned by the diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6b68e345f0ca..087fd5f48c91 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pd(struct vm_area_struct *vma, @@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } +static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, + pte_t *ptep) +{ + return 0; +} + +static inline void adjust_range_if_pmd_sharing_possible( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} + #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 83a33a1873a6..7f5ca2cd3a32 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -90,6 +90,8 @@ struct mlx5_hairpin { u32 *rqn; u32 *sqn; + + bool peer_gone; }; struct mlx5_hairpin * diff --git a/include/linux/mm.h b/include/linux/mm.h index a61ebe8ad4ca..0416a7204be3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2455,6 +2455,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, return vma; } +static inline bool range_in_vma(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + return (vma && vma->vm_start <= start && end <= vma->vm_end); +} + #ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); void vma_set_page_prot(struct vm_area_struct *vma); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1e22d96734e0..d4b0c79d2924 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -668,16 +668,6 @@ typedef struct pglist_data { wait_queue_head_t kcompactd_wait; struct task_struct *kcompactd; #endif -#ifdef CONFIG_NUMA_BALANCING - /* Lock serializing the migrate rate limiting window */ - spinlock_t numabalancing_migrate_lock; - - /* Rate limiting time interval */ - unsigned long numabalancing_migrate_next_window; - - /* Number of pages migrated during the rate limiting time interval */ - unsigned long numabalancing_migrate_nr_pages; -#endif /* * This is a per-node reserve of pages that are not available * to userspace allocations. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5ab98053c8..d837dad24b4c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1730,6 +1730,8 @@ enum netdev_priv_flags { * switch driver and used to set the phys state of the * switch port. * + * @wol_enabled: Wake-on-LAN is enabled + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2014,6 +2016,7 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; + unsigned wol_enabled:1; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2455,6 +2458,13 @@ struct netdev_notifier_info { struct netlink_ext_ack *extack; }; +struct netdev_notifier_info_ext { + struct netdev_notifier_info info; /* must be first */ + union { + u32 mtu; + } ext; +}; + struct netdev_notifier_change_info { struct netdev_notifier_info info; /* must be first */ unsigned int flags_changed; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 07efffd0c759..bbe99d2b28b4 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -215,6 +215,8 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, break; case NFPROTO_ARP: #ifdef CONFIG_NETFILTER_FAMILY_ARP + if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp))) + break; hook_head = rcu_dereference(net->nf.hooks_arp[hook]); #endif break; diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 10f92e1d8e7b..bf309ff6f244 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -99,6 +99,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + int (*filter_match)(struct perf_event *event); int num_events; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index c0e795d95477..1c89611e0e06 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -36,6 +36,7 @@ enum { SCIx_SH4_SCIF_FIFODATA_REGTYPE, SCIx_SH7705_SCIF_REGTYPE, SCIx_HSCIF_REGTYPE, + SCIx_RZ_SCIFA_REGTYPE, SCIx_NR_REGTYPES, }; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5a28ac9284f0..3f529ad9a9d2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -251,6 +251,7 @@ static inline bool idle_should_enter_s2idle(void) return unlikely(s2idle_state == S2IDLE_STATE_ENTER); } +extern bool pm_suspend_via_s2idle(void); extern void __init pm_states_init(void); extern void s2idle_set_ops(const struct platform_s2idle_ops *ops); extern void s2idle_wake(void); @@ -282,6 +283,7 @@ static inline void pm_set_suspend_via_firmware(void) {} static inline void pm_set_resume_via_firmware(void) {} static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } +static inline bool pm_suspend_via_s2idle(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 9397628a1967..cb462f9ab7dd 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -5,6 +5,24 @@ #include <linux/if_vlan.h> #include <uapi/linux/virtio_net.h> +static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, + const struct virtio_net_hdr *hdr) +{ + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_UDP: + skb->protocol = cpu_to_be16(ETH_P_IP); + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + skb->protocol = cpu_to_be16(ETH_P_IPV6); + break; + default: + return -EINVAL; + } + + return 0; +} + static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) diff --git a/include/media/v4l2-fh.h b/include/media/v4l2-fh.h index ea73fef8bdc0..8586cfb49828 100644 --- a/include/media/v4l2-fh.h +++ b/include/media/v4l2-fh.h @@ -38,10 +38,13 @@ struct v4l2_ctrl_handler; * @prio: priority of the file handler, as defined by &enum v4l2_priority * * @wait: event' s wait queue + * @subscribe_lock: serialise changes to the subscribed list; guarantee that + * the add and del event callbacks are orderly called * @subscribed: list of subscribed events * @available: list of events waiting to be dequeued * @navailable: number of available events at @available list * @sequence: event sequence number + * * @m2m_ctx: pointer to &struct v4l2_m2m_ctx */ struct v4l2_fh { @@ -52,6 +55,7 @@ struct v4l2_fh { /* Events */ wait_queue_head_t wait; + struct mutex subscribe_lock; struct list_head subscribed; struct list_head available; unsigned int navailable; diff --git a/include/net/bonding.h b/include/net/bonding.h index a2d058170ea3..b46d68acf701 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -139,12 +139,6 @@ struct bond_parm_tbl { int mode; }; -struct netdev_notify_work { - struct delayed_work work; - struct net_device *dev; - struct netdev_bonding_info bonding_info; -}; - struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -172,6 +166,7 @@ struct slave { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif + struct delayed_work notify_work; struct kobject kobj; struct rtnl_link_stats64 slave_stats; }; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8ebabc9873d1..4de121e24ce5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4852,8 +4852,6 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried. * @freq: the freqency(in MHz) to be queried. - * @ptr: pointer where the regdb wmm data is to be stored (or %NULL if - * irrelevant). This can be used later for deduplication. * @rule: pointer to store the wmm rule from the regulatory db. * * Self-managed wireless drivers can use this function to query diff --git a/include/net/devlink.h b/include/net/devlink.h index b9b89d6604d4..99efc156a309 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -298,7 +298,7 @@ struct devlink_resource { #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 -#define DEVLINK_PARAM_MAX_STRING_VALUE 32 +#define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { DEVLINK_PARAM_TYPE_U8, DEVLINK_PARAM_TYPE_U16, @@ -311,7 +311,7 @@ union devlink_param_value { u8 vu8; u16 vu16; u32 vu32; - const char *vstr; + char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE]; bool vbool; }; @@ -553,6 +553,8 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); +void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src); struct devlink_region *devlink_region_create(struct devlink *devlink, const char *region_name, u32 region_max_snapshots, @@ -789,6 +791,12 @@ devlink_param_value_changed(struct devlink *devlink, u32 param_id) { } +static inline void +devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) +{ +} + static inline struct devlink_region * devlink_region_create(struct devlink *devlink, const char *region_name, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e03b93360f33..a80fd0ac4563 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -130,12 +130,6 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } -static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) -{ - return rcu_dereference_check(ireq->ireq_opt, - refcount_read(&ireq->req.rsk_refcnt) > 0); -} - struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 69c91d1934c1..c9b7b136939d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, diff --git a/include/net/netlink.h b/include/net/netlink.h index 0c154f98e987..39e1d875d507 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -153,7 +153,7 @@ * nla_find() find attribute in stream of attributes * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs - * nla_parse_nested() parse nested attribuets + * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= diff --git a/include/soc/fsl/bman.h b/include/soc/fsl/bman.h index eaaf56df4086..5b99cb2ea5ef 100644 --- a/include/soc/fsl/bman.h +++ b/include/soc/fsl/bman.h @@ -126,4 +126,12 @@ int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num); */ int bman_acquire(struct bman_pool *pool, struct bm_buffer *bufs, u8 num); +/** + * bman_is_probed - Check if bman is probed + * + * Returns 1 if the bman driver successfully probed, -1 if the bman driver + * failed to probe or 0 if the bman driver did not probed yet. + */ +int bman_is_probed(void); + #endif /* __FSL_BMAN_H */ diff --git a/include/soc/fsl/qman.h b/include/soc/fsl/qman.h index d4dfefdee6c1..597783b8a3a0 100644 --- a/include/soc/fsl/qman.h +++ b/include/soc/fsl/qman.h @@ -1186,4 +1186,12 @@ int qman_alloc_cgrid_range(u32 *result, u32 count); */ int qman_release_cgrid(u32 id); +/** + * qman_is_probed - Check if qman is probed + * + * Returns 1 if the qman driver successfully probed, -1 if the qman driver + * failed to probe or 0 if the qman driver did not probed yet. + */ +int qman_is_probed(void); + #endif /* __FSL_QMAN_H */ diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 711372845945..705b33d1e395 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -70,33 +70,6 @@ TRACE_EVENT(mm_migrate_pages, __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); - -TRACE_EVENT(mm_numa_migrate_ratelimit, - - TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages), - - TP_ARGS(p, dst_nid, nr_pages), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN) - __field( pid_t, pid) - __field( int, dst_nid) - __field( unsigned long, nr_pages) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->dst_nid = dst_nid; - __entry->nr_pages = nr_pages; - ), - - TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu", - __entry->comm, - __entry->pid, - __entry->dst_nid, - __entry->nr_pages) -); #endif /* _TRACE_MIGRATE_H */ /* This part must be outside protection */ diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 196587b8f204..573d5b901fb1 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -56,7 +56,6 @@ enum rxrpc_peer_trace { rxrpc_peer_new, rxrpc_peer_processing, rxrpc_peer_put, - rxrpc_peer_queued_error, }; enum rxrpc_conn_trace { @@ -257,8 +256,7 @@ enum rxrpc_tx_point { EM(rxrpc_peer_got, "GOT") \ EM(rxrpc_peer_new, "NEW") \ EM(rxrpc_peer_processing, "PRO") \ - EM(rxrpc_peer_put, "PUT") \ - E_(rxrpc_peer_queued_error, "QER") + E_(rxrpc_peer_put, "PUT") #define rxrpc_conn_traces \ EM(rxrpc_conn_got, "GOT") \ @@ -933,6 +931,7 @@ TRACE_EVENT(rxrpc_tx_packet, TP_fast_assign( __entry->call = call_id; memcpy(&__entry->whdr, whdr, sizeof(__entry->whdr)); + __entry->where = where; ), TP_printk("c=%08x %08x:%08x:%08x:%04x %08x %08x %02x %02x %s %s", diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h index e4732d3c2998..b0f8e87235bd 100644 --- a/include/uapi/asm-generic/hugetlb_encode.h +++ b/include/uapi/asm-generic/hugetlb_encode.h @@ -26,7 +26,9 @@ #define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h index 015a4c0bbb47..7a8a26751c23 100644 --- a/include/uapi/linux/memfd.h +++ b/include/uapi/linux/memfd.h @@ -25,7 +25,9 @@ #define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB #define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB #define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index bfd5938fede6..d0f515d53299 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -28,7 +28,9 @@ #define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB #define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB #define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h index dde1344f047c..6507ad0afc81 100644 --- a/include/uapi/linux/shm.h +++ b/include/uapi/linux/shm.h @@ -65,7 +65,9 @@ struct shmid_ds { #define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define SHM_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB #define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define SHM_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB #define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index ac9e8c96d9bd..8cb3a6fef553 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -18,14 +18,17 @@ struct smc_diag_req { * on the internal clcsock, and more SMC-related socket data */ struct smc_diag_msg { - __u8 diag_family; - __u8 diag_state; - __u8 diag_mode; - __u8 diag_shutdown; + __u8 diag_family; + __u8 diag_state; + union { + __u8 diag_mode; + __u8 diag_fallback; /* the old name of the field */ + }; + __u8 diag_shutdown; struct inet_diag_sockid id; - __u32 diag_uid; - __u64 diag_inode; + __u32 diag_uid; + __aligned_u64 diag_inode; }; /* Mode of a connection */ @@ -99,11 +102,11 @@ struct smc_diag_fallback { }; struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ - __u32 linkid; /* Link identifier */ - __u64 peer_gid; /* Peer GID */ - __u64 my_gid; /* My GID */ - __u64 token; /* Token of DMB */ - __u64 peer_token; /* Token of remote DMBE */ + __u32 linkid; /* Link identifier */ + __aligned_u64 peer_gid; /* Peer GID */ + __aligned_u64 my_gid; /* My GID */ + __aligned_u64 token; /* Token of DMB */ + __aligned_u64 peer_token; /* Token of remote DMBE */ }; #endif /* _UAPI_SMC_DIAG_H_ */ diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 09d00f8c442b..09502de447f5 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -40,5 +40,6 @@ struct udphdr { #define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ #define UDP_ENCAP_GTP0 4 /* GSM TS 09.60 */ #define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */ +#define UDP_ENCAP_RXRPC 6 #endif /* _UAPI_LINUX_UDP_H */ diff --git a/ipc/shm.c b/ipc/shm.c index 4cd402e4cfeb..1c65fb357395 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -206,7 +206,7 @@ err: * Callers of shm_lock() must validate the status of the returned ipc * object pointer and error out as appropriate. */ - return (void *)ipcp; + return ERR_CAST(ipcp); } static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 22ad967d1e5f..830d7f095748 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -129,7 +129,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, struct bpf_cgroup_storage *storage; struct bpf_storage_buffer *new; - if (flags & BPF_NOEXIST) + if (flags != BPF_ANY && flags != BPF_EXIST) return -EINVAL; storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, @@ -195,6 +195,9 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) return ERR_PTR(-EINVAL); + if (attr->value_size == 0) + return ERR_PTR(-EINVAL); + if (attr->value_size > PAGE_SIZE) return ERR_PTR(-E2BIG); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb07e74b34a2..465952a8e465 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2896,6 +2896,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, u64 umin_val, umax_val; u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; + if (insn_bitness == 32) { + /* Relevant for 32-bit RSH: Information can propagate towards + * LSB, so it isn't sufficient to only truncate the output to + * 32 bits. + */ + coerce_reg_to_size(dst_reg, 4); + coerce_reg_to_size(&src_reg, 4); + } + smin_val = src_reg.smin_value; smax_val = src_reg.smax_value; umin_val = src_reg.umin_value; @@ -3131,7 +3140,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, if (BPF_CLASS(insn->code) != BPF_ALU64) { /* 32-bit ALU ops are (32,32)->32 */ coerce_reg_to_size(dst_reg, 4); - coerce_reg_to_size(&src_reg, 4); } __reg_deduce_bounds(dst_reg); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index aae10baf1902..4a3dae2a8283 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2836,11 +2836,12 @@ restart: } /** - * cgroup_save_control - save control masks of a subtree + * cgroup_save_control - save control masks and dom_cgrp of a subtree * @cgrp: root of the target subtree * - * Save ->subtree_control and ->subtree_ss_mask to the respective old_ - * prefixed fields for @cgrp's subtree including @cgrp itself. + * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the + * respective old_ prefixed fields for @cgrp's subtree including @cgrp + * itself. */ static void cgroup_save_control(struct cgroup *cgrp) { @@ -2850,6 +2851,7 @@ static void cgroup_save_control(struct cgroup *cgrp) cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { dsct->old_subtree_control = dsct->subtree_control; dsct->old_subtree_ss_mask = dsct->subtree_ss_mask; + dsct->old_dom_cgrp = dsct->dom_cgrp; } } @@ -2875,11 +2877,12 @@ static void cgroup_propagate_control(struct cgroup *cgrp) } /** - * cgroup_restore_control - restore control masks of a subtree + * cgroup_restore_control - restore control masks and dom_cgrp of a subtree * @cgrp: root of the target subtree * - * Restore ->subtree_control and ->subtree_ss_mask from the respective old_ - * prefixed fields for @cgrp's subtree including @cgrp itself. + * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the + * respective old_ prefixed fields for @cgrp's subtree including @cgrp + * itself. */ static void cgroup_restore_control(struct cgroup *cgrp) { @@ -2889,6 +2892,7 @@ static void cgroup_restore_control(struct cgroup *cgrp) cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { dsct->subtree_control = dsct->old_subtree_control; dsct->subtree_ss_mask = dsct->old_subtree_ss_mask; + dsct->dom_cgrp = dsct->old_dom_cgrp; } } @@ -3196,6 +3200,8 @@ static int cgroup_enable_threaded(struct cgroup *cgrp) { struct cgroup *parent = cgroup_parent(cgrp); struct cgroup *dom_cgrp = parent->dom_cgrp; + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; int ret; lockdep_assert_held(&cgroup_mutex); @@ -3225,12 +3231,13 @@ static int cgroup_enable_threaded(struct cgroup *cgrp) */ cgroup_save_control(cgrp); - cgrp->dom_cgrp = dom_cgrp; + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) + if (dsct == cgrp || cgroup_is_threaded(dsct)) + dsct->dom_cgrp = dom_cgrp; + ret = cgroup_apply_control(cgrp); if (!ret) parent->nr_threaded_children++; - else - cgrp->dom_cgrp = cgrp; cgroup_finalize_control(cgrp, ret); return ret; diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 0be047dbd897..65a3b7e55b9f 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -260,7 +260,7 @@ static void test_cycle_work(struct work_struct *work) { struct test_cycle *cycle = container_of(work, typeof(*cycle), work); struct ww_acquire_ctx ctx; - int err; + int err, erra = 0; ww_acquire_init(&ctx, &ww_class); ww_mutex_lock(&cycle->a_mutex, &ctx); @@ -270,17 +270,19 @@ static void test_cycle_work(struct work_struct *work) err = ww_mutex_lock(cycle->b_mutex, &ctx); if (err == -EDEADLK) { + err = 0; ww_mutex_unlock(&cycle->a_mutex); ww_mutex_lock_slow(cycle->b_mutex, &ctx); - err = ww_mutex_lock(&cycle->a_mutex, &ctx); + erra = ww_mutex_lock(&cycle->a_mutex, &ctx); } if (!err) ww_mutex_unlock(cycle->b_mutex); - ww_mutex_unlock(&cycle->a_mutex); + if (!erra) + ww_mutex_unlock(&cycle->a_mutex); ww_acquire_fini(&ctx); - cycle->result = err; + cycle->result = err ?: erra; } static int __test_cycle(unsigned int nthreads) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 5342f6fc022e..0bd595a0b610 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -63,6 +63,12 @@ static DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head); enum s2idle_states __read_mostly s2idle_state; static DEFINE_RAW_SPINLOCK(s2idle_lock); +bool pm_suspend_via_s2idle(void) +{ + return mem_sleep_current == PM_SUSPEND_TO_IDLE; +} +EXPORT_SYMBOL_GPL(pm_suspend_via_s2idle); + void s2idle_set_ops(const struct platform_s2idle_ops *ops) { lock_system_sleep(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 625bc9897f62..ad97f3ba5ec5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1167,7 +1167,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (task_cpu(p) != new_cpu) { if (p->sched_class->migrate_task_rq) - p->sched_class->migrate_task_rq(p); + p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; rseq_migrate(p); perf_event_task_migrate(p); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 997ea7b839fa..91e4202b0634 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1607,7 +1607,7 @@ out: return cpu; } -static void migrate_task_rq_dl(struct task_struct *p) +static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused) { struct rq *rq; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f808ddf2a868..7fc4a371bdd2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1392,6 +1392,17 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, int last_cpupid, this_cpupid; this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid); + last_cpupid = page_cpupid_xchg_last(page, this_cpupid); + + /* + * Allow first faults or private faults to migrate immediately early in + * the lifetime of a task. The magic number 4 is based on waiting for + * two full passes of the "multi-stage node selection" test that is + * executed below. + */ + if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) && + (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid))) + return true; /* * Multi-stage node selection is used in conjunction with a periodic @@ -1410,7 +1421,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, * This quadric squishes small probabilities, making it less likely we * act on an unlikely task<->page relation. */ - last_cpupid = page_cpupid_xchg_last(page, this_cpupid); if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != dst_nid) return false; @@ -1514,6 +1524,21 @@ struct task_numa_env { static void task_numa_assign(struct task_numa_env *env, struct task_struct *p, long imp) { + struct rq *rq = cpu_rq(env->dst_cpu); + + /* Bail out if run-queue part of active NUMA balance. */ + if (xchg(&rq->numa_migrate_on, 1)) + return; + + /* + * Clear previous best_cpu/rq numa-migrate flag, since task now + * found a better CPU to move/swap. + */ + if (env->best_cpu != -1) { + rq = cpu_rq(env->best_cpu); + WRITE_ONCE(rq->numa_migrate_on, 0); + } + if (env->best_task) put_task_struct(env->best_task); if (p) @@ -1553,6 +1578,13 @@ static bool load_too_imbalanced(long src_load, long dst_load, } /* + * Maximum NUMA importance can be 1998 (2*999); + * SMALLIMP @ 30 would be close to 1998/64. + * Used to deter task migration. + */ +#define SMALLIMP 30 + +/* * This checks if the overall compute and NUMA accesses of the system would * be improved if the source tasks was migrated to the target dst_cpu taking * into account that it might be best if task running on the dst_cpu should @@ -1569,6 +1601,9 @@ static void task_numa_compare(struct task_numa_env *env, long moveimp = imp; int dist = env->dist; + if (READ_ONCE(dst_rq->numa_migrate_on)) + return; + rcu_read_lock(); cur = task_rcu_dereference(&dst_rq->curr); if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur))) @@ -1582,7 +1617,7 @@ static void task_numa_compare(struct task_numa_env *env, goto unlock; if (!cur) { - if (maymove || imp > env->best_imp) + if (maymove && moveimp >= env->best_imp) goto assign; else goto unlock; @@ -1625,16 +1660,22 @@ static void task_numa_compare(struct task_numa_env *env, task_weight(cur, env->dst_nid, dist); } - if (imp <= env->best_imp) - goto unlock; - if (maymove && moveimp > imp && moveimp > env->best_imp) { - imp = moveimp - 1; + imp = moveimp; cur = NULL; goto assign; } /* + * If the NUMA importance is less than SMALLIMP, + * task migration might only result in ping pong + * of tasks and also hurt performance due to cache + * misses. + */ + if (imp < SMALLIMP || imp <= env->best_imp + SMALLIMP / 2) + goto unlock; + + /* * In the overloaded case, try and keep the load balanced. */ load = task_h_load(env->p) - task_h_load(cur); @@ -1710,6 +1751,7 @@ static int task_numa_migrate(struct task_struct *p) .best_cpu = -1, }; struct sched_domain *sd; + struct rq *best_rq; unsigned long taskweight, groupweight; int nid, ret, dist; long taskimp, groupimp; @@ -1805,20 +1847,17 @@ static int task_numa_migrate(struct task_struct *p) if (env.best_cpu == -1) return -EAGAIN; - /* - * Reset the scan period if the task is being rescheduled on an - * alternative node to recheck if the tasks is now properly placed. - */ - p->numa_scan_period = task_scan_start(p); - + best_rq = cpu_rq(env.best_cpu); if (env.best_task == NULL) { ret = migrate_task_to(p, env.best_cpu); + WRITE_ONCE(best_rq->numa_migrate_on, 0); if (ret != 0) trace_sched_stick_numa(p, env.src_cpu, env.best_cpu); return ret; } ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu); + WRITE_ONCE(best_rq->numa_migrate_on, 0); if (ret != 0) trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task)); @@ -2596,6 +2635,39 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr) } } +static void update_scan_period(struct task_struct *p, int new_cpu) +{ + int src_nid = cpu_to_node(task_cpu(p)); + int dst_nid = cpu_to_node(new_cpu); + + if (!static_branch_likely(&sched_numa_balancing)) + return; + + if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING)) + return; + + if (src_nid == dst_nid) + return; + + /* + * Allow resets if faults have been trapped before one scan + * has completed. This is most likely due to a new task that + * is pulled cross-node due to wakeups or load balancing. + */ + if (p->numa_scan_seq) { + /* + * Avoid scan adjustments if moving to the preferred + * node or if the task was not previously running on + * the preferred node. + */ + if (dst_nid == p->numa_preferred_nid || + (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid)) + return; + } + + p->numa_scan_period = task_scan_start(p); +} + #else static void task_tick_numa(struct rq *rq, struct task_struct *curr) { @@ -2609,6 +2681,10 @@ static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p) { } +static inline void update_scan_period(struct task_struct *p, int new_cpu) +{ +} + #endif /* CONFIG_NUMA_BALANCING */ static void @@ -6275,7 +6351,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se); * cfs_rq_of(p) references at time of call are still valid and identify the * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held. */ -static void migrate_task_rq_fair(struct task_struct *p) +static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) { /* * As blocked tasks retain absolute vruntime the migration needs to @@ -6328,6 +6404,8 @@ static void migrate_task_rq_fair(struct task_struct *p) /* We have migrated, no longer consider this task hot */ p->se.exec_start = 0; + + update_scan_period(p, new_cpu); } static void task_dead_fair(struct task_struct *p) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4a2e8cae63c4..455fa330de04 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -783,6 +783,7 @@ struct rq { #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; unsigned int nr_preferred_running; + unsigned int numa_migrate_on; #endif #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; @@ -1523,7 +1524,7 @@ struct sched_class { #ifdef CONFIG_SMP int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); - void (*migrate_task_rq)(struct task_struct *p); + void (*migrate_task_rq)(struct task_struct *p, int new_cpu); void (*task_woken)(struct rq *this_rq, struct task_struct *task); diff --git a/lib/Makefile b/lib/Makefile index ca3f7ebb900d..423876446810 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -119,7 +119,6 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o -CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500) obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ diff --git a/lib/bch.c b/lib/bch.c index 7b0f2006698b..5db6d3a4c8a6 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -79,20 +79,19 @@ #define GF_T(_p) (CONFIG_BCH_CONST_T) #define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1) #define BCH_MAX_M (CONFIG_BCH_CONST_M) +#define BCH_MAX_T (CONFIG_BCH_CONST_T) #else #define GF_M(_p) ((_p)->m) #define GF_T(_p) ((_p)->t) #define GF_N(_p) ((_p)->n) -#define BCH_MAX_M 15 +#define BCH_MAX_M 15 /* 2KB */ +#define BCH_MAX_T 64 /* 64 bit correction */ #endif -#define BCH_MAX_T (((1 << BCH_MAX_M) - 1) / BCH_MAX_M) - #define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32) #define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8) #define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32) -#define BCH_ECC_MAX_BYTES DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 8) #ifndef dbg #define dbg(_fmt, args...) do {} while (0) @@ -202,6 +201,9 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, const uint32_t * const tab3 = tab2 + 256*(l+1); const uint32_t *pdata, *p0, *p1, *p2, *p3; + if (WARN_ON(r_bytes > sizeof(r))) + return; + if (ecc) { /* load ecc parity bytes into internal 32-bit buffer */ load_ecc8(bch, bch->ecc_buf, ecc); @@ -1285,6 +1287,13 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) */ goto fail; + if (t > BCH_MAX_T) + /* + * we can support larger than 64 bits if necessary, at the + * cost of higher stack usage. + */ + goto fail; + /* sanity checks */ if ((t < 1) || (m*t >= ((1 << m)-1))) /* invalid t value */ diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d5b3a3f95c01..812e59e13fe6 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2794,7 +2794,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) copy = end - str; memcpy(str, args, copy); str += len; - args += len; + args += len + 1; } } if (process) diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 25a5d87e2e4c..912aae5fa09e 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -15,7 +15,6 @@ * but they are bigger and use more memory for the lookup table. */ -#include <linux/crc32poly.h> #include "xz_private.h" /* diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index 482b90f363fe..09360ebb510e 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -102,6 +102,10 @@ # endif #endif +#ifndef CRC32_POLY_LE +#define CRC32_POLY_LE 0xedb88320 +#endif + /* * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used * before calling xz_dec_lzma2_run(). diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c index 6a473709e9b6..7405c9d89d65 100644 --- a/mm/gup_benchmark.c +++ b/mm/gup_benchmark.c @@ -19,7 +19,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd, struct gup_benchmark *gup) { ktime_t start_time, end_time; - unsigned long i, nr, nr_pages, addr, next; + unsigned long i, nr_pages, addr, next; + int nr; struct page **pages; nr_pages = gup->size / PAGE_SIZE; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 533f9b00147d..58269f8ba7c4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2885,9 +2885,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, if (!(pvmw->pmd && !pvmw->pte)) return; - mmu_notifier_invalidate_range_start(mm, address, - address + HPAGE_PMD_SIZE); - flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); pmdval = *pvmw->pmd; pmdp_invalidate(vma, address, pvmw->pmd); @@ -2900,9 +2897,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, set_pmd_at(mm, address, pvmw->pmd, pmdswp); page_remove_rmap(page, true); put_page(page); - - mmu_notifier_invalidate_range_end(mm, address, - address + HPAGE_PMD_SIZE); } void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) @@ -2931,7 +2925,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) else page_add_file_rmap(new, true); set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); - if (vma->vm_flags & VM_LOCKED) + if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new)) mlock_vma_page(new); update_mmu_cache_pmd(vma, address, pvmw->pmd); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3c21775f196b..5c390f5a5207 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, struct page *page; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); - const unsigned long mmun_start = start; /* For mmu_notifiers */ - const unsigned long mmun_end = end; /* For mmu_notifiers */ + unsigned long mmun_start = start; /* For mmu_notifiers */ + unsigned long mmun_end = end; /* For mmu_notifiers */ WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); @@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, */ tlb_remove_check_page_size_change(tlb, sz); tlb_start_vma(tlb, vma); + + /* + * If sharing possible, alert mmu notifiers of worst case. + */ + adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); address = start; for (; address < end; address += sz) { @@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) { spin_unlock(ptl); + /* + * We just unmapped a page of PMDs by clearing a PUD. + * The caller's TLB flush range should cover this area. + */ continue; } @@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm; struct mmu_gather tlb; + unsigned long tlb_start = start; + unsigned long tlb_end = end; + + /* + * If shared PMDs were possibly used within this vma range, adjust + * start/end for worst case tlb flushing. + * Note that we can not be sure if PMDs are shared until we try to + * unmap pages. However, we want to make sure TLB flushing covers + * the largest possible range. + */ + adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end); mm = vma->vm_mm; - tlb_gather_mmu(&tlb, mm, start, end); + tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end); __unmap_hugepage_range(&tlb, vma, start, end, ref_page); - tlb_finish_mmu(&tlb, start, end); + tlb_finish_mmu(&tlb, tlb_start, tlb_end); } /* @@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, pte_t pte; struct hstate *h = hstate_vma(vma); unsigned long pages = 0; + unsigned long f_start = start; + unsigned long f_end = end; + bool shared_pmd = false; + + /* + * In the case of shared PMDs, the area to flush could be beyond + * start/end. Set f_start/f_end to cover the maximum possible + * range if PMD sharing is possible. + */ + adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end); BUG_ON(address >= end); - flush_cache_range(vma, address, end); + flush_cache_range(vma, f_start, f_end); - mmu_notifier_invalidate_range_start(mm, start, end); + mmu_notifier_invalidate_range_start(mm, f_start, f_end); i_mmap_lock_write(vma->vm_file->f_mapping); for (; address < end; address += huge_page_size(h)) { spinlock_t *ptl; @@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, if (huge_pmd_unshare(mm, &address, ptep)) { pages++; spin_unlock(ptl); + shared_pmd = true; continue; } pte = huge_ptep_get(ptep); @@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare * may have cleared our pud entry and done put_page on the page table: * once we release i_mmap_rwsem, another task can do the final put_page - * and that page table be reused and filled with junk. + * and that page table be reused and filled with junk. If we actually + * did unshare a page of pmds, flush the range corresponding to the pud. */ - flush_hugetlb_tlb_range(vma, start, end); + if (shared_pmd) + flush_hugetlb_tlb_range(vma, f_start, f_end); + else + flush_hugetlb_tlb_range(vma, start, end); /* * No need to call mmu_notifier_invalidate_range() we are downgrading * page table protection not changing it to point to a new page. @@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * See Documentation/vm/mmu_notifier.rst */ i_mmap_unlock_write(vma->vm_file->f_mapping); - mmu_notifier_invalidate_range_end(mm, start, end); + mmu_notifier_invalidate_range_end(mm, f_start, f_end); return pages << h->order; } @@ -4545,13 +4580,41 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) /* * check on proper vm_flags and page table alignment */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) + if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end)) return true; return false; } /* + * Determine if start,end range within vma could be mapped by shared pmd. + * If yes, adjust start and end to cover range associated with possible + * shared pmd mappings. + */ +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + unsigned long check_addr = *start; + + if (!(vma->vm_flags & VM_MAYSHARE)) + return; + + for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) { + unsigned long a_start = check_addr & PUD_MASK; + unsigned long a_end = a_start + PUD_SIZE; + + /* + * If sharing is possible, adjust start/end if necessary. + */ + if (range_in_vma(vma, a_start, a_end)) { + if (a_start < *start) + *start = a_start; + if (a_end > *end) + *end = a_end; + } + } +} + +/* * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() * and returns the corresponding pte. While this is not necessary for the * !shared pmd case because we can allocate the pmd later as well, it makes the @@ -4648,6 +4711,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; } + +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} #define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ diff --git a/mm/madvise.c b/mm/madvise.c index 972a9eaa898b..71d21df2a3f3 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -96,7 +96,7 @@ static long madvise_behavior(struct vm_area_struct *vma, new_flags |= VM_DONTDUMP; break; case MADV_DODUMP: - if (new_flags & VM_SPECIAL) { + if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) { error = -EINVAL; goto out; } diff --git a/mm/migrate.c b/mm/migrate.c index d6a2e89b086a..84381b55b2bd 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -275,6 +275,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) mlock_vma_page(new); + if (PageTransHuge(page) && PageMlocked(page)) + clear_page_mlock(page); + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, pvmw.address, pvmw.pte); } @@ -1411,7 +1414,7 @@ retry: * we encounter them after the rest of the list * is processed. */ - if (PageTransHuge(page)) { + if (PageTransHuge(page) && !PageHuge(page)) { lock_page(page); rc = split_huge_page_to_list(page, from); unlock_page(page); @@ -1855,46 +1858,6 @@ static struct page *alloc_misplaced_dst_page(struct page *page, return newpage; } -/* - * page migration rate limiting control. - * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs - * window of time. Default here says do not migrate more than 1280M per second. - */ -static unsigned int migrate_interval_millisecs __read_mostly = 100; -static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); - -/* Returns true if the node is migrate rate-limited after the update */ -static bool numamigrate_update_ratelimit(pg_data_t *pgdat, - unsigned long nr_pages) -{ - /* - * Rate-limit the amount of data that is being migrated to a node. - * Optimal placement is no good if the memory bus is saturated and - * all the time is being spent migrating! - */ - if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) { - spin_lock(&pgdat->numabalancing_migrate_lock); - pgdat->numabalancing_migrate_nr_pages = 0; - pgdat->numabalancing_migrate_next_window = jiffies + - msecs_to_jiffies(migrate_interval_millisecs); - spin_unlock(&pgdat->numabalancing_migrate_lock); - } - if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) { - trace_mm_numa_migrate_ratelimit(current, pgdat->node_id, - nr_pages); - return true; - } - - /* - * This is an unlocked non-atomic update so errors are possible. - * The consequences are failing to migrate when we potentiall should - * have which is not severe enough to warrant locking. If it is ever - * a problem, it can be converted to a per-cpu counter. - */ - pgdat->numabalancing_migrate_nr_pages += nr_pages; - return false; -} - static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) { int page_lru; @@ -1967,14 +1930,6 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, if (page_is_file_cache(page) && PageDirty(page)) goto out; - /* - * Rate-limit the amount of data that is being migrated to a node. - * Optimal placement is no good if the memory bus is saturated and - * all the time is being spent migrating! - */ - if (numamigrate_update_ratelimit(pgdat, 1)) - goto out; - isolated = numamigrate_isolate_page(pgdat, page); if (!isolated) goto out; @@ -2021,14 +1976,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, unsigned long mmun_start = address & HPAGE_PMD_MASK; unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; - /* - * Rate-limit the amount of data that is being migrated to a node. - * Optimal placement is no good if the memory bus is saturated and - * all the time is being spent migrating! - */ - if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR)) - goto out_dropref; - new_page = alloc_pages_node(node, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE), HPAGE_PMD_ORDER); @@ -2125,7 +2072,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, out_fail: count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); -out_dropref: ptl = pmd_lock(mm, pmd); if (pmd_same(*pmd, entry)) { entry = pmd_modify(entry, vma->vm_page_prot); diff --git a/mm/mmap.c b/mm/mmap.c index 5f2b2b184c60..f7cd9cb966c0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1410,7 +1410,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (flags & MAP_FIXED_NOREPLACE) { struct vm_area_struct *vma = find_vma(mm, addr); - if (vma && vma->vm_start <= addr) + if (vma && vma->vm_start < addr + len) return -EEXIST; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 89d2a2ab3fe6..e2ef1c17942f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6193,17 +6193,6 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages, return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT; } -#ifdef CONFIG_NUMA_BALANCING -static void pgdat_init_numabalancing(struct pglist_data *pgdat) -{ - spin_lock_init(&pgdat->numabalancing_migrate_lock); - pgdat->numabalancing_migrate_nr_pages = 0; - pgdat->numabalancing_migrate_next_window = jiffies; -} -#else -static void pgdat_init_numabalancing(struct pglist_data *pgdat) {} -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void pgdat_init_split_queue(struct pglist_data *pgdat) { @@ -6228,7 +6217,6 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) { pgdat_resize_init(pgdat); - pgdat_init_numabalancing(pgdat); pgdat_init_split_queue(pgdat); pgdat_init_kcompactd(pgdat); diff --git a/mm/percpu.c b/mm/percpu.c index a749d4d96e3e..4b90682623e9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1212,6 +1212,7 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; + pcpu_mem_free(chunk->md_blocks); pcpu_mem_free(chunk->bound_map); pcpu_mem_free(chunk->alloc_map); pcpu_mem_free(chunk); diff --git a/mm/rmap.c b/mm/rmap.c index eb477809a5c0..1e79fac3186b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } /* - * We have to assume the worse case ie pmd for invalidation. Note that - * the page can not be free in this function as call of try_to_unmap() - * must hold a reference on the page. + * For THP, we have to assume the worse case ie pmd for invalidation. + * For hugetlb, it could be much worse if we need to do pud + * invalidation in the case of pmd sharing. + * + * Note that the page can not be free in this function as call of + * try_to_unmap() must hold a reference on the page. */ end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); + if (PageHuge(page)) { + /* + * If sharing is possible, start and end will be adjusted + * accordingly. + */ + adjust_range_if_pmd_sharing_possible(vma, &start, &end); + } mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); while (page_vma_mapped_walk(&pvmw)) { @@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); address = pvmw.address; + if (PageHuge(page)) { + if (huge_pmd_unshare(mm, &address, pvmw.pte)) { + /* + * huge_pmd_unshare unmapped an entire PMD + * page. There is no way of knowing exactly + * which PMDs may be cached for this mm, so + * we must flush them all. start/end were + * already adjusted above to cover this range. + */ + flush_cache_range(vma, start, end); + flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range(mm, start, end); + + /* + * The ref count of the PMD page was dropped + * which is part of the way map counting + * is done for shared PMDs. Return 'true' + * here. When there is no other sharing, + * huge_pmd_unshare returns false and we will + * unmap the actual page and drop map count + * to zero. + */ + page_vma_mapped_walk_done(&pvmw); + break; + } + } if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && diff --git a/mm/vmscan.c b/mm/vmscan.c index c7ce2c161225..c5ef7240cbcb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -580,8 +580,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority) { struct memcg_shrinker_map *map; - unsigned long freed = 0; - int ret, i; + unsigned long ret, freed = 0; + int i; if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)) return 0; @@ -677,9 +677,8 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority) { + unsigned long ret, freed = 0; struct shrinker *shrinker; - unsigned long freed = 0; - int ret; if (!mem_cgroup_is_root(memcg)) return shrink_slab_memcg(gfp_mask, nid, memcg, priority); diff --git a/mm/vmstat.c b/mm/vmstat.c index 8ba0870ecddd..7878da76abf2 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1275,6 +1275,9 @@ const char * const vmstat_text[] = { #ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", +#else + "", /* nr_tlb_remote_flush */ + "", /* nr_tlb_remote_flush_received */ #endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", @@ -1283,7 +1286,6 @@ const char * const vmstat_text[] = { #ifdef CONFIG_DEBUG_VM_VMACACHE "vmacache_find_calls", "vmacache_find_hits", - "vmacache_full_flushes", #endif #ifdef CONFIG_SWAP "swap_ra", diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3bdc8f3ca259..ccce954f8146 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2434,9 +2434,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, /* LE address type */ addr_type = le_addr_type(cp->addr.type); - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); - - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); + /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */ + err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -2450,8 +2449,6 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto done; } - /* Abort any ongoing SMP pairing */ - smp_cancel_pairing(conn); /* Defer clearing up the connection parameters until closing to * give a chance of keeping them if a repairing happens. diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3a7b0773536b..73f7211d0431 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2422,30 +2422,51 @@ unlock: return ret; } -void smp_cancel_pairing(struct hci_conn *hcon) +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct hci_conn *hcon; + struct l2cap_conn *conn; struct l2cap_chan *chan; struct smp_chan *smp; + int err; + + err = hci_remove_ltk(hdev, bdaddr, addr_type); + hci_remove_irk(hdev, bdaddr, addr_type); + + hcon = hci_conn_hash_lookup_le(hdev, bdaddr, addr_type); + if (!hcon) + goto done; + conn = hcon->l2cap_data; if (!conn) - return; + goto done; chan = conn->smp; if (!chan) - return; + goto done; l2cap_chan_lock(chan); smp = chan->data; if (smp) { + /* Set keys to NULL to make sure smp_failure() does not try to + * remove and free already invalidated rcu list entries. */ + smp->ltk = NULL; + smp->slave_ltk = NULL; + smp->remote_irk = NULL; + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) smp_failure(conn, 0); else smp_failure(conn, SMP_UNSPECIFIED); + err = 0; } l2cap_chan_unlock(chan); + +done: + return err; } static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 0ff6247eaa6c..121edadd5f8d 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -181,7 +181,8 @@ enum smp_key_pref { }; /* SMP Commands */ -void smp_cancel_pairing(struct hci_conn *hcon); +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index f0fc182d3db7..b64e1649993b 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -59,7 +59,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.is_set = is_set; req.pid = current->pid; req.cmd = optname; - req.addr = (long)optval; + req.addr = (long __force __user)optval; req.len = optlen; mutex_lock(&bpfilter_lock); if (!info.pid) @@ -98,7 +98,7 @@ static int __init load_umh(void) pr_info("Loaded bpfilter_umh pid %d\n", info.pid); /* health check that usermode process started correctly */ - if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) { + if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) { stop_umh(); return -EFAULT; } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 6e0dc6bcd32a..37278dc280eb 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -835,7 +835,8 @@ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) { + if (skb->nf_bridge && !skb->nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev)) { state->okfn(state->net, state->sk, skb); return NF_STOLEN; } diff --git a/net/core/dev.c b/net/core/dev.c index 82114e1111e6..93243479085f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1752,6 +1752,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +/** + * call_netdevice_notifiers_mtu - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @arg: additional u32 argument passed to the notifier function + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ +static int call_netdevice_notifiers_mtu(unsigned long val, + struct net_device *dev, u32 arg) +{ + struct netdev_notifier_info_ext info = { + .info.dev = dev, + .ext.mtu = arg, + }; + + BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); + + return call_netdevice_notifiers_info(val, &info.info); +} + #ifdef CONFIG_NET_INGRESS static DEFINE_STATIC_KEY_FALSE(ingress_needed_key); @@ -7574,14 +7596,16 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, err = __dev_set_mtu(dev, new_mtu); if (!err) { - err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + orig_mtu); err = notifier_to_errno(err); if (err) { /* setting mtu back and notifying everyone again, * so that they have a chance to revert changes. */ __dev_set_mtu(dev, orig_mtu); - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + new_mtu); } } return err; diff --git a/net/core/devlink.c b/net/core/devlink.c index 8c0ed225e280..6bc42933be4a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2995,6 +2995,8 @@ devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { + int len; + if (param->type != DEVLINK_PARAM_TYPE_BOOL && !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) return -EINVAL; @@ -3010,10 +3012,13 @@ devlink_param_value_get_from_info(const struct devlink_param *param, value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); break; case DEVLINK_PARAM_TYPE_STRING: - if (nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) > - DEVLINK_PARAM_MAX_STRING_VALUE) + len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]), + nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); + if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) || + len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; - value->vstr = nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + strcpy(value->vstr, + nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); break; case DEVLINK_PARAM_TYPE_BOOL: value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ? @@ -3100,7 +3105,10 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, return -EOPNOTSUPP; if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) { - param_item->driverinit_value = value; + if (param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, value.vstr); + else + param_item->driverinit_value = value; param_item->driverinit_value_valid = true; } else { if (!param->set) @@ -4540,7 +4548,10 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, DEVLINK_PARAM_CMODE_DRIVERINIT)) return -EOPNOTSUPP; - *init_val = param_item->driverinit_value; + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(init_val->vstr, param_item->driverinit_value.vstr); + else + *init_val = param_item->driverinit_value; return 0; } @@ -4571,7 +4582,10 @@ int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, DEVLINK_PARAM_CMODE_DRIVERINIT)) return -EOPNOTSUPP; - param_item->driverinit_value = init_val; + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, init_val.vstr); + else + param_item->driverinit_value = init_val; param_item->driverinit_value_valid = true; devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); @@ -4604,6 +4618,23 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) EXPORT_SYMBOL_GPL(devlink_param_value_changed); /** + * devlink_param_value_str_fill - Safely fill-up the string preventing + * from overflow of the preallocated buffer + * + * @dst_val: destination devlink_param_value + * @src: source buffer + */ +void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) +{ + size_t len; + + len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE); + WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE); +} +EXPORT_SYMBOL_GPL(devlink_param_value_str_fill); + +/** * devlink_region_create - create a new address region * * @devlink: devlink diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 234a0ec2e932..0762aaf8e964 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1483,6 +1483,7 @@ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol; + int ret; if (!dev->ethtool_ops->set_wol) return -EOPNOTSUPP; @@ -1490,7 +1491,13 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) if (copy_from_user(&wol, useraddr, sizeof(wol))) return -EFAULT; - return dev->ethtool_ops->set_wol(dev, &wol); + ret = dev->ethtool_ops->set_wol(dev, &wol); + if (ret) + return ret; + + dev->wol_enabled = !!wol.wolopts; + + return 0; } static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3219a2932463..de1d1ba92f2d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -135,27 +135,9 @@ static void queue_process(struct work_struct *work) } } -/* - * Check whether delayed processing was scheduled for our NIC. If so, - * we attempt to grab the poll lock and use ->poll() to pump the card. - * If this fails, either we've recursed in ->poll() or it's already - * running on another CPU. - * - * Note: we don't mask interrupts with this lock because we're using - * trylock here and interrupts are already disabled in the softirq - * case. Further, we test the poll_owner to avoid recursion on UP - * systems where the lock doesn't exist. - */ static void poll_one_napi(struct napi_struct *napi) { - int work = 0; - - /* net_rx_action's ->poll() invocations and our's are - * synchronized by this test which is only made while - * holding the napi->poll_lock. - */ - if (!test_bit(NAPI_STATE_SCHED, &napi->state)) - return; + int work; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need @@ -330,6 +312,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; + rcu_read_lock_bh(); lockdep_assert_irqs_disabled(); npinfo = rcu_dereference_bh(np->dev->npinfo); @@ -374,6 +357,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } + rcu_read_unlock_bh(); } EXPORT_SYMBOL(netpoll_send_skb_on_dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 63ce2283a456..37c7936124e6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1898,10 +1898,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); tgt_net = get_target_net(skb->sk, netnsid); - if (IS_ERR(tgt_net)) { - tgt_net = net; - netnsid = -1; - } + if (IS_ERR(tgt_net)) + return PTR_ERR(tgt_net); } if (tb[IFLA_EXT_MASK]) @@ -2837,6 +2835,12 @@ struct net_device *rtnl_create_link(struct net *net, else if (ops->get_num_rx_queues) num_rx_queues = ops->get_num_rx_queues(); + if (num_tx_queues < 1 || num_tx_queues > 4096) + return ERR_PTR(-EINVAL); + + if (num_rx_queues < 1 || num_rx_queues > 4096) + return ERR_PTR(-EINVAL); + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, ops->setup, num_tx_queues, num_rx_queues); if (!dev) @@ -3744,16 +3748,27 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; - err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, NULL); - if (err < 0) { - return -EINVAL; - } else if (err == 0) { - if (tb[IFLA_MASTER]) - br_idx = nla_get_u32(tb[IFLA_MASTER]); - } + /* A hack to preserve kernel<->userspace interface. + * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0. + * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails. + * So, check for ndmsg with an optional u32 attribute (not used here). + * Fortunately these sizes don't conflict with the size of ifinfomsg + * with an optional attribute. + */ + if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) && + (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) + + nla_attr_size(sizeof(u32)))) { + err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, NULL); + if (err < 0) { + return -EINVAL; + } else if (err == 0) { + if (tb[IFLA_MASTER]) + br_idx = nla_get_u32(tb[IFLA_MASTER]); + } - brport_idx = ifm->ifi_index; + brport_idx = ifm->ifi_index; + } if (br_idx) { br_dev = __dev_get_by_index(net, br_idx); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2c807f67aba..428094b577fc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4452,14 +4452,16 @@ EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); */ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { - if (unlikely(start > skb_headlen(skb)) || - unlikely((int)start + off > skb_headlen(skb) - 2)) { - net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n", - start, off, skb_headlen(skb)); + u32 csum_end = (u32)start + (u32)off + sizeof(__sum16); + u32 csum_start = skb_headroom(skb) + (u32)start; + + if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) { + net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n", + start, off, skb_headroom(skb), skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + start; + skb->csum_start = csum_start; skb->csum_offset = off; skb_set_transport_header(skb, start); return true; diff --git a/net/dccp/input.c b/net/dccp/input.c index d28d46bff6ab..85d6c879383d 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -606,11 +606,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; consume_skb(skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b08feb219b44..8e08cea6f178 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -493,9 +493,11 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2998b0e47d4b..0113993e9b2c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *upper_info = ptr; + struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); unsigned int flags; @@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev, RTNH_F_LINKDOWN); else fib_sync_down_dev(dev, event, false); - /* fall through */ + rt_cache_flush(net); + break; case NETDEV_CHANGEMTU: + fib_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(net); break; case NETDEV_CHANGEUPPER: - info = ptr; + upper_info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ - if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + if (upper_info->upper_dev && + netif_is_l3_master(upper_info->upper_dev)) fib_disable_ip(dev, NETDEV_DOWN, true); break; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f3c89ccf14c5..446204ca7406 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, return NOTIFY_DONE; } +/* Update the PMTU of exceptions when: + * - the new MTU of the first hop becomes smaller than the PMTU + * - the old MTU was the same as the PMTU, and it limited discovery of + * larger MTUs on the path. With that limit raised, we can now + * discover larger MTUs + * A special case is locked exceptions, for which the PMTU is smaller + * than the minimal accepted PMTU: + * - if the new MTU is greater than the PMTU, don't make any change + * - otherwise, unlock and set PMTU + */ +static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +{ + struct fnhe_hash_bucket *bucket; + int i; + + bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!bucket) + return; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); + fnhe; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { + if (fnhe->fnhe_mtu_locked) { + if (new <= fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + fnhe->fnhe_mtu_locked = false; + } + } else if (new < fnhe->fnhe_pmtu || + orig == fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + } + } + } +} + +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) +{ + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct fib_nh *nh; + + hlist_for_each_entry(nh, head, nh_hash) { + if (nh->nh_dev == dev) + nh_update_mtu(nh, dev->mtu, orig_mtu); + } +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dfd5009f96ef..15e7f7915a21 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -544,7 +544,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = ireq_opt_deref(ireq); + rcu_read_lock(); + opt = rcu_dereference(ireq->ireq_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, @@ -558,11 +559,13 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c0fe5ad996f2..26c36cccabdc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -149,7 +149,6 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; - const struct iphdr *iph = ip_hdr(skb); __be16 *ports; int end; @@ -164,7 +163,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; - sin.sin_addr.s_addr = iph->daddr; + sin.sin_addr.s_addr = ip_hdr(skb)->daddr; sin.sin_port = ports[1]; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b678466da451..8501554e96a4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1001,21 +1001,22 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; if (ip_mtu_locked(dst)) return; - if (ipv4_mtu(dst) < mtu) + if (old_mtu < mtu) return; if (mtu < ip_rt_min_pmtu) { lock = true; - mtu = ip_rt_min_pmtu; + mtu = min(old_mtu, ip_rt_min_pmtu); } - if (rt->rt_pmtu == mtu && + if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) return; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b92f422f2fa8..891ed2f91467 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,6 +48,7 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; +static u32 u32_max_div_HZ = UINT_MAX / HZ; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -745,9 +746,10 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_probe_interval", .data = &init_net.ipv4.sysctl_tcp_probe_interval, - .maxlen = sizeof(int), + .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_douintvec_minmax, + .extra2 = &u32_max_div_HZ, }, { .procname = "igmp_link_local_mcast_reports", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4cf2f7bb2802..47e08c1b5bc3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6009,11 +6009,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->fin) goto discard; /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 44c09eddbb78..cd426313a298 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -943,9 +943,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7d69dd6fa7e8..c32a4c16b7ff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1627,7 +1627,7 @@ busy_check: *err = error; return NULL; } -EXPORT_SYMBOL_GPL(__skb_recv_udp); +EXPORT_SYMBOL(__skb_recv_udp); /* * This should be easy, if there is something there we diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index bcfc00e88756..f8de2482a529 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -67,6 +67,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 3d36644890bb..1ad2c2c4e250 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -46,7 +46,6 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -54,8 +53,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5516f55e214b..cbe46175bb59 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -196,6 +196,8 @@ void fib6_info_destroy_rcu(struct rcu_head *head) *ppcpu_rt = NULL; } } + + free_percpu(f6i->rt6i_pcpu); } lwtstate_put(f6i->fib6_nh.nh_lwtstate); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 413d98bf24f4..5e0efd3954e9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -651,8 +651,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; skb->tstamp = sockc->transmit_time; - skb_dst_set(skb, &rt->dst); - *dstp = NULL; skb_put(skb, length); skb_reset_network_header(skb); @@ -665,8 +663,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); - if (err) - goto error_fault; + if (err) { + err = -EFAULT; + kfree_skb(skb); + goto error; + } + + skb_dst_set(skb, &rt->dst); + *dstp = NULL; /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -675,21 +679,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (unlikely(!skb)) return 0; + /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev + * in the error path. Since skb has been freed, the dst could + * have been queued for deletion. + */ + rcu_read_lock(); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); - if (err) - goto error; + if (err) { + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); + goto error_check; + } + rcu_read_unlock(); out: return 0; -error_fault: - err = -EFAULT; - kfree_skb(skb); error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); +error_check: if (err == -ENOBUFS && !np->recverr) err = 0; return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 826b14de7dbb..a366c05a239d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4321,11 +4321,6 @@ static int ip6_route_info_append(struct net *net, if (!nh) return -ENOMEM; nh->fib6_info = rt; - err = ip6_convert_metrics(net, rt, r_cfg); - if (err) { - kfree(nh); - return err; - } memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); list_add_tail(&nh->next, rt6_nh_list); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841f4a07438e..9ef490dddcea 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -59,6 +59,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return -1; } diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 9ad07a91708e..3c29da5defe6 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -51,7 +51,6 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -60,8 +59,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 5959ce9620eb..6a74080005cf 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -170,9 +170,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d25da0e66da1..5d22eda8a6b1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -427,7 +427,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: /* Keys without a station are used for TX only */ - if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP)) + if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_ADHOC: diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5e6cf2cee965..5836ddeac9e3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1756,7 +1756,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (local->ops->wake_tx_queue && type != NL80211_IFTYPE_AP_VLAN && - type != NL80211_IFTYPE_MONITOR) + (type != NL80211_IFTYPE_MONITOR || + (params->flags & MONITOR_FLAG_ACTIVE))) txq_size += sizeof(struct txq_info) + local->hw.txq_data_size; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index ee56f18cad3f..21526630bf65 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -217,7 +217,8 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb); + struct sta_info *sta, + struct ieee80211_tx_status *st); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index daf9db3c8f24..6950cd0bf594 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -295,15 +295,12 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, } void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb) + struct sta_info *sta, + struct ieee80211_tx_status *st) { - struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *txinfo = st->info; int failed; - if (!ieee80211_is_data(hdr->frame_control)) - return; - failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK); /* moving average, scaled to 100. diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 9a6d7208bf4f..91d7c0cd1882 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -479,11 +479,6 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, if (!skb) return; - if (dropped) { - dev_kfree_skb_any(skb); - return; - } - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; @@ -507,6 +502,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, rcu_read_unlock(); dev_kfree_skb_any(skb); + } else if (dropped) { + dev_kfree_skb_any(skb); } else { /* consumes skb */ skb_complete_wifi_ack(skb, acked); @@ -811,7 +808,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, rate_control_tx_status(local, sband, status); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - ieee80211s_update_metric(local, sta, skb); + ieee80211s_update_metric(local, sta, status); if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked) ieee80211_frame_acked(sta, skb); @@ -972,6 +969,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, } rate_control_tx_status(local, sband, status); + if (ieee80211_vif_is_mesh(&sta->sdata->vif)) + ieee80211s_update_metric(local, sta, status); } if (acked || noack_success) { diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 5cd5e6e5834e..6c647f425e05 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -16,6 +16,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" +#include "wme.h" /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) @@ -1010,14 +1011,13 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: - skb_set_queue_mapping(skb, IEEE80211_AC_BK); - skb->priority = 2; + skb->priority = 256 + 2; break; default: - skb_set_queue_mapping(skb, IEEE80211_AC_VI); - skb->priority = 5; + skb->priority = 256 + 5; break; } + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb)); /* * Set the WLAN_TDLS_TEARDOWN flag to indicate a teardown in progress. diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f353d9db54bc..25ba24bef8f5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -214,6 +214,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) { struct ieee80211_local *local = tx->local; struct ieee80211_if_managed *ifmgd; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); /* driver doesn't support power save */ if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) @@ -242,6 +243,9 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (tx->sdata->vif.type != NL80211_IFTYPE_STATION) return TX_CONTINUE; + if (unlikely(info->flags & IEEE80211_TX_INTFL_OFFCHAN_TX_OK)) + return TX_CONTINUE; + ifmgd = &tx->sdata->u.mgd; /* @@ -1890,7 +1894,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; if (invoke_tx_handlers_early(&tx)) - return false; + return true; if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) return true; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b4bdf9eda7b7..247b89784a6f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1213,8 +1213,8 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { #define TCP_NLATTR_SIZE ( \ NLA_ALIGN(NLA_HDRLEN + 1) + \ NLA_ALIGN(NLA_HDRLEN + 1) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags))) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags)))) + NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \ + NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags))) static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) { diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 5af74b37f423..a35fb59ace73 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -49,7 +49,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFTA_DATA_VALUE, NFT_OSF_MAXGENRELEN); + NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); if (err < 0) return err; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 55e2d9215c0d..0e5ec126f6ad 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -355,12 +355,11 @@ cont: static void nft_rbtree_gc(struct work_struct *work) { + struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; struct nft_set_gc_batch *gcb = NULL; - struct rb_node *node, *prev = NULL; - struct nft_rbtree_elem *rbe; struct nft_rbtree *priv; + struct rb_node *node; struct nft_set *set; - int i; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); @@ -371,7 +370,7 @@ static void nft_rbtree_gc(struct work_struct *work) rbe = rb_entry(node, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe)) { - prev = node; + rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) @@ -379,29 +378,30 @@ static void nft_rbtree_gc(struct work_struct *work) if (nft_set_elem_mark_busy(&rbe->ext)) continue; + if (rbe_prev) { + rb_erase(&rbe_prev->node, &priv->root); + rbe_prev = NULL; + } gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); if (!gcb) break; atomic_dec(&set->nelems); nft_set_gc_batch_add(gcb, rbe); + rbe_prev = rbe; - if (prev) { - rbe = rb_entry(prev, struct nft_rbtree_elem, node); + if (rbe_end) { atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe); - prev = NULL; + nft_set_gc_batch_add(gcb, rbe_end); + rb_erase(&rbe_end->node, &priv->root); + rbe_end = NULL; } node = rb_next(node); if (!node) break; } - if (gcb) { - for (i = 0; i < gcb->head.cnt; i++) { - rbe = gcb->elems[i]; - rb_erase(&rbe->node, &priv->root); - } - } + if (rbe_prev) + rb_erase(&rbe_prev->node, &priv->root); write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 0472f3472842..ada144e5645b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -56,7 +56,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; - if (!net_eq(xt_net(par), sock_net(sk))) + if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) @@ -117,7 +117,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; - if (!net_eq(xt_net(par), sock_net(sk))) + if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 86a75105af1a..35ae64cbef33 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1312,6 +1312,10 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, rcu_assign_pointer(help->helper, helper); info->helper = helper; + + if (info->nat) + request_module("ip_nat_%s", name); + return 0; } @@ -1624,10 +1628,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, OVS_NLERR(log, "Failed to allocate conntrack template"); return -ENOMEM; } - - __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); - nf_conntrack_get(&ct_info.ct->ct_general); - if (helper) { err = ovs_ct_add_helper(&ct_info, helper, key, log); if (err) @@ -1639,6 +1639,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (err) goto err_free_ct; + __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); + nf_conntrack_get(&ct_info.ct->ct_general); return 0; err_free_ct: __ovs_ct_free_action(&ct_info); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 75c92a87e7b2..d6e94dc7e290 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2715,10 +2715,12 @@ tpacket_error: } } - if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr, - vio_le())) { - tp_len = -EINVAL; - goto tpacket_error; + if (po->has_vnet_hdr) { + if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { + tp_len = -EINVAL; + goto tpacket_error; + } + virtio_net_hdr_set_proto(skb, vnet_hdr); } skb->destructor = tpacket_destruct_skb; @@ -2915,6 +2917,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (err) goto out_free; len += sizeof(vnet_hdr); + virtio_net_hdr_set_proto(skb, &vnet_hdr); } skb_probe_transport_header(skb, reserve); diff --git a/net/rds/send.c b/net/rds/send.c index 57b3d5a8b2db..fe785ee819dd 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1007,7 +1007,8 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) +static int rds_send_mprds_hash(struct rds_sock *rs, + struct rds_connection *conn, int nonblock) { int hash; @@ -1023,10 +1024,16 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) * used. But if we are interrupted, we have to use the zero * c_path in case the connection ends up being non-MP capable. */ - if (conn->c_npaths == 0) + if (conn->c_npaths == 0) { + /* Cannot wait for the connection be made, so just use + * the base c_path. + */ + if (nonblock) + return 0; if (wait_event_interruptible(conn->c_hs_waitq, conn->c_npaths != 0)) hash = 0; + } if (conn->c_npaths == 1) hash = 0; } @@ -1256,7 +1263,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) } if (conn->c_trans->t_mp_capable) - cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)]; + cpath = &conn->c_path[rds_send_mprds_hash(rs, conn, nonblock)]; else cpath = &conn->c_path[0]; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c97558710421..a6e6cae82c30 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -40,17 +40,12 @@ struct rxrpc_crypt { struct rxrpc_connection; /* - * Mark applied to socket buffers. + * Mark applied to socket buffers in skb->mark. skb->priority is used + * to pass supplementary information. */ enum rxrpc_skb_mark { - RXRPC_SKB_MARK_DATA, /* data message */ - RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ - RXRPC_SKB_MARK_BUSY, /* server busy message */ - RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ - RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ - RXRPC_SKB_MARK_NET_ERROR, /* network error message */ - RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ - RXRPC_SKB_MARK_NEW_CALL, /* local error message */ + RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ + RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ }; /* @@ -293,7 +288,6 @@ struct rxrpc_peer { struct hlist_node hash_link; struct rxrpc_local *local; struct hlist_head error_targets; /* targets for net error distribution */ - struct work_struct error_distributor; struct rb_root service_conns; /* Service connections */ struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ time64_t last_tx_at; /* Last time packet sent here */ @@ -304,12 +298,11 @@ struct rxrpc_peer { unsigned int maxdata; /* data size (MTU - hdrsize) */ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ int debug_id; /* debug ID for printks */ - int error_report; /* Net (+0) or local (+1000000) to distribute */ -#define RXRPC_LOCAL_ERROR_OFFSET 1000000 struct sockaddr_rxrpc srx; /* remote address */ /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 + spinlock_t rtt_input_lock; /* RTT lock for input routine */ ktime_t rtt_last_req; /* Time of last RTT request */ u64 rtt; /* Current RTT estimate (in nS) */ u64 rtt_sum; /* Sum of cache contents */ @@ -450,19 +443,29 @@ struct rxrpc_connection { spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_cache_state cache_state; enum rxrpc_conn_proto_state state; /* current state of connection */ - u32 local_abort; /* local abort code */ - u32 remote_abort; /* remote abort code */ + u32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 security_nonce; /* response re-use preventer */ - u16 service_id; /* Service ID, possibly upgraded */ + u32 service_id; /* Service ID, possibly upgraded */ u8 size_align; /* data size alignment (for security) */ u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ + short error; /* Local error code */ }; +static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp) +{ + return sp->hdr.flags & RXRPC_CLIENT_INITIATED; +} + +static inline bool rxrpc_to_client(const struct rxrpc_skb_priv *sp) +{ + return !rxrpc_to_server(sp); +} + /* * Flags in call->flags. */ @@ -633,6 +636,8 @@ struct rxrpc_call { bool tx_phase; /* T if transmission phase, F if receive phase */ u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ + spinlock_t input_lock; /* Lock for packet input to this call */ + /* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ @@ -717,7 +722,7 @@ extern struct workqueue_struct *rxrpc_workqueue; int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, - struct rxrpc_connection *, + struct rxrpc_sock *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, @@ -887,8 +892,9 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); -int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, gfp_t); +int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, + struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, + gfp_t); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_call *); void rxrpc_put_client_conn(struct rxrpc_connection *); @@ -908,7 +914,8 @@ extern unsigned int rxrpc_closed_conn_expiry; struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, - struct sk_buff *); + struct sk_buff *, + struct rxrpc_peer **); void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_kill_connection(struct rxrpc_connection *); @@ -960,7 +967,7 @@ void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ -void rxrpc_data_ready(struct sock *); +int rxrpc_input_packet(struct sock *, struct sk_buff *); /* * insecure.c @@ -1031,7 +1038,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); * peer_event.c */ void rxrpc_error_report(struct sock *); -void rxrpc_peer_error_distributor(struct work_struct *); void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); void rxrpc_peer_keepalive_worker(struct work_struct *); @@ -1041,16 +1047,15 @@ void rxrpc_peer_keepalive_worker(struct work_struct *); */ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); -struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *, - struct rxrpc_peer *); +void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, + struct rxrpc_peer *); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); void rxrpc_put_peer(struct rxrpc_peer *); -void __rxrpc_queue_peer_error(struct rxrpc_peer *); /* * proc.c diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9d1e298b784c..652e314de38e 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -249,11 +249,11 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) */ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_local *local, + struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; - struct rxrpc_peer *peer, *xpeer; struct rxrpc_call *call; unsigned short call_head, conn_head, peer_head; unsigned short call_tail, conn_tail, peer_tail; @@ -276,21 +276,18 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, return NULL; if (!conn) { - /* No connection. We're going to need a peer to start off - * with. If one doesn't yet exist, use a spare from the - * preallocation set. We dump the address into the spare in - * anticipation - and to save on stack space. - */ - xpeer = b->peer_backlog[peer_tail]; - if (rxrpc_extract_addr_from_skb(local, &xpeer->srx, skb) < 0) - return NULL; - - peer = rxrpc_lookup_incoming_peer(local, xpeer); - if (peer == xpeer) { + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(local, &peer->srx, skb) < 0) + return NULL; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + + rxrpc_new_incoming_peer(rx, local, peer); } /* Now allocate and set up the connection */ @@ -335,45 +332,38 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * The call is returned with the user access mutex held. */ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, - struct rxrpc_connection *conn, + struct rxrpc_sock *rx, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_sock *rx; + struct rxrpc_connection *conn; + struct rxrpc_peer *peer; struct rxrpc_call *call; - u16 service_id = sp->hdr.serviceId; _enter(""); - /* Get the socket providing the service */ - rx = rcu_dereference(local->service); - if (rx && (service_id == rx->srx.srx_service || - service_id == rx->second_service)) - goto found_service; - - trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EOPNOTSUPP); - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; - skb->priority = RX_INVALID_OPERATION; - _leave(" = NULL [service]"); - return NULL; - -found_service: spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = RX_INVALID_OPERATION; _leave(" = NULL [close]"); call = NULL; goto out; } - call = rxrpc_alloc_incoming_call(rx, local, conn, skb); + /* The peer, connection and call may all have sprung into existence due + * to a duplicate packet being handled on another CPU in parallel, so + * we have to recheck the routing. However, we're now holding + * rx->incoming_lock, so the values should remain stable. + */ + conn = rxrpc_find_connection_rcu(local, skb, &peer); + + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb); if (!call) { - skb->mark = RXRPC_SKB_MARK_BUSY; + skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; _leave(" = NULL [busy]"); call = NULL; goto out; @@ -413,20 +403,22 @@ found_service: case RXRPC_CONN_SERVICE: write_lock(&call->state_lock); - if (rx->discard_new_call) - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - else - call->state = RXRPC_CALL_SERVER_ACCEPTING; + if (call->state < RXRPC_CALL_COMPLETE) { + if (rx->discard_new_call) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + else + call->state = RXRPC_CALL_SERVER_ACCEPTING; + } write_unlock(&call->state_lock); break; case RXRPC_CONN_REMOTELY_ABORTED: rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->remote_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; case RXRPC_CONN_LOCALLY_ABORTED: rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->local_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; default: BUG(); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 9486293fef5c..8f1a8f85b1f9 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -138,6 +138,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); spin_lock_init(&call->notify_lock); + spin_lock_init(&call->input_lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); call->debug_id = debug_id; @@ -287,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. */ - ret = rxrpc_connect_call(call, cp, srx, gfp); + ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) goto error; @@ -339,7 +340,7 @@ int rxrpc_retry_client_call(struct rxrpc_sock *rx, /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. */ - ret = rxrpc_connect_call(call, cp, srx, gfp); + ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) goto error; @@ -400,7 +401,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, rcu_assign_pointer(conn->channels[chan].call, call); spin_lock(&conn->params.peer->lock); - hlist_add_head(&call->error_link, &conn->params.peer->error_targets); + hlist_add_head_rcu(&call->error_link, &conn->params.peer->error_targets); spin_unlock(&conn->params.peer->lock); _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f8f37188a932..521189f4b666 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -276,7 +276,8 @@ dont_reuse: * If we return with a connection, the call will be on its waiting list. It's * left to the caller to assign a channel and wake up the call. */ -static int rxrpc_get_client_conn(struct rxrpc_call *call, +static int rxrpc_get_client_conn(struct rxrpc_sock *rx, + struct rxrpc_call *call, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, gfp_t gfp) @@ -289,7 +290,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp); + cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp); if (!cp->peer) goto error; @@ -683,7 +684,8 @@ out: * find a connection for a call * - called in process context with IRQs enabled */ -int rxrpc_connect_call(struct rxrpc_call *call, +int rxrpc_connect_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, gfp_t gfp) @@ -696,7 +698,7 @@ int rxrpc_connect_call(struct rxrpc_call *call, rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); rxrpc_cull_active_client_conns(rxnet); - ret = rxrpc_get_client_conn(call, cp, srx, gfp); + ret = rxrpc_get_client_conn(rx, call, cp, srx, gfp); if (ret < 0) goto out; @@ -710,8 +712,8 @@ int rxrpc_connect_call(struct rxrpc_call *call, } spin_lock_bh(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, - &call->conn->params.peer->error_targets); + hlist_add_head_rcu(&call->error_link, + &call->conn->params.peer->error_targets); spin_unlock_bh(&call->conn->params.peer->lock); out: diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 6df56ce68861..b6fca8ebb117 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -126,7 +126,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code); break; case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(chan->call_debug_id, serial, @@ -153,13 +153,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl, - u32 abort_code, int error) + enum rxrpc_call_completion compl) { struct rxrpc_call *call; int i; - _enter("{%d},%x", conn->debug_id, abort_code); + _enter("{%d},%x", conn->debug_id, conn->abort_code); spin_lock(&conn->channel_lock); @@ -172,9 +171,11 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, trace_rxrpc_abort(call->debug_id, "CON", call->cid, call->call_id, 0, - abort_code, error); + conn->abort_code, + conn->error); if (rxrpc_set_call_completion(call, compl, - abort_code, error)) + conn->abort_code, + conn->error)) rxrpc_notify_socket(call); } } @@ -207,10 +208,12 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, return 0; } + conn->error = error; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED); msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; @@ -229,7 +232,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, whdr._rsvd = 0; whdr.serviceId = htons(conn->service_id); - word = htonl(conn->local_abort); + word = htonl(conn->abort_code); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); @@ -240,7 +243,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort); + _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ret < 0) { @@ -315,9 +318,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); + conn->error = -ECONNABORTED; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 77440a356b14..885dae829f4a 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -69,10 +69,14 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) * If successful, a pointer to the connection is returned, but no ref is taken. * NULL is returned if there is no match. * + * When searching for a service call, if we find a peer but no connection, we + * return that through *_peer in case we need to create a new service call. + * * The caller must be holding the RCU read lock. */ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, - struct sk_buff *skb) + struct sk_buff *skb, + struct rxrpc_peer **_peer) { struct rxrpc_connection *conn; struct rxrpc_conn_proto k; @@ -85,9 +89,6 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) goto not_found; - k.epoch = sp->hdr.epoch; - k.cid = sp->hdr.cid & RXRPC_CIDMASK; - /* We may have to handle mixing IPv4 and IPv6 */ if (srx.transport.family != local->srx.transport.family) { pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", @@ -99,7 +100,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, k.epoch = sp->hdr.epoch; k.cid = sp->hdr.cid & RXRPC_CIDMASK; - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) { + if (rxrpc_to_server(sp)) { /* We need to look up service connections by the full protocol * parameter set. We look up the peer first as an intermediate * step and then the connection from the peer's tree. @@ -107,6 +108,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, peer = rxrpc_lookup_peer_rcu(local, &srx); if (!peer) goto not_found; + *_peer = peer; conn = rxrpc_find_service_conn_rcu(peer, skb); if (!conn || atomic_read(&conn->usage) == 0) goto not_found; @@ -214,7 +216,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_cwnd = call->cong_cwnd; spin_lock_bh(&conn->params.peer->lock); - hlist_del_init(&call->error_link); + hlist_del_rcu(&call->error_link); spin_unlock_bh(&conn->params.peer->lock); if (rxrpc_is_client_call(call)) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index cfdc199c6351..570b49d2da42 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -216,10 +216,11 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, /* * Apply a hard ACK by advancing the Tx window. */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, +static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; + bool rot_last = false; int ix; u8 annotation; @@ -243,15 +244,17 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = list; list = skb; - if (annotation & RXRPC_TX_ANNO_LAST) + if (annotation & RXRPC_TX_ANNO_LAST) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + rot_last = true; + } if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) summary->nr_rot_new_acks++; } spin_unlock(&call->lock); - trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + trace_rxrpc_transmit(call, (rot_last ? rxrpc_transmit_rotate_last : rxrpc_transmit_rotate)); wake_up(&call->waitq); @@ -262,6 +265,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = NULL; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } + + return rot_last; } /* @@ -273,23 +278,26 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, const char *abort_why) { + unsigned int state; ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); write_lock(&call->state_lock); - switch (call->state) { + state = call->state; + switch (state) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: if (reply_begun) - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY; else - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY; break; case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); rxrpc_notify_socket(call); + state = call->state; break; default: @@ -297,11 +305,10 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, } write_unlock(&call->state_lock); - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); - } else { + else trace_rxrpc_transmit(call, rxrpc_transmit_end); - } _leave(" = ok"); return true; @@ -332,11 +339,11 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } - if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_proto_abort("TXL", call, top); - return false; + if (!rxrpc_rotate_tx_window(call, top, &summary)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } } if (!rxrpc_end_tx_phase(call, true, "ETD")) return false; @@ -452,13 +459,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, } } + spin_lock(&call->input_lock); + /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && !rxrpc_receiving_reply(call)) - return; + goto unlock; call->ackr_prev_seq = seq; @@ -488,12 +497,16 @@ next_subpacket: if (flags & RXRPC_LAST_PACKET) { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq != call->rx_top) - return rxrpc_proto_abort("LSN", call, seq); + seq != call->rx_top) { + rxrpc_proto_abort("LSN", call, seq); + goto unlock; + } } else { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - after_eq(seq, call->rx_top)) - return rxrpc_proto_abort("LSA", call, seq); + after_eq(seq, call->rx_top)) { + rxrpc_proto_abort("LSA", call, seq); + goto unlock; + } } trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); @@ -560,8 +573,10 @@ next_subpacket: skip: offset += len; if (flags & RXRPC_JUMBO_PACKET) { - if (skb_copy_bits(skb, offset, &flags, 1) < 0) - return rxrpc_proto_abort("XJF", call, seq); + if (skb_copy_bits(skb, offset, &flags, 1) < 0) { + rxrpc_proto_abort("XJF", call, seq); + goto unlock; + } offset += sizeof(struct rxrpc_jumbo_header); seq++; serial++; @@ -601,6 +616,9 @@ ack: trace_rxrpc_notify_socket(call->debug_id, serial); rxrpc_notify_socket(call); } + +unlock: + spin_unlock(&call->input_lock); _leave(" [queued]"); } @@ -622,13 +640,14 @@ static void rxrpc_input_requested_ack(struct rxrpc_call *call, if (!skb) continue; + sent_at = skb->tstamp; + smp_rmb(); /* Read timestamp before serial. */ sp = rxrpc_skb(skb); if (sp->hdr.serial != orig_serial) continue; - smp_rmb(); - sent_at = skb->tstamp; goto found; } + return; found: @@ -686,15 +705,14 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, ping_time = call->ping_time; smp_rmb(); - ping_serial = call->ping_serial; + ping_serial = READ_ONCE(call->ping_serial); if (orig_serial == call->acks_lost_ping) rxrpc_input_check_for_lost_ack(call); - if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || - before(orig_serial, ping_serial)) + if (before(orig_serial, ping_serial) || + !test_and_clear_bit(RXRPC_CALL_PINGING, &call->flags)) return; - clear_bit(RXRPC_CALL_PINGING, &call->flags); if (after(orig_serial, ping_serial)) return; @@ -860,15 +878,32 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + return; + + buf.info.rxMTU = 0; ioffset = offset + nr_acks + 3; - if (skb->len >= ioffset + sizeof(buf.info)) { - if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) - return rxrpc_proto_abort("XAI", call, 0); + if (skb->len >= ioffset + sizeof(buf.info) && + skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + + spin_lock(&call->input_lock); + + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + goto out; + call->acks_latest_ts = skb->tstamp; + call->acks_latest = sp->hdr.serial; + + /* Parse rwind and mtu sizes if provided. */ + if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); - } - if (first_soft_ack == 0) - return rxrpc_proto_abort("AK0", call, 0); + if (first_soft_ack == 0) { + rxrpc_proto_abort("AK0", call, 0); + goto out; + } /* Ignore ACKs unless we are or have just been transmitting. */ switch (READ_ONCE(call->state)) { @@ -878,39 +913,35 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, case RXRPC_CALL_SERVER_AWAIT_ACK: break; default: - return; - } - - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) { - _debug("discard ACK %d <= %d", - sp->hdr.serial, call->acks_latest); - return; + goto out; } - call->acks_latest_ts = skb->tstamp; - call->acks_latest = sp->hdr.serial; if (before(hard_ack, call->tx_hard_ack) || - after(hard_ack, call->tx_top)) - return rxrpc_proto_abort("AKW", call, 0); - if (nr_acks > call->tx_top - hard_ack) - return rxrpc_proto_abort("AKN", call, 0); + after(hard_ack, call->tx_top)) { + rxrpc_proto_abort("AKW", call, 0); + goto out; + } + if (nr_acks > call->tx_top - hard_ack) { + rxrpc_proto_abort("AKN", call, 0); + goto out; + } - if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack, &summary); + if (after(hard_ack, call->tx_hard_ack)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + rxrpc_end_tx_phase(call, false, "ETA"); + goto out; + } + } if (nr_acks > 0) { - if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) - return rxrpc_proto_abort("XSA", call, 0); + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) { + rxrpc_proto_abort("XSA", call, 0); + goto out; + } rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, &summary); } - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_end_tx_phase(call, false, "ETA"); - return; - } - if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && @@ -919,7 +950,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, false, true, rxrpc_propose_ack_ping_for_lost_reply); - return rxrpc_congestion_management(call, skb, &summary, acked_serial); + rxrpc_congestion_management(call, skb, &summary, acked_serial); +out: + spin_unlock(&call->input_lock); } /* @@ -932,9 +965,12 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top, &summary); - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + spin_lock(&call->input_lock); + + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, "ETL"); + + spin_unlock(&call->input_lock); } /* @@ -1017,18 +1053,19 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, } /* - * Handle a new call on a channel implicitly completing the preceding call on - * that channel. + * Handle a new service call on a channel implicitly completing the preceding + * call on that channel. This does not apply to client conns. * * TODO: If callNumber > call_id + 1, renegotiate security. */ -static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, +static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, + struct rxrpc_connection *conn, struct rxrpc_call *call) { switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); - break; + /* Fall through */ case RXRPC_CALL_COMPLETE: break; default: @@ -1036,11 +1073,13 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } + trace_rxrpc_improper_term(call); break; } - trace_rxrpc_improper_term(call); + spin_lock(&rx->incoming_lock); __rxrpc_disconnect_call(conn, call); + spin_unlock(&rx->incoming_lock); rxrpc_notify_socket(call); } @@ -1119,43 +1158,29 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * The socket is locked by the caller and this prevents the socket from being * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. + * + * Called with the RCU read lock held from the IP layer via UDP. */ -void rxrpc_data_ready(struct sock *udp_sk) +int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; - struct rxrpc_call *call; + struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; struct rxrpc_local *local = udp_sk->sk_user_data; - struct sk_buff *skb; + struct rxrpc_peer *peer = NULL; + struct rxrpc_sock *rx = NULL; unsigned int channel; - int ret, skew; + int skew = 0; _enter("%p", udp_sk); - ASSERT(!irqs_disabled()); - - skb = skb_recv_udp(udp_sk, 0, 1, &ret); - if (!skb) { - if (ret == -EAGAIN) - return; - _debug("UDP socket error %d", ret); - return; - } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); rxrpc_new_skb(skb, rxrpc_skb_rx_received); - _net("recv skb %p", skb); - - /* we'll probably need to checksum it (didn't call sock_recvmsg) */ - if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); - _leave(" [CSUM failed]"); - return; - } - - __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); + skb_pull(skb, sizeof(struct udphdr)); /* The UDP protocol already released all skb resources; * we are free to add our own data there. @@ -1170,69 +1195,104 @@ void rxrpc_data_ready(struct sock *udp_sk) static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); - return; + rxrpc_free_skb(skb, rxrpc_skb_rx_lost); + return 0; } } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); trace_rxrpc_rx_packet(sp); - _net("Rx RxRPC %s ep=%x call=%x:%x", - sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient", - sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber); - - if (sp->hdr.type >= RXRPC_N_PACKET_TYPES || - !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) { - _proto("Rx Bad Packet Type %u", sp->hdr.type); - goto bad_message; - } - switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) + if (rxrpc_to_client(sp)) goto discard; rxrpc_post_packet_to_local(local, skb); goto out; case RXRPC_PACKET_TYPE_BUSY: - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + if (rxrpc_to_server(sp)) goto discard; /* Fall through */ + case RXRPC_PACKET_TYPE_ACK: + case RXRPC_PACKET_TYPE_ACKALL: + if (sp->hdr.callNumber == 0) + goto bad_message; + /* Fall through */ + case RXRPC_PACKET_TYPE_ABORT: + break; case RXRPC_PACKET_TYPE_DATA: - if (sp->hdr.callNumber == 0) + if (sp->hdr.callNumber == 0 || + sp->hdr.seq == 0) goto bad_message; if (sp->hdr.flags & RXRPC_JUMBO_PACKET && !rxrpc_validate_jumbo(skb)) goto bad_message; break; + case RXRPC_PACKET_TYPE_CHALLENGE: + if (rxrpc_to_server(sp)) + goto discard; + break; + case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_to_client(sp)) + goto discard; + break; + /* Packet types 9-11 should just be ignored. */ case RXRPC_PACKET_TYPE_PARAMS: case RXRPC_PACKET_TYPE_10: case RXRPC_PACKET_TYPE_11: goto discard; + + default: + _proto("Rx Bad Packet Type %u", sp->hdr.type); + goto bad_message; } - rcu_read_lock(); + if (sp->hdr.serviceId == 0) + goto bad_message; + + if (rxrpc_to_server(sp)) { + /* Weed out packets to services we're not offering. Packets + * that would begin a call are explicitly rejected and the rest + * are just discarded. + */ + rx = rcu_dereference(local->service); + if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && + sp->hdr.serviceId != rx->second_service)) { + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.seq == 1) + goto unsupported_service; + goto discard; + } + } - conn = rxrpc_find_connection_rcu(local, skb); + conn = rxrpc_find_connection_rcu(local, skb, &peer); if (conn) { if (sp->hdr.securityIndex != conn->security_ix) goto wrong_security; if (sp->hdr.serviceId != conn->service_id) { - if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) || - conn->service_id != conn->params.service_id) + int old_id; + + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) + goto reupgrade; + old_id = cmpxchg(&conn->service_id, conn->params.service_id, + sp->hdr.serviceId); + + if (old_id != conn->params.service_id && + old_id != sp->hdr.serviceId) goto reupgrade; - conn->service_id = sp->hdr.serviceId; } if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } /* Note the serial number skew here */ @@ -1251,19 +1311,19 @@ void rxrpc_data_ready(struct sock *udp_sk) /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) - goto discard_unlock; + goto discard; if (sp->hdr.callNumber == chan->last_call) { if (chan->call || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - goto discard_unlock; + goto discard; /* For the previous service call, if completed * successfully, we discard all further packets. */ if (rxrpc_conn_is_service(conn) && chan->last_type == RXRPC_PACKET_TYPE_ACK) - goto discard_unlock; + goto discard; /* But otherwise we need to retransmit the final packet * from data cached in the connection record. @@ -1274,18 +1334,16 @@ void rxrpc_data_ready(struct sock *udp_sk) sp->hdr.serial, sp->hdr.flags, 0); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } call = rcu_dereference(chan->call); if (sp->hdr.callNumber > chan->call_id) { - if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) { - rcu_read_unlock(); + if (rxrpc_to_client(sp)) goto reject_packet; - } if (call) - rxrpc_input_implicit_end_call(conn, call); + rxrpc_input_implicit_end_call(rx, conn, call); call = NULL; } @@ -1297,66 +1355,57 @@ void rxrpc_data_ready(struct sock *udp_sk) if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) set_bit(RXRPC_CALL_RX_HEARD, &call->flags); } - } else { - skew = 0; - call = NULL; } if (!call || atomic_read(&call->usage) == 0) { - if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) || - sp->hdr.callNumber == 0 || + if (rxrpc_to_client(sp) || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - goto bad_message_unlock; + goto bad_message; if (sp->hdr.seq != 1) - goto discard_unlock; - call = rxrpc_new_incoming_call(local, conn, skb); - if (!call) { - rcu_read_unlock(); + goto discard; + call = rxrpc_new_incoming_call(local, rx, skb); + if (!call) goto reject_packet; - } rxrpc_send_ping(call, skb, skew); mutex_unlock(&call->user_mutex); } rxrpc_input_call_packet(call, skb, skew); - goto discard_unlock; + goto discard; -discard_unlock: - rcu_read_unlock(); discard: rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: trace_rxrpc_rx_done(0, 0); - return; - -out_unlock: - rcu_read_unlock(); - goto out; + return 0; wrong_security: - rcu_read_unlock(); trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RXKADINCONSISTENCY, EBADMSG); skb->priority = RXKADINCONSISTENCY; goto post_abort; +unsupported_service: + trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->priority = RX_INVALID_OPERATION; + goto post_abort; + reupgrade: - rcu_read_unlock(); trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); goto protocol_error; -bad_message_unlock: - rcu_read_unlock(); bad_message: trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); protocol_error: skb->priority = RX_PROTOCOL_ERROR; post_abort: - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); + return 0; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 777c3ed4cfc0..cad0691c2bb4 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -19,6 +19,7 @@ #include <linux/ip.h> #include <linux/hashtable.h> #include <net/sock.h> +#include <net/udp.h> #include <net/af_rxrpc.h> #include "ar-internal.h" @@ -108,7 +109,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, */ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { - struct sock *sock; + struct sock *usk; int ret, opt; _enter("%p{%d,%d}", @@ -122,6 +123,28 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) return ret; } + /* set the socket up */ + usk = local->socket->sk; + inet_sk(usk)->mc_loop = 0; + + /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ + inet_inc_convert_csum(usk); + + rcu_assign_sk_user_data(usk, local); + + udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC; + udp_sk(usk)->encap_rcv = rxrpc_input_packet; + udp_sk(usk)->encap_destroy = NULL; + udp_sk(usk)->gro_receive = NULL; + udp_sk(usk)->gro_complete = NULL; + + udp_encap_enable(); +#if IS_ENABLED(CONFIG_IPV6) + if (local->srx.transport.family == AF_INET6) + udpv6_encap_enable(); +#endif + usk->sk_error_report = rxrpc_error_report; + /* if a local address was supplied then bind it */ if (local->srx.transport_len > sizeof(sa_family_t)) { _debug("bind"); @@ -135,10 +158,10 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } switch (local->srx.transport.family) { - case AF_INET: - /* we want to receive ICMP errors */ + case AF_INET6: + /* we want to receive ICMPv6 errors */ opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); @@ -146,19 +169,22 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } /* we want to set the don't fragment bit */ - opt = IP_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, + opt = IPV6_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); goto error; } - break; - case AF_INET6: + /* Fall through and set IPv4 options too otherwise we don't get + * errors from IPv4 packets sent through the IPv6 socket. + */ + + case AF_INET: /* we want to receive ICMP errors */ opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, + ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); @@ -166,24 +192,28 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } /* we want to set the don't fragment bit */ - opt = IPV6_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, + opt = IP_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); goto error; } + + /* We want receive timestamps. */ + opt = 1; + ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS, + (char *)&opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } break; default: BUG(); } - /* set the socket up */ - sock = local->socket->sk; - sock->sk_user_data = local; - sock->sk_data_ready = rxrpc_data_ready; - sock->sk_error_report = rxrpc_error_report; _leave(" = 0"); return 0; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ccf5de160444..e8fb8922bca8 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -124,7 +124,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, struct kvec iov[2]; rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; - ktime_t now; size_t len, n; int ret; u8 reason; @@ -196,9 +195,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, /* We need to stick a time in before we send the packet in case * the reply gets back before kernel_sendmsg() completes - but * asking UDP to send the packet can take a relatively long - * time, so we update the time after, on the assumption that - * the packet transmission is more likely to happen towards the - * end of the kernel_sendmsg() call. + * time. */ call->ping_time = ktime_get_real(); set_bit(RXRPC_CALL_PINGING, &call->flags); @@ -206,9 +203,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - now = ktime_get_real(); - if (ping) - call->ping_time = now; conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, @@ -363,8 +357,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. + * + * However, we mustn't request an ACK on the last reply packet of a + * service call, lest OpenAFS incorrectly send us an ACK with some + * soft-ACKs in it and then never follow up with a proper hard ACK. */ - if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && + if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) || + rxrpc_to_server(sp) + ) && (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || retrans || call->cong_mode == RXRPC_CALL_SLOW_START || @@ -390,6 +390,11 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, goto send_fragmentable; down_read(&conn->params.local->defrag_sem); + + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet * to go out of the interface @@ -413,12 +418,8 @@ done: trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans, lost); if (ret >= 0) { - ktime_t now = ktime_get_real(); - skb->tstamp = now; - smp_wmb(); - sp->hdr.serial = serial; if (whdr.flags & RXRPC_REQUEST_ACK) { - call->peer->rtt_last_req = now; + call->peer->rtt_last_req = skb->tstamp; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); if (call->peer->rtt_usage > 1) { unsigned long nowj = jiffies, ack_lost_at; @@ -457,6 +458,10 @@ send_fragmentable: down_write(&conn->params.local->defrag_sem); + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + switch (conn->params.local->srx.transport.family) { case AF_INET: opt = IP_PMTUDISC_DONT; @@ -519,7 +524,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) struct kvec iov[2]; size_t size; __be32 code; - int ret; + int ret, ioc; _enter("%d", local->debug_id); @@ -527,7 +532,6 @@ void rxrpc_reject_packets(struct rxrpc_local *local) iov[0].iov_len = sizeof(whdr); iov[1].iov_base = &code; iov[1].iov_len = sizeof(code); - size = sizeof(whdr) + sizeof(code); msg.msg_name = &srx.transport; msg.msg_control = NULL; @@ -535,17 +539,31 @@ void rxrpc_reject_packets(struct rxrpc_local *local) msg.msg_flags = 0; memset(&whdr, 0, sizeof(whdr)); - whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); + switch (skb->mark) { + case RXRPC_SKB_MARK_REJECT_BUSY: + whdr.type = RXRPC_PACKET_TYPE_BUSY; + size = sizeof(whdr); + ioc = 1; + break; + case RXRPC_SKB_MARK_REJECT_ABORT: + whdr.type = RXRPC_PACKET_TYPE_ABORT; + code = htonl(skb->priority); + size = sizeof(whdr) + sizeof(code); + ioc = 2; + break; + default: + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + continue; + } + if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) { msg.msg_namelen = srx.transport_len; - code = htonl(skb->priority); - whdr.epoch = htonl(sp->hdr.epoch); whdr.cid = htonl(sp->hdr.cid); whdr.callNumber = htonl(sp->hdr.callNumber); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 4f9da2f51c69..05b51bdbdd41 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -23,6 +23,8 @@ #include "ar-internal.h" static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); +static void rxrpc_distribute_error(struct rxrpc_peer *, int, + enum rxrpc_call_completion); /* * Find the peer associated with an ICMP packet. @@ -194,8 +196,6 @@ void rxrpc_error_report(struct sock *sk) rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - /* The ref we obtained is passed off to the work item */ - __rxrpc_queue_peer_error(peer); _leave(""); } @@ -205,6 +205,7 @@ void rxrpc_error_report(struct sock *sk) static void rxrpc_store_error(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) { + enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR; struct sock_extended_err *ee; int err; @@ -255,7 +256,7 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, case SO_EE_ORIGIN_NONE: case SO_EE_ORIGIN_LOCAL: _proto("Rx Received local error { error=%d }", err); - err += RXRPC_LOCAL_ERROR_OFFSET; + compl = RXRPC_CALL_LOCAL_ERROR; break; case SO_EE_ORIGIN_ICMP6: @@ -264,48 +265,23 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, break; } - peer->error_report = err; + rxrpc_distribute_error(peer, err, compl); } /* - * Distribute an error that occurred on a peer + * Distribute an error that occurred on a peer. */ -void rxrpc_peer_error_distributor(struct work_struct *work) +static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, + enum rxrpc_call_completion compl) { - struct rxrpc_peer *peer = - container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; - enum rxrpc_call_completion compl; - int error; - - _enter(""); - - error = READ_ONCE(peer->error_report); - if (error < RXRPC_LOCAL_ERROR_OFFSET) { - compl = RXRPC_CALL_NETWORK_ERROR; - } else { - compl = RXRPC_CALL_LOCAL_ERROR; - error -= RXRPC_LOCAL_ERROR_OFFSET; - } - - _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error); - spin_lock_bh(&peer->lock); - - while (!hlist_empty(&peer->error_targets)) { - call = hlist_entry(peer->error_targets.first, - struct rxrpc_call, error_link); - hlist_del_init(&call->error_link); + hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { rxrpc_see_call(call); - - if (rxrpc_set_call_completion(call, compl, 0, -error)) + if (call->state < RXRPC_CALL_COMPLETE && + rxrpc_set_call_completion(call, compl, 0, -error)) rxrpc_notify_socket(call); } - - spin_unlock_bh(&peer->lock); - - rxrpc_put_peer(peer); - _leave(""); } /* @@ -325,6 +301,8 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, if (rtt < 0) return; + spin_lock(&peer->rtt_input_lock); + /* Replace the oldest datum in the RTT buffer */ sum -= peer->rtt_cache[cursor]; sum += rtt; @@ -336,6 +314,8 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, peer->rtt_usage = usage; } + spin_unlock(&peer->rtt_input_lock); + /* Now recalculate the average */ if (usage == RXRPC_RTT_CACHE_SIZE) { avg = sum / RXRPC_RTT_CACHE_SIZE; @@ -344,6 +324,7 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, do_div(avg, usage); } + /* Don't need to update this under lock */ peer->rtt = avg; trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, usage, avg); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 1dc7648e3eff..5691b7d266ca 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -124,11 +124,9 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( struct rxrpc_net *rxnet = local->rxnet; hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { - if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0) { - if (atomic_read(&peer->usage) == 0) - return NULL; + if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 && + atomic_read(&peer->usage) > 0) return peer; - } } return NULL; @@ -155,8 +153,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, * assess the MTU size for the network interface through which this peer is * reached */ -static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) +static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, + struct rxrpc_peer *peer) { + struct net *net = sock_net(&rx->sk); struct dst_entry *dst; struct rtable *rt; struct flowi fl; @@ -171,7 +171,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) switch (peer->srx.transport.family) { case AF_INET: rt = ip_route_output_ports( - &init_net, fl4, NULL, + net, fl4, NULL, peer->srx.transport.sin.sin_addr.s_addr, 0, htons(7000), htons(7001), IPPROTO_UDP, 0, 0); if (IS_ERR(rt)) { @@ -190,7 +190,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) sizeof(struct in6_addr)); fl6->fl6_dport = htons(7001); fl6->fl6_sport = htons(7000); - dst = ip6_route_output(&init_net, NULL, fl6); + dst = ip6_route_output(net, NULL, fl6); if (dst->error) { _leave(" [route err %d]", dst->error); return; @@ -222,11 +222,10 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) atomic_set(&peer->usage, 1); peer->local = local; INIT_HLIST_HEAD(&peer->error_targets); - INIT_WORK(&peer->error_distributor, - &rxrpc_peer_error_distributor); peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); + spin_lock_init(&peer->rtt_input_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); if (RXRPC_TX_SMSS > 2190) @@ -244,10 +243,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) /* * Initialise peer record. */ -static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) +static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, + unsigned long hash_key) { peer->hash_key = hash_key; - rxrpc_assess_MTU_size(peer); + rxrpc_assess_MTU_size(rx, peer); peer->mtu = peer->if_mtu; peer->rtt_last_req = ktime_get_real(); @@ -279,7 +279,8 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) /* * Set up a new peer. */ -static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, +static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, + struct rxrpc_local *local, struct sockaddr_rxrpc *srx, unsigned long hash_key, gfp_t gfp) @@ -291,7 +292,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, peer = rxrpc_alloc_peer(local, gfp); if (peer) { memcpy(&peer->srx, srx, sizeof(*srx)); - rxrpc_init_peer(peer, hash_key); + rxrpc_init_peer(rx, peer, hash_key); } _leave(" = %p", peer); @@ -299,40 +300,31 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, } /* - * Set up a new incoming peer. The address is prestored in the preallocated - * peer. + * Set up a new incoming peer. There shouldn't be any other matching peers + * since we've already done a search in the list from the non-reentrant context + * (the data_ready handler) that is the only place we can add new peers. */ -struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, - struct rxrpc_peer *prealloc) +void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, + struct rxrpc_peer *peer) { - struct rxrpc_peer *peer; struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; - hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); - prealloc->local = local; - rxrpc_init_peer(prealloc, hash_key); + hash_key = rxrpc_peer_hash_key(local, &peer->srx); + peer->local = local; + rxrpc_init_peer(rx, peer, hash_key); spin_lock(&rxnet->peer_hash_lock); - - /* Need to check that we aren't racing with someone else */ - peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); - if (peer && !rxrpc_get_peer_maybe(peer)) - peer = NULL; - if (!peer) { - peer = prealloc; - hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); - list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - } - + hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); + list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); spin_unlock(&rxnet->peer_hash_lock); - return peer; } /* * obtain a remote transport endpoint for the specified address */ -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, + struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_peer *peer, *candidate; @@ -352,7 +344,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, /* The peer is not yet present in hash - create a candidate * for a new record and then redo the search. */ - candidate = rxrpc_create_peer(local, srx, hash_key, gfp); + candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp); if (!candidate) { _leave(" = NULL [nomem]"); return NULL; @@ -416,21 +408,6 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) } /* - * Queue a peer record. This passes the caller's ref to the workqueue. - */ -void __rxrpc_queue_peer_error(struct rxrpc_peer *peer) -{ - const void *here = __builtin_return_address(0); - int n; - - n = atomic_read(&peer->usage); - if (rxrpc_queue_work(&peer->error_distributor)) - trace_rxrpc_peer(peer, rxrpc_peer_queued_error, n, here); - else - rxrpc_put_peer(peer); -} - -/* * Discard a peer record. */ static void __rxrpc_put_peer(struct rxrpc_peer *peer) diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 93da73bf7098..f9cb83c938f3 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -50,7 +50,6 @@ struct rxrpc_wire_header { #define RXRPC_PACKET_TYPE_10 10 /* Ignored */ #define RXRPC_PACKET_TYPE_11 11 /* Ignored */ #define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */ -#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */ uint8_t flags; /* packet flags */ #define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */ @@ -72,20 +71,6 @@ struct rxrpc_wire_header { } __packed; -#define RXRPC_SUPPORTED_PACKET_TYPES ( \ - (1 << RXRPC_PACKET_TYPE_DATA) | \ - (1 << RXRPC_PACKET_TYPE_ACK) | \ - (1 << RXRPC_PACKET_TYPE_BUSY) | \ - (1 << RXRPC_PACKET_TYPE_ABORT) | \ - (1 << RXRPC_PACKET_TYPE_ACKALL) | \ - (1 << RXRPC_PACKET_TYPE_CHALLENGE) | \ - (1 << RXRPC_PACKET_TYPE_RESPONSE) | \ - /*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \ - (1 << RXRPC_PACKET_TYPE_PARAMS) | \ - (1 << RXRPC_PACKET_TYPE_10) | \ - (1 << RXRPC_PACKET_TYPE_11) | \ - (1 << RXRPC_PACKET_TYPE_VERSION)) - /*****************************************************************************/ /* * jumbo packet secondary header diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 23273b5303fd..8525de811616 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -135,7 +135,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) { if (exists) tcf_idr_release(*a, bind); else diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index f218ccf1e2d9..b2c3406a2cf2 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -398,6 +398,7 @@ static int u32_init(struct tcf_proto *tp) rcu_assign_pointer(tp_c->hlist, root_ht); root_ht->tp_c = tp_c; + root_ht->refcnt++; rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; @@ -610,7 +611,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(ht->refcnt); + WARN_ON(--ht->refcnt); u32_clear_hnode(tp, ht, extack); @@ -649,7 +650,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 0) + if (root_ht && --root_ht->refcnt == 1) u32_destroy_hnode(tp, root_ht, extack); if (--tp_c->refcnt == 0) { @@ -698,7 +699,6 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, } if (ht->refcnt == 1) { - ht->refcnt--; u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@ -708,11 +708,11 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, out: *last = true; if (root_ht) { - if (root_ht->refcnt > 1) { + if (root_ht->refcnt > 2) { *last = false; goto ret; } - if (root_ht->refcnt == 1) { + if (root_ht->refcnt == 2) { if (!ht_empty(root_ht)) { *last = false; goto ret; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 98541c6399db..85e73f48e48f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1311,6 +1311,18 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ +const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { + [TCA_KIND] = { .type = NLA_STRING }, + [TCA_OPTIONS] = { .type = NLA_NESTED }, + [TCA_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct tc_estimator) }, + [TCA_STAB] = { .type = NLA_NESTED }, + [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, + [TCA_CHAIN] = { .type = NLA_U32 }, + [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, + [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, +}; + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -1327,7 +1339,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1411,7 +1424,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, replay: /* Reinit, just in case something touches this. */ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1645,7 +1659,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, + rtm_tca_policy, NULL); if (err < 0) return err; @@ -1864,7 +1879,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index c07c30b916d5..793016d722ec 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2644,7 +2644,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, for (i = 1; i <= CAKE_QUEUES; i++) quantum_div[i] = 65535 / i; - q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data), + q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) goto nomem; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index d74d00b29942..42191ed9902b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1048,7 +1048,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx, if (!ctx->packet || !ctx->packet->has_cookie_echo) return; - /* fallthru */ + /* fall through */ case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: case SCTP_STATE_SHUTDOWN_RECEIVED: diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 418f03d0be90..645c16052052 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -609,16 +609,18 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, switch (evt) { case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) + if (netif_carrier_ok(dev) && netif_oper_up(dev)) { + test_and_set_bit_lock(0, &b->up); break; - /* else: fall through */ - case NETDEV_UP: - test_and_set_bit_lock(0, &b->up); - break; + } + /* fall through */ case NETDEV_GOING_DOWN: clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); break; + case NETDEV_UP: + test_and_set_bit_lock(0, &b->up); + break; case NETDEV_CHANGEMTU: if (tipc_mtu_bad(dev, 0)) { bearer_disable(net, b); diff --git a/net/tipc/link.c b/net/tipc/link.c index b1f0bee54eac..f6552e4f4b43 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -410,6 +410,11 @@ char *tipc_link_name(struct tipc_link *l) return l->name; } +u32 tipc_link_state(struct tipc_link *l) +{ + return l->state; +} + /** * tipc_link_create - create a new link * @n: pointer to associated node @@ -472,6 +477,8 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, l->in_session = false; l->bearer_id = bearer_id; l->tolerance = tolerance; + if (bc_rcvlink) + bc_rcvlink->tolerance = tolerance; l->net_plane = net_plane; l->advertised_mtu = mtu; l->mtu = mtu; @@ -838,12 +845,24 @@ static void link_prepare_wakeup(struct tipc_link *l) void tipc_link_reset(struct tipc_link *l) { + struct sk_buff_head list; + + __skb_queue_head_init(&list); + l->in_session = false; l->session++; l->mtu = l->advertised_mtu; + + spin_lock_bh(&l->wakeupq.lock); + skb_queue_splice_init(&l->wakeupq, &list); + spin_unlock_bh(&l->wakeupq.lock); + + spin_lock_bh(&l->inputq->lock); + skb_queue_splice_init(&list, l->inputq); + spin_unlock_bh(&l->inputq->lock); + __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); - skb_queue_splice_init(&l->wakeupq, l->inputq); __skb_queue_purge(&l->backlogq); l->backlog[TIPC_LOW_IMPORTANCE].len = 0; l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; @@ -1021,7 +1040,7 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r, /* Detect repeated retransmit failures on same packet */ if (r->last_retransm != buf_seqno(skb)) { r->last_retransm = buf_seqno(skb); - r->stale_limit = jiffies + msecs_to_jiffies(l->tolerance); + r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) { link_retransmit_failure(l, skb); if (link_is_bc_sndlink(l)) @@ -1380,6 +1399,36 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, __skb_queue_tail(xmitq, skb); } +void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 onode = tipc_own_addr(l->net); + struct tipc_msg *hdr, *ihdr; + struct sk_buff_head tnlq; + struct sk_buff *skb; + u32 dnode = l->addr; + + skb_queue_head_init(&tnlq); + skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, + INT_H_SIZE, BASIC_H_SIZE, + dnode, onode, 0, 0, 0); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); + return; + } + + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, 1); + msg_set_bearer_id(hdr, l->peer_bearer_id); + + ihdr = (struct tipc_msg *)msg_data(hdr); + tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, dnode); + msg_set_errcode(ihdr, TIPC_ERR_NO_PORT); + __skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, xmitq); +} + /* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets * with contents of the link's transmit and backlog queues. */ @@ -1476,6 +1525,9 @@ bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) return false; if (session != curr_session) return false; + /* Extra sanity check */ + if (!link_is_up(l) && msg_ack(hdr)) + return false; if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) return true; /* Accept only STATE with new sequence number */ @@ -1533,9 +1585,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, strncpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; - + l->bc_rcvlink->tolerance = peers_tol; + } /* Update own priority if peer's priority is higher */ if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; @@ -1561,9 +1614,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->rcv_nxt_state = msg_seqno(hdr) + 1; /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; - + l->bc_rcvlink->tolerance = peers_tol; + } /* Update own prio if peer indicates a different value */ if ((peers_prio != l->priority) && in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { @@ -2180,6 +2234,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; + if (l->bc_rcvlink) + l->bc_rcvlink->tolerance = tol; if (link_is_up(l)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } diff --git a/net/tipc/link.h b/net/tipc/link.h index 7bc494a33fdf..90488c538a4e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -88,6 +88,8 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, struct tipc_link **link); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); +void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, + struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); bool tipc_link_is_up(struct tipc_link *l); @@ -107,6 +109,7 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l); u16 tipc_link_acked(struct tipc_link *l); u32 tipc_link_id(struct tipc_link *l); char *tipc_link_name(struct tipc_link *l); +u32 tipc_link_state(struct tipc_link *l); char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); int tipc_link_window(struct tipc_link *l); diff --git a/net/tipc/node.c b/net/tipc/node.c index 68014f1b6976..2afc4f8c37a7 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -111,6 +111,7 @@ struct tipc_node { int action_flags; struct list_head list; int state; + bool failover_sent; u16 sync_point; int link_cnt; u16 working_links; @@ -680,6 +681,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); + n->failover_sent = false; n->action_flags |= TIPC_NOTIFY_NODE_UP; tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); @@ -911,6 +913,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool reset = true; char *if_name; unsigned long intv; + u16 session; *dupl_addr = false; *respond = false; @@ -997,9 +1000,10 @@ void tipc_node_check_dest(struct net *net, u32 addr, goto exit; if_name = strchr(b->name, ':') + 1; + get_random_bytes(&session, sizeof(u16)); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, - b->window, mod(tipc_net(net)->random), + b->window, session, tipc_own_addr(net), addr, peer_id, n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, @@ -1615,6 +1619,14 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), tipc_link_inputq(l)); } + /* If parallel link was already down, and this happened before + * the tunnel link came up, FAILOVER was never sent. Ensure that + * FAILOVER is sent to get peer out of NODE_FAILINGOVER state. + */ + if (n->state != NODE_FAILINGOVER && !n->failover_sent) { + tipc_link_create_dummy_tnl_msg(l, xmitq); + n->failover_sent = true; + } /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3f03ddd0e35b..49810fdff4c5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1196,6 +1196,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, * @skb: pointer to message buffer. */ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); @@ -1213,7 +1214,16 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), tsk_peer_port(tsk)); sk->sk_state_change(sk); - goto exit; + + /* State change is ignored if socket already awake, + * - convert msg to abort msg and add to inqueue + */ + msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); + msg_set_type(hdr, TIPC_CONN_MSG); + msg_set_size(hdr, BASIC_H_SIZE); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + __skb_queue_tail(inputq, skb); + return; } tsk->probe_unacked = false; @@ -1419,8 +1429,10 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) /* Handle implicit connection setup */ if (unlikely(dest)) { rc = __tipc_sendmsg(sock, m, dlen); - if (dlen && (dlen == rc)) + if (dlen && dlen == rc) { + tsk->peer_caps = tipc_node_get_capabilities(net, dnode); tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); + } return rc; } @@ -1934,7 +1946,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, switch (msg_user(hdr)) { case CONN_MANAGER: - tipc_sk_conn_proto_rcv(tsk, skb, xmitq); + tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq); return; case SOCK_WAKEUP: tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4b8ec659e797..176edfefcbaa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3756,6 +3756,7 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, return false; /* check availability */ + ridx = array_index_nospec(ridx, IEEE80211_HT_MCS_MASK_LEN); if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) mcs[ridx] |= rbit; else @@ -10230,7 +10231,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; s32 last, low, high; u32 hyst; - int i, n; + int i, n, low_index; int err; /* RSSI reporting disabled? */ @@ -10267,10 +10268,19 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, if (last < wdev->cqm_config->rssi_thresholds[i]) break; - low = i > 0 ? - (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN; - high = i < n ? - (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX; + low_index = i - 1; + if (low_index >= 0) { + low_index = array_index_nospec(low_index, n); + low = wdev->cqm_config->rssi_thresholds[low_index] - hyst; + } else { + low = S32_MIN; + } + if (i < n) { + i = array_index_nospec(i, n); + high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1; + } else { + high = S32_MAX; + } return rdev_set_cqm_rssi_range_config(rdev, dev, low, high); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2f702adf2912..24cfa2776f50 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2661,11 +2661,12 @@ static void reg_process_hint(struct regulatory_request *reg_request) { struct wiphy *wiphy = NULL; enum reg_request_treatment treatment; + enum nl80211_reg_initiator initiator = reg_request->initiator; if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - switch (reg_request->initiator) { + switch (initiator) { case NL80211_REGDOM_SET_BY_CORE: treatment = reg_process_hint_core(reg_request); break; @@ -2683,7 +2684,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) treatment = reg_process_hint_country_ie(wiphy, reg_request); break; default: - WARN(1, "invalid initiator %d\n", reg_request->initiator); + WARN(1, "invalid initiator %d\n", initiator); goto out_free; } @@ -2698,7 +2699,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) */ if (treatment == REG_REQ_ALREADY_SET && wiphy && wiphy->regulatory_flags & REGULATORY_STRICT_REG) { - wiphy_update_regulatory(wiphy, reg_request->initiator); + wiphy_update_regulatory(wiphy, initiator); wiphy_all_share_dfs_chan_state(wiphy); reg_check_channels(); } @@ -2867,6 +2868,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; + request->wiphy_idx = WIPHY_IDX_INVALID; queue_regulatory_request(request); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d36c3eb7b931..d0e7472dd9fd 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1058,13 +1058,23 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, return NULL; } +/* + * Update RX channel information based on the available frame payload + * information. This is mainly for the 2.4 GHz band where frames can be received + * from neighboring channels and the Beacon frames use the DSSS Parameter Set + * element to indicate the current (transmitting) channel, but this might also + * be needed on other bands if RX frequency does not match with the actual + * operating channel of a BSS. + */ static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, - struct ieee80211_channel *channel) + struct ieee80211_channel *channel, + enum nl80211_bss_scan_width scan_width) { const u8 *tmp; u32 freq; int channel_number = -1; + struct ieee80211_channel *alt_channel; tmp = cfg80211_find_ie(WLAN_EID_DS_PARAMS, ie, ielen); if (tmp && tmp[1] == 1) { @@ -1078,16 +1088,45 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } } - if (channel_number < 0) + if (channel_number < 0) { + /* No channel information in frame payload */ return channel; + } freq = ieee80211_channel_to_frequency(channel_number, channel->band); - channel = ieee80211_get_channel(wiphy, freq); - if (!channel) - return NULL; - if (channel->flags & IEEE80211_CHAN_DISABLED) + alt_channel = ieee80211_get_channel(wiphy, freq); + if (!alt_channel) { + if (channel->band == NL80211_BAND_2GHZ) { + /* + * Better not allow unexpected channels when that could + * be going beyond the 1-11 range (e.g., discovering + * BSS on channel 12 when radio is configured for + * channel 11. + */ + return NULL; + } + + /* No match for the payload channel number - ignore it */ + return channel; + } + + if (scan_width == NL80211_BSS_CHAN_WIDTH_10 || + scan_width == NL80211_BSS_CHAN_WIDTH_5) { + /* + * Ignore channel number in 5 and 10 MHz channels where there + * may not be an n:1 or 1:n mapping between frequencies and + * channel numbers. + */ + return channel; + } + + /* + * Use the channel determined through the payload channel number + * instead of the RX channel reported by the driver. + */ + if (alt_channel->flags & IEEE80211_CHAN_DISABLED) return NULL; - return channel; + return alt_channel; } /* Returned bss is reference counted and must be cleaned up appropriately. */ @@ -1112,7 +1151,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, (data->signal < 0 || data->signal > 100))) return NULL; - channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan); + channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, + data->scan_width); if (!channel) return NULL; @@ -1210,7 +1250,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable, - ielen, data->chan); + ielen, data->chan, data->scan_width); if (!channel) return NULL; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 167f7025ac98..06943d9c9835 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1278,12 +1278,16 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) - return -EOPNOTSUPP; + if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) { + err = -EOPNOTSUPP; + goto free; + } rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); - return 0; +free: + cfg80211_sinfo_release_content(&sinfo); + return err; } /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ @@ -1293,7 +1297,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); /* we are under RTNL - globally locked - so can use static structs */ static struct iw_statistics wstats; - static struct station_info sinfo; + static struct station_info sinfo = {}; u8 bssid[ETH_ALEN]; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) @@ -1352,6 +1356,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; + cfg80211_sinfo_release_content(&sinfo); + return &wstats; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index b89c9c7f8c5c..be3520e429c9 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -458,6 +458,7 @@ resume: XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } + crypto_done = false; } while (!err); err = xfrm_rcv_cb(skb, family, x->type->proto, 0); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 45ba07ab3e4f..261995d37ced 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -100,6 +100,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err) spin_unlock_bh(&x->lock); skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + goto error_nolock; + } if (xfrm_offload(skb)) { x->type_offload->encap(x, skb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3110c3fbee20..f094d4b3520d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2491,6 +2491,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); + return 0; + } dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4791aa8b8185..df7ca2dabc48 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -151,10 +151,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + goto out; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + goto out; + break; #else err = -EAFNOSUPPORT; @@ -1396,10 +1402,16 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) switch (p->sel.family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + return -EINVAL; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + return -EINVAL; + break; #else return -EAFNOSUPPORT; @@ -1480,6 +1492,9 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) (ut[i].family != prev_family)) return -EINVAL; + if (ut[i].mode >= XFRM_MODE_MAX) + return -EINVAL; + prev_family = ut[i].family; switch (ut[i].family) { diff --git a/samples/Kconfig b/samples/Kconfig index bd133efc1a56..ad1ec7016d4c 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -1,5 +1,6 @@ menuconfig SAMPLES bool "Sample kernel code" + depends on !UML help You can build and test sample kernel code here. diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 5a2d1c9578a0..54da4b070db3 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -219,7 +219,7 @@ else sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ "$(if $(CONFIG_64BIT),64,32)" \ - "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ + "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)" \ "$(LD) $(KBUILD_LDFLAGS)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; recordmcount_source := $(srctree)/scripts/recordmcount.pl diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index b5282cbbe489..617ff1aa818f 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -145,9 +145,11 @@ int snd_hdac_i915_init(struct hdac_bus *bus) if (!acomp->ops) { request_module("i915"); /* 10s timeout */ - wait_for_completion_timeout(&bind_complete, 10 * 1000); + wait_for_completion_timeout(&bind_complete, + msecs_to_jiffies(10 * 1000)); } if (!acomp->ops) { + dev_info(bus->dev, "couldn't bind with audio component\n"); snd_hdac_acomp_exit(bus); return -ENODEV; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1d117f00d04d..3ac7ba9b342d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6409,6 +6409,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 86299efa804a..fd23d5778ea1 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -377,6 +377,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index d78aed86af09..8ff8cb1a11f4 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -234,6 +234,7 @@ int main(int argc, char *argv[]) break; default: + error = HV_E_FAIL; syslog(LOG_ERR, "Unknown operation: %d", buffer.hdr.operation); diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 07548de5c988..251be353f950 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -952,6 +952,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 439b8a27488d..195ba486640f 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1325,7 +1325,7 @@ class Tui(object): msg = '' while True: self.screen.erase() - self.screen.addstr(0, 0, 'Set update interval (defaults to %fs).' % + self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' % DELAY_DEFAULT, curses.A_BOLD) self.screen.addstr(4, 0, msg) self.screen.addstr(2, 0, 'Change delay from %.1fs to ' % diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 120037496f77..5afb11b30fca 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -36,7 +36,7 @@ static const char *tracing_path_tracefs_mount(void) __tracing_path_set("", mnt); - return mnt; + return tracing_path; } static const char *tracing_path_debugfs_mount(void) @@ -49,7 +49,7 @@ static const char *tracing_path_debugfs_mount(void) __tracing_path_set("tracing/", mnt); - return mnt; + return tracing_path; } const char *tracing_path_mount(void) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f6d1a03c7523..e30d20fb482d 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -833,7 +833,7 @@ ifndef NO_JVMTI JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') else ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') endif endif ifndef JDIR diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 92514fb3689f..2f3bf025e305 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -635,7 +635,7 @@ $(LIBPERF_IN): prepare FORCE $(LIB_FILE): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) +LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' $(LIBTRACEEVENT): FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c0703979c51d..257c9c18cb7e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -980,6 +980,7 @@ int cmd_report(int argc, const char **argv) .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, + .event_update = perf_event__process_event_update, .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index d40498f2cb1e..635c09fda1d9 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -188,7 +188,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -199,7 +199,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -210,7 +210,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -221,7 +221,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -232,7 +232,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -243,7 +243,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -254,7 +254,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -265,7 +265,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index 16034bfd06dd..8755693d86c6 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -187,7 +187,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -198,7 +198,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -209,7 +209,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -220,7 +220,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -231,7 +231,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -242,7 +242,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -253,7 +253,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -264,7 +264,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 0cd42150f712..bc646185f8d9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1081,6 +1081,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max } *size += sizeof(struct cpu_map_data); + *size = PERF_ALIGN(*size, sizeof(u64)); return zalloc(*size); } @@ -1560,26 +1561,9 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return NULL; } -try_again: + al->map = map_groups__find(mg, al->addr); - if (al->map == NULL) { - /* - * If this is outside of all known maps, and is a negative - * address, try to look it up in the kernel dso, as it might be - * a vsyscall or vdso (which executes in user-mode). - * - * XXX This is nasty, we should have a symbol list in the - * "[vdso]" dso, but for now lets use the old trick of looking - * in the whole kernel symbol list. - */ - if (cpumode == PERF_RECORD_MISC_USER && machine && - mg != &machine->kmaps && - machine__kernel_ip(machine, al->addr)) { - mg = &machine->kmaps; - load_map = true; - goto try_again; - } - } else { + if (al->map != NULL) { /* * Kernel maps might be changed when loading symbols so loading * must be done prior to using kernel maps. diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cb7f01059940..29d7b97f66fb 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1089,6 +1089,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } + if (evsel->own_cpus) + evsel->attr.read_format |= PERF_FORMAT_ID; + /* * Apply event specific term settings, * it overloads any global configuration. diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index afd68524ffa9..7799788f662f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -930,13 +930,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - __u64 w = 0; - int fbit; - - for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) - w |= (1ULL << fbit); + int w; - return w; + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; } /* diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 09d6746e6ec8..e767c4a9d4d2 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -85,6 +85,9 @@ static struct symbol *new_inline_sym(struct dso *dso, struct symbol *inline_sym; char *demangled = NULL; + if (!funcname) + funcname = "??"; + if (dso) { demangled = dso__demangle_sym(dso, 0, funcname); if (demangled) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 08c341b49760..e101af52d1d6 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # This test is for checking rtnetlink callpaths, and get as much coverage as possible. # diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 850767befa47..99e537ab5ad9 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Run a series of udpgso benchmarks diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 642d4e12abea..eec2663261f2 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -56,15 +56,13 @@ unsigned int yield_mod_cnt, nr_abort; printf(fmt, ## __VA_ARGS__); \ } while (0) -#if defined(__x86_64__) || defined(__i386__) +#ifdef __i386__ #define INJECT_ASM_REG "eax" #define RSEQ_INJECT_CLOBBER \ , INJECT_ASM_REG -#ifdef __i386__ - #define RSEQ_INJECT_ASM(n) \ "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ @@ -76,9 +74,16 @@ unsigned int yield_mod_cnt, nr_abort; #elif defined(__x86_64__) +#define INJECT_ASM_REG_P "rax" +#define INJECT_ASM_REG "eax" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG_P \ + , INJECT_ASM_REG + #define RSEQ_INJECT_ASM(n) \ - "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \ - "mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \ + "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \ + "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ "jz 333f\n\t" \ "222:\n\t" \ @@ -86,10 +91,6 @@ unsigned int yield_mod_cnt, nr_abort; "jnz 222b\n\t" \ "333:\n\t" -#else -#error "Unsupported architecture" -#endif - #elif defined(__s390__) #define RSEQ_INJECT_INPUT \ diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 235259011704..35edd61d1663 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -17,6 +17,7 @@ #include <errno.h> #include <sched.h> #include <stdbool.h> +#include <limits.h> #ifndef SYS_getcpu # ifdef __x86_64__ @@ -31,6 +32,14 @@ int nerrs = 0; +typedef int (*vgettime_t)(clockid_t, struct timespec *); + +vgettime_t vdso_clock_gettime; + +typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); + +vgtod_t vdso_gettimeofday; + typedef long (*getcpu_t)(unsigned *, unsigned *, void *); getcpu_t vgetcpu; @@ -95,6 +104,15 @@ static void fill_function_pointers() printf("Warning: failed to find getcpu in vDSO\n"); vgetcpu = (getcpu_t) vsyscall_getcpu(); + + vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_clock_gettime) + printf("Warning: failed to find clock_gettime in vDSO\n"); + + vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday"); + if (!vdso_gettimeofday) + printf("Warning: failed to find gettimeofday in vDSO\n"); + } static long sys_getcpu(unsigned * cpu, unsigned * node, @@ -103,6 +121,16 @@ static long sys_getcpu(unsigned * cpu, unsigned * node, return syscall(__NR_getcpu, cpu, node, cache); } +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) +{ + return syscall(__NR_clock_gettime, id, ts); +} + +static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return syscall(__NR_gettimeofday, tv, tz); +} + static void test_getcpu(void) { printf("[RUN]\tTesting getcpu...\n"); @@ -155,10 +183,154 @@ static void test_getcpu(void) } } +static bool ts_leq(const struct timespec *a, const struct timespec *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_nsec <= b->tv_nsec; +} + +static bool tv_leq(const struct timeval *a, const struct timeval *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_usec <= b->tv_usec; +} + +static char const * const clocknames[] = { + [0] = "CLOCK_REALTIME", + [1] = "CLOCK_MONOTONIC", + [2] = "CLOCK_PROCESS_CPUTIME_ID", + [3] = "CLOCK_THREAD_CPUTIME_ID", + [4] = "CLOCK_MONOTONIC_RAW", + [5] = "CLOCK_REALTIME_COARSE", + [6] = "CLOCK_MONOTONIC_COARSE", + [7] = "CLOCK_BOOTTIME", + [8] = "CLOCK_REALTIME_ALARM", + [9] = "CLOCK_BOOTTIME_ALARM", + [10] = "CLOCK_SGI_CYCLE", + [11] = "CLOCK_TAI", +}; + +static void test_one_clock_gettime(int clock, const char *name) +{ + struct timespec start, vdso, end; + int vdso_ret, end_ret; + + printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock); + + if (sys_clock_gettime(clock, &start) < 0) { + if (errno == EINVAL) { + vdso_ret = vdso_clock_gettime(clock, &vdso); + if (vdso_ret == -EINVAL) { + printf("[OK]\tNo such clock.\n"); + } else { + printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret); + nerrs++; + } + } else { + printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno); + } + return; + } + + vdso_ret = vdso_clock_gettime(clock, &vdso); + end_ret = sys_clock_gettime(clock, &end); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + (unsigned long long)start.tv_sec, start.tv_nsec, + (unsigned long long)vdso.tv_sec, vdso.tv_nsec, + (unsigned long long)end.tv_sec, end.tv_nsec); + + if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } +} + +static void test_clock_gettime(void) +{ + for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); + clock++) { + test_one_clock_gettime(clock, clocknames[clock]); + } + + /* Also test some invalid clock ids */ + test_one_clock_gettime(-1, "invalid"); + test_one_clock_gettime(INT_MIN, "invalid"); + test_one_clock_gettime(INT_MAX, "invalid"); +} + +static void test_gettimeofday(void) +{ + struct timeval start, vdso, end; + struct timezone sys_tz, vdso_tz; + int vdso_ret, end_ret; + + if (!vdso_gettimeofday) + return; + + printf("[RUN]\tTesting gettimeofday...\n"); + + if (sys_gettimeofday(&start, &sys_tz) < 0) { + printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno); + nerrs++; + return; + } + + vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); + end_ret = sys_gettimeofday(&end, NULL); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n", + (unsigned long long)start.tv_sec, start.tv_usec, + (unsigned long long)vdso.tv_sec, vdso.tv_usec, + (unsigned long long)end.tv_sec, end.tv_usec); + + if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } + + if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest && + sys_tz.tz_dsttime == vdso_tz.tz_dsttime) { + printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n", + sys_tz.tz_minuteswest, sys_tz.tz_dsttime); + } else { + printf("[FAIL]\ttimezones do not match\n"); + nerrs++; + } + + /* And make sure that passing NULL for tz doesn't crash. */ + vdso_gettimeofday(&vdso, NULL); +} + int main(int argc, char **argv) { fill_function_pointers(); + test_clock_gettime(); + test_gettimeofday(); + + /* + * Test getcpu() last so that, if something goes wrong setting affinity, + * we still run the other tests. + */ test_getcpu(); return nerrs ? 1 : 0; |