summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--Documentation/accounting/delay-accounting.rst55
-rw-r--r--Documentation/accounting/psi.rst3
-rw-r--r--Documentation/admin-guide/gpio/index.rst1
-rw-r--r--Documentation/arm/marvell.rst2
-rw-r--r--Documentation/arm64/silicon-errata.rst12
-rw-r--r--Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ps8640.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml7
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,td.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml8
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml1
-rw-r--r--Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml1
-rw-r--r--Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml1
-rw-r--r--Documentation/devicetree/bindings/input/gpio-keys.yaml6
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml12
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml12
-rw-r--r--Documentation/devicetree/bindings/mfd/cirrus,madera.yaml3
-rw-r--r--Documentation/devicetree/bindings/mfd/google,cros-ec.yaml1
-rw-r--r--Documentation/devicetree/bindings/mmc/arm,pl18x.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,m_can.yaml52
-rw-r--r--Documentation/devicetree/bindings/net/can/tcan4x5x.txt2
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-controller.yaml59
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fman.txt9
-rw-r--r--Documentation/devicetree/bindings/net/oxnas-dwmac.txt3
-rw-r--r--Documentation/devicetree/bindings/nvmem/nvmem.yaml17
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml9
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml10
-rw-r--r--Documentation/devicetree/bindings/power/reset/gpio-restart.yaml4
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm.yaml2
-rw-r--r--Documentation/devicetree/bindings/rtc/epson,rx8900.yaml1
-rw-r--r--Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml9
-rw-r--r--Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml1
-rw-r--r--Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml56
-rw-r--r--Documentation/devicetree/bindings/sound/samsung-i2s.yaml6
-rw-r--r--Documentation/devicetree/bindings/trivial-devices.yaml8
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.yaml10
-rw-r--r--Documentation/driver-api/firewire.rst4
-rw-r--r--Documentation/filesystems/ceph.rst25
-rw-r--r--Documentation/index.rst1
-rw-r--r--Documentation/kernel-hacking/locking.rst2
-rw-r--r--Documentation/riscv/vm-layout.rst12
-rw-r--r--Documentation/staging/tee.rst4
-rw-r--r--Documentation/tools/index.rst20
-rw-r--r--Documentation/tools/rtla/index.rst26
-rw-r--r--Documentation/trace/ftrace.rst2
-rw-r--r--Documentation/virt/kvm/api.rst10
-rw-r--r--Documentation/vm/cleancache.rst296
-rw-r--r--Documentation/vm/frontswap.rst31
-rw-r--r--Documentation/vm/index.rst1
-rw-r--r--Documentation/vm/page_table_check.rst2
-rw-r--r--MAINTAINERS51
-rw-r--r--Makefile6
-rw-r--r--arch/Kconfig4
-rw-r--r--arch/alpha/include/asm/bitops.h2
-rw-r--r--arch/alpha/kernel/rtc.c7
-rw-r--r--arch/alpha/kernel/srm_env.c4
-rw-r--r--arch/arc/Kconfig1
-rw-r--r--arch/arc/include/asm/bitops.h1
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/configs/bcm2835_defconfig1
-rw-r--r--arch/arm/configs/qcom_defconfig1
-rw-r--r--arch/arm/include/asm/assembler.h2
-rw-r--r--arch/arm/include/asm/bitops.h1
-rw-r--r--arch/arm/include/asm/processor.h1
-rw-r--r--arch/arm/include/asm/uaccess.h10
-rw-r--r--arch/arm/kernel/atags_proc.c2
-rw-r--r--arch/arm/mm/alignment.c2
-rw-r--r--arch/arm/probes/kprobes/Makefile3
-rw-r--r--arch/arm64/Kconfig102
-rw-r--r--arch/arm64/include/asm/atomic_lse.h2
-rw-r--r--arch/arm64/include/asm/bitops.h1
-rw-r--r--arch/arm64/include/asm/cmpxchg.h2
-rw-r--r--arch/arm64/include/asm/cputype.h4
-rw-r--r--arch/arm64/kernel/cpu_errata.c29
-rw-r--r--arch/arm64/kernel/cpufeature.c3
-rw-r--r--arch/arm64/kernel/stacktrace.c5
-rw-r--r--arch/arm64/kernel/vdso/Makefile5
-rw-r--r--arch/arm64/kvm/hyp/exception.c5
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c18
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c3
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c17
-rw-r--r--arch/arm64/mm/extable.c4
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/arm64/tools/cpucaps3
-rw-r--r--arch/csky/include/asm/bitops.h1
-rw-r--r--arch/h8300/include/asm/bitops.h1
-rw-r--r--arch/hexagon/include/asm/bitops.h1
-rw-r--r--arch/ia64/Kconfig11
-rw-r--r--arch/ia64/include/asm/bitops.h2
-rw-r--r--arch/ia64/kernel/salinfo.c10
-rw-r--r--arch/ia64/pci/fixup.c4
-rw-r--r--arch/m68k/configs/amiga_defconfig1
-rw-r--r--arch/m68k/configs/apollo_defconfig1
-rw-r--r--arch/m68k/configs/atari_defconfig1
-rw-r--r--arch/m68k/configs/bvme6000_defconfig1
-rw-r--r--arch/m68k/configs/hp300_defconfig1
-rw-r--r--arch/m68k/configs/mac_defconfig1
-rw-r--r--arch/m68k/configs/multi_defconfig1
-rw-r--r--arch/m68k/configs/mvme147_defconfig1
-rw-r--r--arch/m68k/configs/mvme16x_defconfig1
-rw-r--r--arch/m68k/configs/q40_defconfig1
-rw-r--r--arch/m68k/configs/sun3_defconfig1
-rw-r--r--arch/m68k/configs/sun3x_defconfig1
-rw-r--r--arch/m68k/include/asm/bitops.h2
-rw-r--r--arch/mips/Kconfig11
-rw-r--r--arch/mips/include/asm/asm.h4
-rw-r--r--arch/mips/include/asm/bitops.h1
-rw-r--r--arch/mips/include/asm/ftrace.h4
-rw-r--r--arch/mips/include/asm/r4kcache.h4
-rw-r--r--arch/mips/include/asm/unaligned-emul.h176
-rw-r--r--arch/mips/kernel/mips-r2-to-r6-emul.c104
-rw-r--r--arch/mips/kernel/r2300_fpu.S6
-rw-r--r--arch/mips/kernel/r4k_fpu.S2
-rw-r--r--arch/mips/kernel/relocate_kernel.S22
-rw-r--r--arch/mips/kernel/scall32-o32.S10
-rw-r--r--arch/mips/kernel/scall64-n32.S2
-rw-r--r--arch/mips/kernel/scall64-n64.S2
-rw-r--r--arch/mips/kernel/scall64-o32.S10
-rw-r--r--arch/mips/kernel/syscall.c8
-rw-r--r--arch/mips/lib/csum_partial.S4
-rw-r--r--arch/mips/lib/memcpy.S4
-rw-r--r--arch/mips/lib/memset.S2
-rw-r--r--arch/mips/lib/strncpy_user.S4
-rw-r--r--arch/mips/lib/strnlen_user.S2
-rw-r--r--arch/mips/loongson64/vbios_quirk.c9
-rw-r--r--arch/mips/mm/init.c14
-rw-r--r--arch/openrisc/include/asm/bitops.h1
-rw-r--r--arch/parisc/include/asm/bitops.h1
-rw-r--r--arch/parisc/include/asm/processor.h1
-rw-r--r--arch/parisc/kernel/setup.c15
-rw-r--r--arch/parisc/kernel/toc.c3
-rw-r--r--arch/powerpc/Kconfig17
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi2
-rw-r--r--arch/powerpc/boot/dts/wii.dts5
-rw-r--r--arch/powerpc/configs/gamecube_defconfig2
-rw-r--r--arch/powerpc/configs/wii_defconfig2
-rw-r--r--arch/powerpc/include/asm/bitops.h2
-rw-r--r--arch/powerpc/include/asm/book3s/32/mmu-hash.h2
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/fixmap.h6
-rw-r--r--arch/powerpc/include/asm/hw_irq.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h1
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h1
-rw-r--r--arch/powerpc/include/asm/syscall.h4
-rw-r--r--arch/powerpc/include/asm/thread_info.h2
-rw-r--r--arch/powerpc/kernel/interrupt_64.S2
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c4
-rw-r--r--arch/powerpc/kernel/setup_64.c97
-rw-r--r--arch/powerpc/kernel/time.c5
-rw-r--r--arch/powerpc/kvm/book3s_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c2
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c10
-rw-r--r--arch/powerpc/mm/kasan/book3s_32.c59
-rw-r--r--arch/powerpc/mm/pgtable.c9
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c29
-rw-r--r--arch/powerpc/net/bpf_jit_comp32.c9
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c29
-rw-r--r--arch/powerpc/perf/core-book3s.c75
-rw-r--r--arch/powerpc/platforms/pasemi/dma_lib.c4
-rw-r--r--arch/riscv/Kconfig62
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts5
-rw-r--r--arch/riscv/configs/nommu_k210_defconfig1
-rw-r--r--arch/riscv/configs/nommu_k210_sdcard_defconfig1
-rw-r--r--arch/riscv/configs/nommu_virt_defconfig2
-rw-r--r--arch/riscv/include/asm/bitops.h1
-rw-r--r--arch/riscv/include/asm/cpu_ops.h2
-rw-r--r--arch/riscv/include/asm/cpu_ops_sbi.h25
-rw-r--r--arch/riscv/include/asm/csr.h3
-rw-r--r--arch/riscv/include/asm/fixmap.h1
-rw-r--r--arch/riscv/include/asm/kasan.h11
-rw-r--r--arch/riscv/include/asm/page.h16
-rw-r--r--arch/riscv/include/asm/pgalloc.h40
-rw-r--r--arch/riscv/include/asm/pgtable-64.h108
-rw-r--r--arch/riscv/include/asm/pgtable.h65
-rw-r--r--arch/riscv/include/asm/sbi.h19
-rw-r--r--arch/riscv/include/asm/smp.h2
-rw-r--r--arch/riscv/include/asm/sparsemem.h6
-rw-r--r--arch/riscv/kernel/Makefile3
-rw-r--r--arch/riscv/kernel/asm-offsets.c3
-rw-r--r--arch/riscv/kernel/cpu.c23
-rw-r--r--arch/riscv/kernel/cpu_ops.c26
-rw-r--r--arch/riscv/kernel/cpu_ops_sbi.c26
-rw-r--r--arch/riscv/kernel/cpu_ops_spinwait.c27
-rw-r--r--arch/riscv/kernel/head.S38
-rw-r--r--arch/riscv/kernel/head.h6
-rw-r--r--arch/riscv/kernel/ptrace.c4
-rw-r--r--arch/riscv/kernel/sbi.c189
-rw-r--r--arch/riscv/kernel/setup.c10
-rw-r--r--arch/riscv/kernel/smpboot.c2
-rw-r--r--arch/riscv/kvm/mmu.c4
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c11
-rw-r--r--arch/riscv/kvm/vcpu_sbi_v01.c11
-rw-r--r--arch/riscv/kvm/vmid.c4
-rw-r--r--arch/riscv/mm/cacheflush.c5
-rw-r--r--arch/riscv/mm/context.c4
-rw-r--r--arch/riscv/mm/init.c378
-rw-r--r--arch/riscv/mm/kasan_init.c248
-rw-r--r--arch/riscv/mm/tlbflush.c9
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c2
-rw-r--r--arch/s390/Kconfig16
-rw-r--r--arch/s390/configs/debug_defconfig21
-rw-r--r--arch/s390/configs/defconfig17
-rw-r--r--arch/s390/configs/zfcpdump_defconfig3
-rw-r--r--arch/s390/hypfs/hypfs_vm.c6
-rw-r--r--arch/s390/include/asm/bitops.h1
-rw-r--r--arch/s390/include/asm/cpu_mf.h4
-rw-r--r--arch/s390/include/asm/uaccess.h120
-rw-r--r--arch/s390/kernel/module.c37
-rw-r--r--arch/s390/kernel/nmi.c27
-rw-r--r--arch/s390/kernel/perf_cpum_cf_common.c4
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c6
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/s390/lib/Makefile3
-rw-r--r--arch/s390/lib/test_modules.c35
-rw-r--r--arch/s390/lib/test_modules.h50
-rw-r--r--arch/s390/lib/test_modules_helpers.c13
-rw-r--r--arch/s390/lib/uaccess.c24
-rw-r--r--arch/sh/include/asm/bitops.h1
-rw-r--r--arch/sh/mm/alignment.c4
-rw-r--r--arch/sparc/Kconfig12
-rw-r--r--arch/sparc/include/asm/bitops_32.h1
-rw-r--r--arch/sparc/include/asm/bitops_64.h2
-rw-r--r--arch/sparc/kernel/led.c8
-rw-r--r--arch/sparc/kernel/smp_64.c103
-rw-r--r--arch/x86/Kconfig21
-rw-r--r--arch/x86/events/intel/core.c15
-rw-r--r--arch/x86/events/intel/lbr.c168
-rw-r--r--arch/x86/events/intel/uncore.c2
-rw-r--r--arch/x86/events/intel/uncore.h3
-rw-r--r--arch/x86/events/intel/uncore_discovery.c4
-rw-r--r--arch/x86/events/intel/uncore_discovery.h2
-rw-r--r--arch/x86/events/intel/uncore_snb.c214
-rw-r--r--arch/x86/events/intel/uncore_snbep.c2
-rw-r--r--arch/x86/events/perf_event.h10
-rw-r--r--arch/x86/events/rapl.c9
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h18
-rw-r--r--arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--arch/x86/kernel/apic/vector.c4
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c2
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c1
-rw-r--r--arch/x86/kernel/early-quirks.c10
-rw-r--r--arch/x86/kernel/hpet.c8
-rw-r--r--arch/x86/kernel/setup_percpu.c66
-rw-r--r--arch/x86/kvm/cpuid.c165
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/mmu/mmu.c31
-rw-r--r--arch/x86/kvm/mmu/spte.c1
-rw-r--r--arch/x86/kvm/mmu/spte.h42
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c6
-rw-r--r--arch/x86/kvm/pmu.c33
-rw-r--r--arch/x86/kvm/svm/avic.c123
-rw-r--r--arch/x86/kvm/svm/nested.c9
-rw-r--r--arch/x86/kvm/svm/pmu.c2
-rw-r--r--arch/x86/kvm/svm/sev.c9
-rw-r--r--arch/x86/kvm/svm/svm.c667
-rw-r--r--arch/x86/kvm/svm/svm.h24
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.h12
-rw-r--r--arch/x86/kvm/vmx/capabilities.h5
-rw-r--r--arch/x86/kvm/vmx/evmcs.c4
-rw-r--r--arch/x86/kvm/vmx/evmcs.h48
-rw-r--r--arch/x86/kvm/vmx/nested.c82
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c20
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c183
-rw-r--r--arch/x86/kvm/vmx/posted_intr.h8
-rw-r--r--arch/x86/kvm/vmx/vmcs12.c4
-rw-r--r--arch/x86/kvm/vmx/vmcs12.h6
-rw-r--r--arch/x86/kvm/vmx/vmx.c115
-rw-r--r--arch/x86/kvm/vmx/vmx.h3
-rw-r--r--arch/x86/kvm/x86.c164
-rw-r--r--arch/x86/kvm/x86.h1
-rw-r--r--arch/x86/kvm/xen.c10
-rw-r--r--arch/x86/pci/fixup.c4
-rw-r--r--arch/x86/um/Kconfig1
-rw-r--r--arch/xtensa/include/asm/bitops.h1
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c4
-rw-r--r--block/bdev.c5
-rw-r--r--block/bio.c3
-rw-r--r--block/blk-core.c25
-rw-r--r--block/blk-ia-ranges.c2
-rw-r--r--block/blk-mq-tag.c40
-rw-r--r--block/blk-mq.c5
-rw-r--r--block/blk-sysfs.c4
-rw-r--r--block/mq-deadline.c4
-rw-r--r--certs/Makefile4
-rw-r--r--drivers/acpi/cppc_acpi.c11
-rw-r--r--drivers/acpi/dptf/dptf_pch_fivr.c1
-rw-r--r--drivers/acpi/dptf/dptf_power.c2
-rw-r--r--drivers/acpi/dptf/int340x_thermal.c6
-rw-r--r--drivers/acpi/fan.h1
-rw-r--r--drivers/acpi/proc.c2
-rw-r--r--drivers/ata/pata_octeon_cf.c2
-rw-r--r--drivers/ata/pata_platform.c2
-rw-r--r--drivers/atm/iphase.c4
-rw-r--r--drivers/base/arch_numa.c68
-rw-r--r--drivers/base/firmware_loader/fallback.c7
-rw-r--r--drivers/base/firmware_loader/fallback.h11
-rw-r--r--drivers/base/firmware_loader/fallback_table.c25
-rw-r--r--drivers/base/power/trace.c6
-rw-r--r--drivers/block/aoe/aoecmd.c2
-rw-r--r--drivers/block/brd.c73
-rw-r--r--drivers/block/loop.c2
-rw-r--r--drivers/block/rbd.c5
-rw-r--r--drivers/block/rnbd/rnbd-clt.c2
-rw-r--r--drivers/cdrom/cdrom.c23
-rw-r--r--drivers/char/hpet.c22
-rw-r--r--drivers/char/random.c14
-rw-r--r--drivers/clk/clk-si5341.c2
-rw-r--r--drivers/clk/mediatek/clk-mt7986-apmixed.c2
-rw-r--r--drivers/clk/mediatek/clk-mt7986-infracfg.c2
-rw-r--r--drivers/clk/mediatek/clk-mt7986-topckgen.c2
-rw-r--r--drivers/clk/visconti/pll.c3
-rw-r--r--drivers/connector/cn_proc.c2
-rw-r--r--drivers/counter/counter-core.c15
-rw-r--r--drivers/firmware/efi/efi.c7
-rw-r--r--drivers/firmware/efi/libstub/arm64-stub.c6
-rw-r--r--drivers/firmware/efi/libstub/efi-stub.c2
-rw-r--r--drivers/gpio/gpio-idt3243x.c6
-rw-r--r--drivers/gpio/gpio-mpc8xxx.c6
-rw-r--r--drivers/gpio/gpio-sim.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.h4
-rw-r--r--drivers/gpu/drm/ast/ast_tables.h2
-rw-r--r--drivers/gpu/drm/drm_atomic.c12
-rw-r--r--drivers/gpu/drm/drm_dp_mst_topology.c1
-rw-r--r--drivers/gpu/drm/drm_mm.c4
-rw-r--r--drivers/gpu/drm/drm_modeset_lock.c9
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c12
-rw-r--r--drivers/gpu/drm/drm_privacy_screen_x86.c3
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c4
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c22
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c10
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c108
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h2
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c22
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h19
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c3
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c3
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c26
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.h2
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c18
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c11
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.c7
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.c4
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.c7
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c5
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c3
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h3
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_devfreq.c21
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c22
-rw-r--r--drivers/gpu/drm/vc4/vc4_dsi.c14
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h5
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c33
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c2
-rw-r--r--drivers/hid/hid-ids.h1
-rw-r--r--drivers/hid/hid-input.c2
-rw-r--r--drivers/hid/hid-vivaldi.c41
-rw-r--r--drivers/hid/uhid.c49
-rw-r--r--drivers/hid/wacom_wac.c39
-rw-r--r--drivers/hv/hv_balloon.c7
-rw-r--r--drivers/hwmon/adt7470.c3
-rw-r--r--drivers/hwmon/dell-smm-hwmon.c4
-rw-r--r--drivers/hwmon/lm90.c21
-rw-r--r--drivers/hwmon/ltc2992.c3
-rw-r--r--drivers/hwmon/nct6775.c6
-rw-r--r--drivers/hwmon/pmbus/ir38064.c2
-rw-r--r--drivers/iio/adc/ad7124.c2
-rw-r--r--drivers/infiniband/hw/irdma/hw.c16
-rw-r--r--drivers/infiniband/hw/qib/qib.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c3
-rw-r--r--drivers/macintosh/mac_hid.c24
-rw-r--r--drivers/md/dm.c20
-rw-r--r--drivers/media/cec/core/cec-core.c2
-rw-r--r--drivers/media/mc/mc-devnode.c2
-rw-r--r--drivers/message/fusion/mptbase.c149
-rw-r--r--drivers/message/fusion/mptctl.c82
-rw-r--r--drivers/message/fusion/mptlan.c90
-rw-r--r--drivers/message/fusion/mptsas.c94
-rw-r--r--drivers/misc/eeprom/at25.c4
-rw-r--r--drivers/mmc/host/renesas_sdhi_core.c2
-rw-r--r--drivers/net/bonding/bond_main.c38
-rw-r--r--drivers/net/bonding/bond_procfs.c8
-rw-r--r--drivers/net/can/flexcan/flexcan-core.c1
-rw-r--r--drivers/net/can/flexcan/flexcan.h2
-rw-r--r--drivers/net/can/m_can/m_can.c6
-rw-r--r--drivers/net/can/m_can/tcan4x5x-regmap.c2
-rw-r--r--drivers/net/ethernet/3com/typhoon.c6
-rw-r--r--drivers/net/ethernet/8390/etherh.c6
-rw-r--r--drivers/net/ethernet/allwinner/sun4i-emac.c31
-rw-r--r--drivers/net/ethernet/allwinner/sun4i-emac.h18
-rw-r--r--drivers/net/ethernet/amd/declance.c4
-rw-r--r--drivers/net/ethernet/apple/bmac.c5
-rw-r--r--drivers/net/ethernet/apple/mace.c16
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_filters.c6
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c10
-rw-r--r--drivers/net/ethernet/broadcom/sb1250-mac.c4
-rw-r--r--drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c3
-rw-r--r--drivers/net/ethernet/freescale/fec_mpc52xx.c12
-rw-r--r--drivers/net/ethernet/freescale/xgmac_mdio.c28
-rw-r--r--drivers/net/ethernet/google/gve/gve.h2
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c6
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c3
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c3
-rw-r--r--drivers/net/ethernet/i825xx/ether1.c4
-rw-r--r--drivers/net/ethernet/i825xx/sni_82596.c3
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c167
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h9
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c44
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_register.h3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c103
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h70
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.c66
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c14
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c22
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c7
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera.h1
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_hw.c4
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c1
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router.c24
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router_hw.c40
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router_hw.h3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c5
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_mac.c11
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c6
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c5
-rw-r--r--drivers/net/ethernet/mscc/ocelot_flower.c44
-rw-r--r--drivers/net/ethernet/mscc/ocelot_net.c6
-rw-r--r--drivers/net/ethernet/seeq/ether3.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c101
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c42
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c39
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c3
-rw-r--r--drivers/net/ethernet/ti/cpsw.c6
-rw-r--r--drivers/net/ethernet/ti/cpsw_new.c6
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.c4
-rw-r--r--drivers/net/ethernet/tundra/tsi108_eth.c35
-rw-r--r--drivers/net/ethernet/vertexcom/Kconfig2
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c135
-rw-r--r--drivers/net/hamradio/yam.c4
-rw-r--r--drivers/net/ipa/ipa_endpoint.c28
-rw-r--r--drivers/net/ipa/ipa_endpoint.h17
-rw-r--r--drivers/net/phy/at803x.c2
-rw-r--r--drivers/net/phy/broadcom.c1
-rw-r--r--drivers/net/phy/marvell.c56
-rw-r--r--drivers/net/phy/micrel.c36
-rw-r--r--drivers/net/phy/phy_device.c6
-rw-r--r--drivers/net/phy/sfp-bus.c5
-rw-r--r--drivers/net/phy/sfp.c25
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/usb/smsc95xx.c3
-rw-r--r--drivers/net/virtio_net.c2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c2
-rw-r--r--drivers/net/wireless/cisco/airo.c22
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_ap.c16
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_download.c2
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_proc.c24
-rw-r--r--drivers/net/wireless/ray_cs.c2
-rw-r--r--drivers/net/wwan/mhi_wwan_mbim.c4
-rw-r--r--drivers/nfc/pn544/i2c.c2
-rw-r--r--drivers/nfc/st21nfca/se.c10
-rw-r--r--drivers/nubus/proc.c36
-rw-r--r--drivers/nvme/host/fabrics.c3
-rw-r--r--drivers/nvme/host/pci.c3
-rw-r--r--drivers/of/base.c131
-rw-r--r--drivers/of/device.c2
-rw-r--r--drivers/parisc/led.c4
-rw-r--r--drivers/parisc/pdc_stable.c4
-rw-r--r--drivers/pci/controller/dwc/pci-dra7xx.c2
-rw-r--r--drivers/pci/controller/pcie-mt7621.c11
-rw-r--r--drivers/pci/proc.c10
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c4
-rw-r--r--drivers/platform/x86/toshiba_acpi.c16
-rw-r--r--drivers/pnp/isapnp/proc.c2
-rw-r--r--drivers/pnp/pnpbios/proc.c4
-rw-r--r--drivers/pwm/core.c139
-rw-r--r--drivers/pwm/pwm-img.c35
-rw-r--r--drivers/pwm/pwm-twl.c62
-rw-r--r--drivers/pwm/pwm-vt8500.c57
-rw-r--r--drivers/remoteproc/Kconfig4
-rw-r--r--drivers/remoteproc/qcom_q6v5.c1
-rw-r--r--drivers/rpmsg/rpmsg_char.c22
-rw-r--r--drivers/rtc/Kconfig24
-rw-r--r--drivers/rtc/Makefile2
-rw-r--r--drivers/rtc/dev.c6
-rw-r--r--drivers/rtc/rtc-cmos.c201
-rw-r--r--drivers/rtc/rtc-da9063.c16
-rw-r--r--drivers/rtc/rtc-ftrtc010.c8
-rw-r--r--drivers/rtc/rtc-gamecube.c377
-rw-r--r--drivers/rtc/rtc-mc146818-lib.c182
-rw-r--r--drivers/rtc/rtc-pcf2127.c2
-rw-r--r--drivers/rtc/rtc-pcf85063.c97
-rw-r--r--drivers/rtc/rtc-pxa.c4
-rw-r--r--drivers/rtc/rtc-rs5c372.c185
-rw-r--r--drivers/rtc/rtc-rv8803.c6
-rw-r--r--drivers/rtc/rtc-sunplus.c362
-rw-r--r--drivers/s390/scsi/zfcp_fc.c13
-rw-r--r--drivers/scsi/3w-sas.c4
-rw-r--r--drivers/scsi/53c700.c1
-rw-r--r--drivers/scsi/aacraid/aachba.c2
-rw-r--r--drivers/scsi/aic7xxx/aic79xx_osm.c6
-rw-r--r--drivers/scsi/bfa/bfad.c6
-rw-r--r--drivers/scsi/bnx2fc/bnx2fc_fcoe.c20
-rw-r--r--drivers/scsi/elx/efct/efct_driver.c11
-rw-r--r--drivers/scsi/elx/libefc/efc_els.c8
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c8
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v3_hw.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c10
-rw-r--r--drivers/scsi/megaraid.c84
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_fw.c11
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.h4
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_ctl.c87
-rw-r--r--drivers/scsi/myrs.c3
-rw-r--r--drivers/scsi/pcmcia/nsp_cs.c3
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.c7
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.h3
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.c23
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.h6
-rw-r--r--drivers/scsi/qedf/qedf_io.c1
-rw-r--r--drivers/scsi/qedf/qedf_main.c9
-rw-r--r--drivers/scsi/scsi_lib.c8
-rw-r--r--drivers/scsi/scsi_proc.c4
-rw-r--r--drivers/scsi/sd.c2
-rw-r--r--drivers/scsi/sg.c35
-rw-r--r--drivers/scsi/ufs/ufs-mediatek.c2
-rw-r--r--drivers/scsi/ufs/ufshcd-pltfrm.c7
-rw-r--r--drivers/scsi/ufs/ufshcd.c11
-rw-r--r--drivers/scsi/ufs/ufshci.h3
-rw-r--r--drivers/soc/canaan/Kconfig1
-rw-r--r--drivers/soc/fsl/qbman/bman_portal.c2
-rw-r--r--drivers/soc/fsl/qbman/qman_portal.c2
-rw-r--r--drivers/soc/ti/k3-ringacc.c4
-rw-r--r--drivers/target/iscsi/iscsi_target_tpg.c3
-rw-r--r--drivers/thermal/intel/int340x_thermal/int3400_thermal.c1
-rw-r--r--drivers/thermal/intel/int340x_thermal/int3403_thermal.c1
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device.h1
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c1
-rw-r--r--drivers/tty/n_gsm.c4
-rw-r--r--drivers/tty/n_tty.c2
-rw-r--r--drivers/tty/rpmsg_tty.c40
-rw-r--r--drivers/tty/serial/8250/8250_of.c11
-rw-r--r--drivers/tty/serial/8250/8250_pci.c100
-rw-r--r--drivers/tty/serial/8250/8250_port.c61
-rw-r--r--drivers/tty/serial/amba-pl011.c11
-rw-r--r--drivers/tty/serial/serial_core.c34
-rw-r--r--drivers/tty/serial/stm32-usart.c14
-rw-r--r--drivers/usb/cdns3/drd.c6
-rw-r--r--drivers/usb/common/ulpi.c7
-rw-r--r--drivers/usb/core/hcd.c14
-rw-r--r--drivers/usb/core/urb.c12
-rw-r--r--drivers/usb/dwc2/gadget.c2
-rw-r--r--drivers/usb/dwc3/dwc3-xilinx.c23
-rw-r--r--drivers/usb/gadget/function/f_sourcesink.c1
-rw-r--r--drivers/usb/gadget/function/rndis.c4
-rw-r--r--drivers/usb/gadget/udc/at91_udc.c2
-rw-r--r--drivers/usb/host/xhci-plat.c3
-rw-r--r--drivers/usb/storage/unusual_devs.h10
-rw-r--r--drivers/usb/typec/port-mapper.c8
-rw-r--r--drivers/usb/typec/tcpm/tcpci.c26
-rw-r--r--drivers/usb/typec/tcpm/tcpci.h1
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c7
-rw-r--r--drivers/usb/typec/ucsi/ucsi_ccg.c2
-rw-r--r--drivers/vfio/pci/vfio_pci_igd.c15
-rw-r--r--drivers/vfio/vfio_iommu_type1.c2
-rw-r--r--drivers/video/fbdev/hyperv_fb.c16
-rw-r--r--drivers/virt/acrn/ioreq.c3
-rw-r--r--drivers/zorro/proc.c2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/inode.c4
-rw-r--r--fs/afs/proc.c6
-rw-r--r--fs/aio.c31
-rw-r--r--fs/binfmt_elf.c6
-rw-r--r--fs/binfmt_misc.c6
-rw-r--r--fs/btrfs/Kconfig3
-rw-r--r--fs/btrfs/extent_io.c10
-rw-r--r--fs/btrfs/ioctl.c90
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/cachefiles/cache.c17
-rw-r--r--fs/cachefiles/daemon.c11
-rw-r--r--fs/cachefiles/internal.h2
-rw-r--r--fs/cachefiles/io.c2
-rw-r--r--fs/cachefiles/namei.c12
-rw-r--r--fs/ceph/addr.c5
-rw-r--r--fs/ceph/caps.c58
-rw-r--r--fs/ceph/file.c33
-rw-r--r--fs/ceph/metric.c2
-rw-r--r--fs/ceph/quota.c17
-rw-r--r--fs/ceph/super.c169
-rw-r--r--fs/ceph/super.h28
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/cache.c105
-rw-r--r--fs/cifs/cifs_swn.c9
-rw-r--r--fs/cifs/cifsfs.c19
-rw-r--r--fs/cifs/cifsfs.h3
-rw-r--r--fs/cifs/cifsglob.h7
-rw-r--r--fs/cifs/cifsproto.h8
-rw-r--r--fs/cifs/connect.c142
-rw-r--r--fs/cifs/dfs_cache.c2
-rw-r--r--fs/cifs/dir.c5
-rw-r--r--fs/cifs/file.c66
-rw-r--r--fs/cifs/fs_context.c8
-rw-r--r--fs/cifs/fscache.c333
-rw-r--r--fs/cifs/fscache.h128
-rw-r--r--fs/cifs/inode.c25
-rw-r--r--fs/cifs/misc.c49
-rw-r--r--fs/cifs/netmisc.c5
-rw-r--r--fs/cifs/ntlmssp.h30
-rw-r--r--fs/cifs/sess.c112
-rw-r--r--fs/cifs/smb2pdu.c112
-rw-r--r--fs/cifs/smb2transport.c6
-rw-r--r--fs/cifs/transport.c17
-rw-r--r--fs/configfs/dir.c6
-rw-r--r--fs/coredump.c66
-rw-r--r--fs/dcache.c37
-rw-r--r--fs/devpts/inode.c2
-rw-r--r--fs/eventpoll.c10
-rw-r--r--fs/exec.c40
-rw-r--r--fs/ext4/mballoc.c14
-rw-r--r--fs/ext4/readpage.c6
-rw-r--r--fs/ext4/super.c3
-rw-r--r--fs/f2fs/data.c13
-rw-r--r--fs/f2fs/segment.c8
-rw-r--r--fs/fat/file.c5
-rw-r--r--fs/file_table.c47
-rw-r--r--fs/fscache/volume.c4
-rw-r--r--fs/hfsplus/hfsplus_raw.h12
-rw-r--r--fs/hfsplus/xattr.c4
-rw-r--r--fs/inode.c39
-rw-r--r--fs/io-wq.c91
-rw-r--r--fs/io_uring.c90
-rw-r--r--fs/jbd2/journal.c4
-rw-r--r--fs/ksmbd/asn1.c142
-rw-r--r--fs/ksmbd/auth.c27
-rw-r--r--fs/ksmbd/auth.h10
-rw-r--r--fs/ksmbd/connection.c10
-rw-r--r--fs/ksmbd/connection.h12
-rw-r--r--fs/ksmbd/ksmbd_netlink.h12
-rw-r--r--fs/ksmbd/mgmt/user_config.c10
-rw-r--r--fs/ksmbd/mgmt/user_config.h1
-rw-r--r--fs/ksmbd/mgmt/user_session.h1
-rw-r--r--fs/ksmbd/smb2misc.c18
-rw-r--r--fs/ksmbd/smb2ops.c16
-rw-r--r--fs/ksmbd/smb2pdu.c222
-rw-r--r--fs/ksmbd/smb2pdu.h1
-rw-r--r--fs/ksmbd/smb_common.h1
-rw-r--r--fs/ksmbd/transport_ipc.c2
-rw-r--r--fs/ksmbd/transport_rdma.c261
-rw-r--r--fs/ksmbd/transport_rdma.h4
-rw-r--r--fs/ksmbd/transport_tcp.c3
-rw-r--r--fs/ksmbd/vfs_cache.h10
-rw-r--r--fs/locks.c34
-rw-r--r--fs/mpage.c7
-rw-r--r--fs/namei.c68
-rw-r--r--fs/namespace.c24
-rw-r--r--fs/netfs/read_helper.c3
-rw-r--r--fs/nfs/callback.h2
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/callback_xdr.c18
-rw-r--r--fs/nfs/client.c7
-rw-r--r--fs/nfs/dir.c146
-rw-r--r--fs/nfs/filelayout/filelayout.h2
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c4
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs3proc.c5
-rw-r--r--fs/nfs/nfs42proc.c13
-rw-r--r--fs/nfs/nfs4_fs.h14
-rw-r--r--fs/nfs/nfs4client.c5
-rw-r--r--fs/nfs/nfs4namespace.c19
-rw-r--r--fs/nfs/nfs4proc.c197
-rw-r--r--fs/nfs/nfs4state.c6
-rw-r--r--fs/nfs/nfs4xdr.c49
-rw-r--r--fs/nfs/sysfs.c3
-rw-r--r--fs/nfsd/nfsctl.c5
-rw-r--r--fs/nilfs2/page.c4
-rw-r--r--fs/notify/dnotify/dnotify.c21
-rw-r--r--fs/notify/fanotify/fanotify_user.c13
-rw-r--r--fs/notify/inotify/inotify_user.c11
-rw-r--r--fs/ntfs3/ntfs_fs.h1
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c4
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c18
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/dlm/dlmthread.c2
-rw-r--r--fs/ocfs2/stackglue.c36
-rw-r--r--fs/ocfs2/suballoc.c25
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/pipe.c64
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/generic.c6
-rw-r--r--fs/proc/inode.c1
-rw-r--r--fs/proc/internal.h5
-rw-r--r--fs/proc/proc_net.c8
-rw-r--r--fs/proc/proc_sysctl.c72
-rw-r--r--fs/proc/vmcore.c10
-rw-r--r--fs/smbfs_common/smb2pdu.h2
-rw-r--r--fs/smbfs_common/smbfsctl.h2
-rw-r--r--fs/super.c3
-rw-r--r--fs/sysctls.c39
-rw-r--r--fs/udf/inode.c9
-rw-r--r--fs/xfs/libxfs/xfs_fs.h37
-rw-r--r--fs/xfs/xfs_bmap_util.c7
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_file.c3
-rw-r--r--fs/xfs/xfs_icache.c22
-rw-r--r--fs/xfs/xfs_ioctl.c102
-rw-r--r--fs/xfs/xfs_ioctl.h6
-rw-r--r--fs/xfs/xfs_ioctl32.c27
-rw-r--r--fs/xfs/xfs_ioctl32.h22
-rw-r--r--include/asm-generic/barrier.h2
-rw-r--r--include/asm-generic/bitops.h1
-rw-r--r--include/asm-generic/bitops/le.h64
-rw-r--r--include/asm-generic/pgalloc.h24
-rw-r--r--include/kunit/assert.h2
-rw-r--r--include/linux/aio.h4
-rw-r--r--include/linux/bitmap.h34
-rw-r--r--include/linux/bitops.h34
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/bpf.h9
-rw-r--r--include/linux/bpf_verifier.h4
-rw-r--r--include/linux/ceph/libceph.h3
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/cleancache.h124
-rw-r--r--include/linux/coredump.h10
-rw-r--r--include/linux/cpumask.h46
-rw-r--r--include/linux/dcache.h10
-rw-r--r--include/linux/delayacct.h107
-rw-r--r--include/linux/dnotify.h1
-rw-r--r--include/linux/elfcore-compat.h5
-rw-r--r--include/linux/elfcore.h5
-rw-r--r--include/linux/ethtool.h2
-rw-r--r--include/linux/fanotify.h2
-rw-r--r--include/linux/find.h372
-rw-r--r--include/linux/frontswap.h35
-rw-r--r--include/linux/fs.h18
-rw-r--r--include/linux/fscache.h5
-rw-r--r--include/linux/fsnotify.h49
-rw-r--r--include/linux/hash.h5
-rw-r--r--include/linux/inotify.h3
-rw-r--r--include/linux/kernel.h9
-rw-r--r--include/linux/kprobes.h6
-rw-r--r--include/linux/kthread.h1
-rw-r--r--include/linux/kvm_host.h3
-rw-r--r--include/linux/list.h36
-rw-r--r--include/linux/lsm_hook_defs.h2
-rw-r--r--include/linux/mc146818rtc.h6
-rw-r--r--include/linux/migrate.h2
-rw-r--r--include/linux/mm.h37
-rw-r--r--include/linux/mm_types.h1
-rw-r--r--include/linux/mount.h3
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/nfs_fs.h10
-rw-r--r--include/linux/nfs_fs_sb.h4
-rw-r--r--include/linux/nfs_xdr.h5
-rw-r--r--include/linux/of.h422
-rw-r--r--include/linux/pagevec.h1
-rw-r--r--include/linux/percpu.h13
-rw-r--r--include/linux/perf_event.h15
-rw-r--r--include/linux/pid_namespace.h5
-rw-r--r--include/linux/pipe_fs_i.h4
-rw-r--r--include/linux/poll.h2
-rw-r--r--include/linux/printk.h4
-rw-r--r--include/linux/proc_fs.h25
-rw-r--r--include/linux/psi.h13
-rw-r--r--include/linux/psi_types.h3
-rw-r--r--include/linux/quota.h2
-rw-r--r--include/linux/ref_tracker.h2
-rw-r--r--include/linux/rwlock.h6
-rw-r--r--include/linux/rwlock_api_smp.h8
-rw-r--r--include/linux/rwlock_rt.h10
-rw-r--r--include/linux/sbitmap.h11
-rw-r--r--include/linux/sched.h13
-rw-r--r--include/linux/sched/sysctl.h14
-rw-r--r--include/linux/seq_file.h2
-rw-r--r--include/linux/shmem_fs.h3
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/linux/spinlock_api_up.h1
-rw-r--r--include/linux/stackdepot.h25
-rw-r--r--include/linux/stackleak.h5
-rw-r--r--include/linux/suspend.h11
-rw-r--r--include/linux/swapfile.h3
-rw-r--r--include/linux/sysctl.h67
-rw-r--r--include/linux/unaligned/packed_struct.h2
-rw-r--r--include/linux/usb/role.h6
-rw-r--r--include/net/addrconf.h2
-rw-r--r--include/net/bonding.h2
-rw-r--r--include/net/inet_frag.h11
-rw-r--r--include/net/ip.h21
-rw-r--r--include/net/ip6_fib.h2
-rw-r--r--include/net/ipv6_frag.h3
-rw-r--r--include/net/pkt_cls.h4
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/sch_generic.h5
-rw-r--r--include/net/tcp.h4
-rw-r--r--include/scsi/scsi_device.h5
-rw-r--r--include/scsi/sg.h4
-rw-r--r--include/trace/events/cachefiles.h103
-rw-r--r--include/trace/events/error_report.h8
-rw-r--r--include/trace/events/skb.h2
-rw-r--r--include/trace/events/sunrpc.h70
-rw-r--r--include/trace/perf.h5
-rw-r--r--include/trace/trace_events.h9
-rw-r--r--include/uapi/linux/cyclades.h35
-rw-r--r--include/uapi/linux/kvm.h1
-rw-r--r--include/uapi/linux/magic.h1
-rw-r--r--include/uapi/linux/taskstats.h6
-rw-r--r--include/uapi/linux/uuid.h10
-rw-r--r--init/main.c9
-rw-r--r--ipc/util.c2
-rw-r--r--kernel/bpf/btf.c2
-rw-r--r--kernel/bpf/inode.c14
-rw-r--r--kernel/bpf/stackmap.c5
-rw-r--r--kernel/bpf/verifier.c81
-rw-r--r--kernel/cgroup/cgroup.c11
-rw-r--r--kernel/configs/debug.config105
-rw-r--r--kernel/delayacct.c49
-rw-r--r--kernel/events/core.c269
-rw-r--r--kernel/hung_task.c81
-rw-r--r--kernel/irq/proc.c8
-rw-r--r--kernel/kprobes.c30
-rw-r--r--kernel/kthread.c32
-rw-r--r--kernel/locking/spinlock.c10
-rw-r--r--kernel/locking/spinlock_rt.c12
-rw-r--r--kernel/panic.c21
-rw-r--r--kernel/power/snapshot.c21
-rw-r--r--kernel/power/wakelock.c11
-rw-r--r--kernel/printk/Makefile5
-rw-r--r--kernel/printk/internal.h8
-rw-r--r--kernel/printk/printk.c4
-rw-r--r--kernel/printk/sysctl.c85
-rw-r--r--kernel/rcu/tasks.h12
-rw-r--r--kernel/resource.c4
-rw-r--r--kernel/sched/core.c15
-rw-r--r--kernel/sched/core_sched.c2
-rw-r--r--kernel/sched/fair.c118
-rw-r--r--kernel/sched/membarrier.c9
-rw-r--r--kernel/sched/pelt.h4
-rw-r--r--kernel/sched/psi.c145
-rw-r--r--kernel/stackleak.c26
-rw-r--r--kernel/sys.c16
-rw-r--r--kernel/sysctl.c724
-rw-r--r--kernel/time/clocksource.c4
-rw-r--r--kernel/trace/Kconfig15
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/trace.c3
-rw-r--r--kernel/trace/trace_events_hist.c10
-rw-r--r--kernel/ucount.c2
-rw-r--r--kernel/watchdog.c101
-rw-r--r--lib/Kconfig7
-rw-r--r--lib/Kconfig.debug31
-rw-r--r--lib/Kconfig.kasan2
-rw-r--r--lib/Kconfig.ubsan13
-rw-r--r--lib/Makefile3
-rw-r--r--lib/find_bit.c21
-rw-r--r--lib/find_bit_benchmark.c21
-rw-r--r--lib/genalloc.c2
-rw-r--r--lib/kstrtox.c12
-rw-r--r--lib/list_debug.c8
-rw-r--r--lib/lz4/lz4defs.h2
-rw-r--r--lib/ref_tracker.c5
-rw-r--r--lib/sbitmap.c29
-rw-r--r--lib/stackdepot.c46
-rw-r--r--lib/test_bitmap.c37
-rw-r--r--lib/test_hash.c259
-rw-r--r--lib/test_kasan.c5
-rw-r--r--lib/test_meminit.c1
-rw-r--r--lib/test_sysctl.c22
-rw-r--r--lib/test_ubsan.c22
-rw-r--r--lib/vsprintf.c24
-rw-r--r--mm/Kconfig50
-rw-r--r--mm/Makefile1
-rw-r--r--mm/cleancache.c315
-rw-r--r--mm/filemap.c112
-rw-r--r--mm/frontswap.c259
-rw-r--r--mm/kasan/common.c1
-rw-r--r--mm/memory-failure.c6
-rw-r--r--mm/memory.c4
-rw-r--r--mm/migrate.c38
-rw-r--r--mm/page_alloc.c3
-rw-r--r--mm/page_io.c3
-rw-r--r--mm/page_owner.c2
-rw-r--r--mm/percpu.c189
-rw-r--r--mm/shmem.c33
-rw-r--r--mm/swapfile.c90
-rw-r--r--mm/truncate.c15
-rw-r--r--mm/zsmalloc.c529
-rw-r--r--mm/zswap.c8
-rw-r--r--net/atm/proc.c4
-rw-r--r--net/bluetooth/af_bluetooth.c8
-rw-r--r--net/bridge/br_if.c3
-rw-r--r--net/bridge/br_vlan.c9
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/can/proc.c2
-rw-r--r--net/ceph/ceph_common.c18
-rw-r--r--net/ceph/messenger.c15
-rw-r--r--net/core/dev.c6
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/net-procfs.c38
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/of_net.c33
-rw-r--r--net/core/pktgen.c6
-rw-r--r--net/core/sock.c5
-rw-r--r--net/ipv4/fib_semantics.c76
-rw-r--r--net/ipv4/inet_fragment.c8
-rw-r--r--net/ipv4/ip_fragment.c3
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/ip_output.c26
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c6
-rw-r--r--net/ipv4/ping.c3
-rw-r--r--net/ipv4/raw.c13
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/addrconf.c27
-rw-r--r--net/ipv6/ip6_fib.c23
-rw-r--r--net/ipv6/ip6_tunnel.c8
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/mctp/test/route-test.c2
-rw-r--r--net/mptcp/pm_netlink.c39
-rw-r--r--net/mptcp/protocol.h6
-rw-r--r--net/ncsi/ncsi-manage.c4
-rw-r--r--net/netfilter/nf_conntrack_core.c8
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c5
-rw-r--r--net/netfilter/nf_tables_api.c4
-rw-r--r--net/netfilter/nft_connlimit.c13
-rw-r--r--net/netfilter/nft_last.c2
-rw-r--r--net/netfilter/nft_limit.c2
-rw-r--r--net/netfilter/nft_quota.c2
-rw-r--r--net/netfilter/x_tables.c10
-rw-r--r--net/netfilter/xt_hashlimit.c18
-rw-r--r--net/netfilter/xt_recent.c4
-rw-r--r--net/nfc/llcp_sock.c5
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/rxrpc/call_event.c8
-rw-r--r--net/rxrpc/output.c2
-rw-r--r--net/sched/sch_api.c4
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sched/sch_htb.c20
-rw-r--r--net/smc/af_smc.c69
-rw-r--r--net/smc/smc.h1
-rw-r--r--net/smc/smc_cdc.c3
-rw-r--r--net/smc/smc_clc.c2
-rw-r--r--net/smc/smc_core.c137
-rw-r--r--net/smc/smc_core.h12
-rw-r--r--net/smc/smc_diag.c6
-rw-r--r--net/smc/smc_pnet.c3
-rw-r--r--net/smc/smc_wr.h4
-rw-r--r--net/sunrpc/auth_gss/gss_generic_token.c6
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/cache.c24
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/rpc_pipe.c4
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/sysfs.c47
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c4
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c4
-rw-r--r--net/sunrpc/xprtrdma/transport.c4
-rw-r--r--net/sunrpc/xprtrdma/verbs.c23
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/tls/tls_sw.c1
-rw-r--r--net/unix/garbage.c14
-rw-r--r--net/unix/scm.c6
-rw-r--r--net/xfrm/xfrm_policy.c3
-rw-r--r--samples/bpf/offwaketime_kern.c4
-rw-r--r--samples/bpf/test_overhead_kprobe_kern.c11
-rw-r--r--samples/bpf/test_overhead_tp_kern.c5
-rw-r--r--scripts/Makefile2
-rw-r--r--scripts/Makefile.ubsan1
-rwxr-xr-xscripts/checkpatch.pl54
-rw-r--r--scripts/const_structs.checkpatch23
-rwxr-xr-xscripts/dtc/dtx_diff8
-rwxr-xr-xscripts/get_maintainer.pl2
-rw-r--r--security/security.c15
-rw-r--r--sound/core/info.c4
-rw-r--r--sound/core/init.c25
-rw-r--r--sound/core/misc.c2
-rw-r--r--sound/pci/hda/cs35l41_hda.c134
-rw-r--r--sound/pci/hda/cs35l41_hda.h4
-rw-r--r--sound/pci/hda/cs35l41_hda_i2c.c6
-rw-r--r--sound/pci/hda/cs35l41_hda_spi.c6
-rw-r--r--sound/pci/hda/patch_cs8409-tables.c2
-rw-r--r--sound/pci/hda/patch_realtek.c12
-rw-r--r--sound/usb/mixer_maps.c12
-rw-r--r--tools/accounting/getdelays.c8
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--tools/arch/x86/include/uapi/asm/prctl.h26
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.bpf.c4
-rw-r--r--tools/bpf/runqslower/runqslower.bpf.c2
-rw-r--r--tools/bpf/runqslower/runqslower.c2
-rw-r--r--tools/bpf/runqslower/runqslower.h2
-rw-r--r--tools/include/asm-generic/bitops.h1
-rw-r--r--tools/include/asm-generic/bitops/find.h145
-rw-r--r--tools/include/linux/bitmap.h7
-rw-r--r--tools/include/linux/find.h (renamed from include/asm-generic/bitops/find.h)54
-rw-r--r--tools/include/linux/hash.h5
-rw-r--r--tools/include/uapi/asm-generic/unistd.h5
-rw-r--r--tools/include/uapi/linux/kvm.h20
-rw-r--r--tools/lib/find_bit.c20
-rw-r--r--tools/lib/perf/evsel.c4
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl1
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--tools/perf/bench/epoll-ctl.c2
-rw-r--r--tools/perf/bench/epoll-wait.c2
-rw-r--r--tools/perf/bench/evlist-open-close.c4
-rw-r--r--tools/perf/bench/futex-hash.c2
-rw-r--r--tools/perf/bench/futex-lock-pi.c2
-rw-r--r--tools/perf/bench/futex-requeue.c2
-rw-r--r--tools/perf/bench/futex-wake-parallel.c2
-rw-r--r--tools/perf/bench/futex-wake.c2
-rw-r--r--tools/perf/builtin-ftrace.c2
-rw-r--r--tools/perf/builtin-inject.c5
-rw-r--r--tools/perf/builtin-script.c2
-rw-r--r--tools/perf/builtin-stat.c24
-rw-r--r--tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json16
-rw-r--r--tools/perf/tests/bitmap.c4
-rw-r--r--tools/perf/tests/event_update.c8
-rw-r--r--tools/perf/tests/mem2node.c9
-rw-r--r--tools/perf/tests/mmap-basic.c5
-rw-r--r--tools/perf/tests/parse-events.c49
-rw-r--r--tools/perf/tests/pmu-events.c32
-rw-r--r--tools/perf/tests/topology.c37
-rw-r--r--tools/perf/util/affinity.c8
-rw-r--r--tools/perf/util/auxtrace.c2
-rw-r--r--tools/perf/util/counts.c2
-rw-r--r--tools/perf/util/cpumap.h3
-rw-r--r--tools/perf/util/cputopo.c4
-rw-r--r--tools/perf/util/evlist-hybrid.c11
-rw-r--r--tools/perf/util/evlist.c28
-rw-r--r--tools/perf/util/evsel.c45
-rw-r--r--tools/perf/util/evsel.h3
-rw-r--r--tools/perf/util/machine.c3
-rw-r--r--tools/perf/util/mmap.c2
-rw-r--r--tools/perf/util/parse-events.c67
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/parse-events.l2
-rw-r--r--tools/perf/util/parse-events.y17
-rw-r--r--tools/perf/util/perf_api_probe.c4
-rw-r--r--tools/perf/util/probe-event.c3
-rw-r--r--tools/perf/util/python.c6
-rw-r--r--tools/perf/util/record.c6
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c4
-rw-r--r--tools/perf/util/session.c4
-rw-r--r--tools/perf/util/svghelper.c4
-rw-r--r--tools/perf/util/synthetic-events.c18
-rw-r--r--tools/perf/util/top.c6
-rw-r--r--tools/testing/scatterlist/linux/mm.h3
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/d_path.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path_check_types.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_stacktrace_map.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_tracepoint.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_link.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/ringbuf.c95
-rw-r--r--tools/testing/selftests/bpf/verifier/spill_fill.c2
-rw-r--r--tools/testing/selftests/kvm/.gitignore5
-rw-r--r--tools/testing/selftests/kvm/Makefile7
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h1
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h26
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c13
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c161
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/cpuid_test.c (renamed from tools/testing/selftests/kvm/x86_64/get_cpuid_test.c)30
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c434
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c139
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c34
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh3
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh5
-rw-r--r--tools/testing/selftests/net/settings2
-rw-r--r--tools/tracing/Makefile18
-rw-r--r--tools/tracing/rtla/Makefile2
-rw-r--r--usr/include/Makefile2
-rw-r--r--virt/kvm/eventfd.c8
-rw-r--r--virt/kvm/kvm_main.c13
1156 files changed, 16080 insertions, 10564 deletions
diff --git a/.mailmap b/.mailmap
index b157f88ce26a..b76e520809d0 100644
--- a/.mailmap
+++ b/.mailmap
@@ -70,6 +70,7 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com>
Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
Brian Avery <b.avery@hp.com>
Brian King <brking@us.ibm.com>
+Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index 1b8b46deeb29..197fe319cbec 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -13,6 +13,8 @@ a) waiting for a CPU (while being runnable)
b) completion of synchronous block I/O initiated by the task
c) swapping in pages
d) memory reclaim
+e) thrashing page cache
+f) direct compact
and makes these statistics available to userspace through
the taskstats interface.
@@ -41,11 +43,12 @@ generic data structure to userspace corresponding to per-pid and per-tgid
statistics. The delay accounting functionality populates specific fields of
this structure. See
- include/linux/taskstats.h
+ include/uapi/linux/taskstats.h
for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
+delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
+cache, direct compact etc.
Taking the difference of two successive readings of a given
counter (say cpu_delay_total) for a task will give the delay
@@ -88,41 +91,37 @@ seen.
General format of the getdelays command::
- getdelays [-t tgid] [-p pid] [-c cmd...]
-
+ getdelays [-dilv] [-t tgid] [-p pid]
Get delays, since system boot, for pid 10::
- # ./getdelays -p 10
+ # ./getdelays -d -p 10
(output similar to next case)
Get sum of delays, since system boot, for all pids with tgid 5::
- # ./getdelays -t 5
-
-
- CPU count real total virtual total delay total
- 7876 92005750 100000000 24001500
- IO count delay total
- 0 0
- SWAP count delay total
- 0 0
- RECLAIM count delay total
- 0 0
+ # ./getdelays -d -t 5
+ print delayacct stats ON
+ TGID 5
-Get delays seen in executing a given simple command::
- # ./getdelays -c ls /
+ CPU count real total virtual total delay total delay average
+ 8 7000000 6872122 3382277 0.423ms
+ IO count delay total delay average
+ 0 0 0ms
+ SWAP count delay total delay average
+ 0 0 0ms
+ RECLAIM count delay total delay average
+ 0 0 0ms
+ THRASHING count delay total delay average
+ 0 0 0ms
+ COMPACT count delay total delay average
+ 0 0 0ms
- bin data1 data3 data5 dev home media opt root srv sys usr
- boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var
+Get IO accounting for pid 1, it works only with -p::
+ # ./getdelays -i -p 1
+ printing IO accounting
+ linuxrc: read=65536, write=0, cancelled_write=0
- CPU count real total virtual total delay total
- 6 4000250 4000000 0
- IO count delay total
- 0 0
- SWAP count delay total
- 0 0
- RECLAIM count delay total
- 0 0
+The above command can be used with -v to get more debug information.
diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
index f2b3439edcc2..860fe651d645 100644
--- a/Documentation/accounting/psi.rst
+++ b/Documentation/accounting/psi.rst
@@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger
for the same psi metric can be specified. However for each trigger a separate
file descriptor is required to be able to poll it separately from others,
therefore for each trigger a separate open() syscall should be made even
-when opening the same psi interface file.
+when opening the same psi interface file. Write operations to a file descriptor
+with an already existing psi trigger will fail with EBUSY.
Monitors activate only when system enters stall state for the monitored
psi metric and deactivates upon exit from the stall state. While system is
diff --git a/Documentation/admin-guide/gpio/index.rst b/Documentation/admin-guide/gpio/index.rst
index 7db367572f30..f6861ca16ffe 100644
--- a/Documentation/admin-guide/gpio/index.rst
+++ b/Documentation/admin-guide/gpio/index.rst
@@ -10,6 +10,7 @@ gpio
gpio-aggregator
sysfs
gpio-mockup
+ gpio-sim
.. only:: subproject and html
diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst
index 9485a5a2e2e9..2f41caa0096c 100644
--- a/Documentation/arm/marvell.rst
+++ b/Documentation/arm/marvell.rst
@@ -266,10 +266,12 @@ Avanta family
-------------
Flavors:
+ - 88F6500
- 88F6510
- 88F6530P
- 88F6550
- 88F6560
+ - 88F6601
Homepage:
https://web.archive.org/web/20181005145041/http://www.marvell.com/broadband/
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 5342e895fb60..0ec7b7f1524b 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -52,6 +52,12 @@ stable kernels.
| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 |
@@ -92,12 +98,18 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1349291 | N/A |
diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
index 8e13f27b28ed..bce96b5b0db0 100644
--- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
@@ -7,7 +7,9 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Analogix ANX7814 SlimPort (Full-HD Transmitter)
maintainers:
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
+ - Andrzej Hajda <andrzej.hajda@intel.com>
+ - Neil Armstrong <narmstrong@baylibre.com>
+ - Robert Foss <robert.foss@linaro.org>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml b/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml
index 9f7cc6b757cb..a88a5d8c7ba5 100644
--- a/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml
@@ -8,7 +8,6 @@ title: ChromeOS EC ANX7688 HDMI to DP Converter through Type-C Port
maintainers:
- Nicolas Boichat <drinkcat@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
ChromeOS EC ANX7688 is a display bridge that converts HDMI 2.0 to
diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
index cdaf7a7a8f88..186e17be51fb 100644
--- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
@@ -8,7 +8,6 @@ title: MIPI DSI to eDP Video Format Converter Device Tree Bindings
maintainers:
- Nicolas Boichat <drinkcat@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
The PS8640 is a low power MIPI-to-eDP video format converter supporting
diff --git a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml
index a108029ecfab..acd2f3faa6b9 100644
--- a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml
+++ b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Asia Better Technology 3.0" (320x480 pixels) 24-bit IPS LCD panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Paul Cercueil <paul@crapouillou.net>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
index e89c1ea62ffa..7d221ef35443 100644
--- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
+++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
@@ -15,11 +15,9 @@ description: |
960 TFT source driver pins and 240 TFT gate driver pins, VCOM, VCOML and
VCOMH outputs.
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml b/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml
index cda36c04e85c..72788e3e6c59 100644
--- a/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml
+++ b/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Innolux EJ030NA 3.0" (320x480 pixels) 24-bit TFT LCD panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Paul Cercueil <paul@crapouillou.net>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml
index c45c92a3d41f..2a2756d19681 100644
--- a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml
+++ b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: King Display KD035G6-54NT 3.5" (320x240 pixels) 24-bit TFT LCD panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Paul Cercueil <paul@crapouillou.net>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml b/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml
index 830e335ddb53..5e4e0e552c2f 100644
--- a/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml
+++ b/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: LG.Philips LB035Q02 Panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Tomi Valkeinen <tomi.valkeinen@ti.com>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml b/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml
index 060ee27a4749..d525165d6d63 100644
--- a/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml
+++ b/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Samsung LD9040 AMOLED LCD parallel RGB panel with SPI control bus
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Andrzej Hajda <a.hajda@samsung.com>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
@@ -63,8 +60,6 @@ examples:
lcd@0 {
compatible = "samsung,ld9040";
- #address-cells = <1>;
- #size-cells = <0>;
reg = <0>;
vdd3-supply = <&ldo7_reg>;
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml
index ea58df49263a..940f7f88526f 100644
--- a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml
+++ b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml
@@ -12,6 +12,7 @@ maintainers:
allOf:
- $ref: panel-common.yaml#
- $ref: /schemas/leds/backlight/common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
index fa46d151e7b3..9e1d707c2ace 100644
--- a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
+++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Sitronix ST7789V RGB panel with SPI control bus
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Maxime Ripard <mripard@kernel.org>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml b/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml
index 95d053c548ab..98abdf4ddeac 100644
--- a/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml
+++ b/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Sony ACX565AKM SDI Panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Tomi Valkeinen <tomi.valkeinen@ti.com>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/tpo,td.yaml b/Documentation/devicetree/bindings/display/panel/tpo,td.yaml
index 4aa605613445..f902a9d74141 100644
--- a/Documentation/devicetree/bindings/display/panel/tpo,td.yaml
+++ b/Documentation/devicetree/bindings/display/panel/tpo,td.yaml
@@ -6,16 +6,13 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Toppoly TD Panels
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Marek Belisko <marek@goldelico.com>
- H. Nikolaus Schaller <hns@goldelico.com>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml
index 008c144257cb..1a68a940d165 100644
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml
@@ -26,14 +26,6 @@ properties:
clock-names:
const: hclk
- pinctrl-0:
- maxItems: 2
-
- pinctrl-names:
- const: default
- description:
- Switch the iomux for the HPD/I2C pins to HDMI function.
-
power-domains:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml b/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml
index 20e1ccfc8630..2d82b44268db 100644
--- a/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml
+++ b/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml
@@ -8,7 +8,6 @@ title: ChromeOS EC USB Type-C cable and accessories detection
maintainers:
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
On ChromeOS systems with USB Type C ports, the ChromeOS Embedded Controller is
diff --git a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml
index b386e4128a79..6e1c70e9275e 100644
--- a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml
+++ b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml
@@ -10,7 +10,6 @@ title: I2C bus that tunnels through the ChromeOS EC (cros-ec)
maintainers:
- Doug Anderson <dianders@chromium.org>
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
On some ChromeOS board designs we've got a connection to the EC
diff --git a/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml b/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml
index 099b4be927d4..00e3b59641d2 100644
--- a/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml
+++ b/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml
@@ -10,7 +10,6 @@ title: ChromeOS EC MKBP Proximity Sensor
maintainers:
- Stephen Boyd <swboyd@chromium.org>
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
Google's ChromeOS EC sometimes has the ability to detect user proximity.
diff --git a/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml b/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml
index 5377b232fa10..e8f137abb03c 100644
--- a/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml
+++ b/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml
@@ -10,7 +10,6 @@ title: ChromeOS EC Keyboard
maintainers:
- Simon Glass <sjg@chromium.org>
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
Google's ChromeOS EC Keyboard is a simple matrix keyboard
diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml
index dbe7ecc19ccb..7fe1966ea28a 100644
--- a/Documentation/devicetree/bindings/input/gpio-keys.yaml
+++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml
@@ -88,12 +88,6 @@ patternProperties:
which can be disabled to suppress events from the button.
type: boolean
- pinctrl-0:
- maxItems: 1
-
- pinctrl-names:
- maxItems: 1
-
required:
- linux,code
diff --git a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
index 1ef849dc74d7..e2e6e9aa0fe6 100644
--- a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
+++ b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
@@ -81,14 +81,12 @@ properties:
data-lanes:
description:
Note that 'fsl,imx7-mipi-csi2' only supports up to 2 data lines.
+ minItems: 1
items:
- minItems: 1
- maxItems: 4
- items:
- - const: 1
- - const: 2
- - const: 3
- - const: 4
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
required:
- data-lanes
diff --git a/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml
index 9c04fa85ee5c..1b3e1c4b99ed 100644
--- a/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml
+++ b/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml
@@ -87,14 +87,12 @@ properties:
properties:
data-lanes:
+ minItems: 1
items:
- minItems: 1
- maxItems: 4
- items:
- - const: 1
- - const: 2
- - const: 3
- - const: 4
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
required:
- data-lanes
diff --git a/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml b/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml
index 5dce62a7eff2..68c75a517c92 100644
--- a/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml
+++ b/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml
@@ -245,8 +245,7 @@ examples:
interrupt-controller;
#interrupt-cells = <2>;
- interrupts = <&host_irq1>;
- interrupt-parent = <&gic>;
+ interrupts = <4 1 0>;
gpio-controller;
#gpio-cells = <2>;
diff --git a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
index 0faa4da6c7c8..d1f53bd449f7 100644
--- a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
+++ b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
@@ -8,7 +8,6 @@ title: ChromeOS Embedded Controller
maintainers:
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
- Guenter Roeck <groeck@chromium.org>
description:
diff --git a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
index a4f74bec68a3..1e69a5a42439 100644
--- a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
+++ b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
@@ -185,6 +185,9 @@ examples:
clock-names = "mclk", "apb_pclk";
};
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
mmc@80126000 {
compatible = "arm,pl18x", "arm,primecell";
reg = <0x80126000 0x1000>;
@@ -206,12 +209,12 @@ examples:
vqmmc-supply = <&vmmci>;
};
+ - |
mmc@101f6000 {
compatible = "arm,pl18x", "arm,primecell";
reg = <0x101f6000 0x1000>;
clocks = <&sdiclk>, <&pclksdi>;
clock-names = "mclk", "apb_pclk";
- interrupt-parent = <&vica>;
interrupts = <22>;
max-frequency = <400000>;
bus-width = <4>;
@@ -226,6 +229,7 @@ examples:
vmmc-supply = <&vmmc_regulator>;
};
+ - |
mmc@52007000 {
compatible = "arm,pl18x", "arm,primecell";
arm,primecell-periphid = <0x10153180>;
diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
index fb547e26c676..401ab7cdb379 100644
--- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
+++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
@@ -76,33 +76,31 @@ properties:
M_CAN user manual for details.
$ref: /schemas/types.yaml#/definitions/int32-array
items:
- items:
- - description: The 'offset' is an address offset of the Message RAM where
- the following elements start from. This is usually set to 0x0 if
- you're using a private Message RAM.
- default: 0
- - description: 11-bit Filter 0-128 elements / 0-128 words
- minimum: 0
- maximum: 128
- - description: 29-bit Filter 0-64 elements / 0-128 words
- minimum: 0
- maximum: 64
- - description: Rx FIFO 0 0-64 elements / 0-1152 words
- minimum: 0
- maximum: 64
- - description: Rx FIFO 1 0-64 elements / 0-1152 words
- minimum: 0
- maximum: 64
- - description: Rx Buffers 0-64 elements / 0-1152 words
- minimum: 0
- maximum: 64
- - description: Tx Event FIFO 0-32 elements / 0-64 words
- minimum: 0
- maximum: 32
- - description: Tx Buffers 0-32 elements / 0-576 words
- minimum: 0
- maximum: 32
- maxItems: 1
+ - description: The 'offset' is an address offset of the Message RAM where
+ the following elements start from. This is usually set to 0x0 if
+ you're using a private Message RAM.
+ default: 0
+ - description: 11-bit Filter 0-128 elements / 0-128 words
+ minimum: 0
+ maximum: 128
+ - description: 29-bit Filter 0-64 elements / 0-128 words
+ minimum: 0
+ maximum: 64
+ - description: Rx FIFO 0 0-64 elements / 0-1152 words
+ minimum: 0
+ maximum: 64
+ - description: Rx FIFO 1 0-64 elements / 0-1152 words
+ minimum: 0
+ maximum: 64
+ - description: Rx Buffers 0-64 elements / 0-1152 words
+ minimum: 0
+ maximum: 64
+ - description: Tx Event FIFO 0-32 elements / 0-64 words
+ minimum: 0
+ maximum: 32
+ - description: Tx Buffers 0-32 elements / 0-576 words
+ minimum: 0
+ maximum: 32
power-domains:
description:
diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
index 0968b40aef1e..e3501bfa22e9 100644
--- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
@@ -31,7 +31,7 @@ tcan4x5x: tcan4x5x@0 {
#address-cells = <1>;
#size-cells = <1>;
spi-max-frequency = <10000000>;
- bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>;
+ bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>;
interrupt-parent = <&gpio1>;
interrupts = <14 IRQ_TYPE_LEVEL_LOW>;
device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 47b5f728701d..34c5463abcec 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -17,9 +17,8 @@ properties:
description:
Specifies the MAC address that was assigned to the network device.
$ref: /schemas/types.yaml#/definitions/uint8-array
- items:
- - minItems: 6
- maxItems: 6
+ minItems: 6
+ maxItems: 6
mac-address:
description:
@@ -28,9 +27,8 @@ properties:
to the device by the boot program is different from the
local-mac-address property.
$ref: /schemas/types.yaml#/definitions/uint8-array
- items:
- - minItems: 6
- maxItems: 6
+ minItems: 6
+ maxItems: 6
max-frame-size:
$ref: /schemas/types.yaml#/definitions/uint32
@@ -164,33 +162,30 @@ properties:
type: array
then:
deprecated: true
- minItems: 1
- maxItems: 1
items:
- items:
- - minimum: 0
- maximum: 31
- description:
- Emulated PHY ID, choose any but unique to the all
- specified fixed-links
-
- - enum: [0, 1]
- description:
- Duplex configuration. 0 for half duplex or 1 for
- full duplex
-
- - enum: [10, 100, 1000, 2500, 10000]
- description:
- Link speed in Mbits/sec.
-
- - enum: [0, 1]
- description:
- Pause configuration. 0 for no pause, 1 for pause
-
- - enum: [0, 1]
- description:
- Asymmetric pause configuration. 0 for no asymmetric
- pause, 1 for asymmetric pause
+ - minimum: 0
+ maximum: 31
+ description:
+ Emulated PHY ID, choose any but unique to the all
+ specified fixed-links
+
+ - enum: [0, 1]
+ description:
+ Duplex configuration. 0 for half duplex or 1 for
+ full duplex
+
+ - enum: [10, 100, 1000, 2500, 10000]
+ description:
+ Link speed in Mbits/sec.
+
+ - enum: [0, 1]
+ description:
+ Pause configuration. 0 for no pause, 1 for pause
+
+ - enum: [0, 1]
+ description:
+ Asymmetric pause configuration. 0 for no asymmetric
+ pause, 1 for asymmetric pause
- if:
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index c00fb0d22c7b..020337f3c05f 100644
--- a/Documentation/devicetree/bindings/net/fsl-fman.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fman.txt
@@ -410,6 +410,15 @@ PROPERTIES
The settings and programming routines for internal/external
MDIO are different. Must be included for internal MDIO.
+- fsl,erratum-a009885
+ Usage: optional
+ Value type: <boolean>
+ Definition: Indicates the presence of the A009885
+ erratum describing that the contents of MDIO_DATA may
+ become corrupt unless it is read within 16 MDC cycles
+ of MDIO_CFG[BSY] being cleared, when performing an
+ MDIO read operation.
+
- fsl,erratum-a011043
Usage: optional
Value type: <boolean>
diff --git a/Documentation/devicetree/bindings/net/oxnas-dwmac.txt b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt
index d7117a22fd87..27db496f1ce8 100644
--- a/Documentation/devicetree/bindings/net/oxnas-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt
@@ -9,6 +9,9 @@ Required properties on all platforms:
- compatible: For the OX820 SoC, it should be :
- "oxsemi,ox820-dwmac" to select glue
- "snps,dwmac-3.512" to select IP version.
+ For the OX810SE SoC, it should be :
+ - "oxsemi,ox810se-dwmac" to select glue
+ - "snps,dwmac-3.512" to select IP version.
- clocks: Should contain phandles to the following clocks
- clock-names: Should contain the following:
diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
index 456fb808100a..43ed7e32e5ac 100644
--- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
@@ -50,16 +50,15 @@ patternProperties:
Offset and size in bytes within the storage device.
bits:
- maxItems: 1
+ $ref: /schemas/types.yaml#/definitions/uint32-array
items:
- items:
- - minimum: 0
- maximum: 7
- description:
- Offset in bit within the address range specified by reg.
- - minimum: 1
- description:
- Size in bit within the address range specified by reg.
+ - minimum: 0
+ maximum: 7
+ description:
+ Offset in bit within the address range specified by reg.
+ - minimum: 1
+ description:
+ Size in bit within the address range specified by reg.
required:
- reg
diff --git a/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml b/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml
index 80020539c3bb..5cd512b7d5ba 100644
--- a/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml
@@ -51,15 +51,6 @@ properties:
appropriate of the LOCHNAGARx_PIN_NUM_GPIOS define, see [3].
maxItems: 1
- pinctrl-0:
- description:
- A phandle to the default pinctrl state.
-
- pinctrl-names:
- description:
- A pinctrl state named "default" must be defined.
- const: default
-
pin-settings:
type: object
patternProperties:
diff --git a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
index e50d7ad5c229..c85f759ae5a3 100644
--- a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
@@ -30,16 +30,6 @@ description: |
Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
properties:
- pinctrl-0:
- description:
- A phandle to the node containing the subnodes containing default
- configurations.
-
- pinctrl-names:
- description:
- A pinctrl state named "default" must be defined.
- const: default
-
pin-settings:
description:
One subnode is required to contain the default settings. It
diff --git a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml
index 3dd22220cb5f..a72d5c721516 100644
--- a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml
+++ b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml
@@ -43,7 +43,7 @@ properties:
priority:
$ref: /schemas/types.yaml#/definitions/uint32
description: |
- A priority ranging from 0 to 255 (default 128) according to the following guidelines:
+ A priority ranging from 0 to 255 (default 129) according to the following guidelines:
0: Restart handler of last resort, with limited restart capabilities.
128: Default restart handler; use if no other restart handler is expected to be available,
@@ -51,7 +51,7 @@ properties:
255: Highest priority restart handler, will preempt all other restart handlers.
minimum: 0
maximum: 255
- default: 128
+ default: 129
active-delay:
$ref: /schemas/types.yaml#/definitions/uint32
diff --git a/Documentation/devicetree/bindings/pwm/pwm.yaml b/Documentation/devicetree/bindings/pwm/pwm.yaml
index 2effe6c0de6b..3c01f85029e5 100644
--- a/Documentation/devicetree/bindings/pwm/pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/pwm.yaml
@@ -9,6 +9,8 @@ title: PWM controllers (providers)
maintainers:
- Thierry Reding <thierry.reding@gmail.com>
+select: false
+
properties:
$nodename:
pattern: "^pwm(@.*|-[0-9a-f])*$"
diff --git a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
index 29fe39bb08ad..d12855e7ffd7 100644
--- a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
+++ b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
@@ -15,6 +15,7 @@ allOf:
properties:
compatible:
enum:
+ - epson,rx8804
- epson,rx8900
- microcrystal,rv8803
diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
index 4fba6dba16f3..6fa7d9fc2dc7 100644
--- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
@@ -19,7 +19,14 @@ properties:
- qcom,pmk8350-rtc
reg:
- maxItems: 1
+ minItems: 1
+ maxItems: 2
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: rtc
+ - const: alarm
interrupts:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
index 2359f541b770..764717ce1873 100644
--- a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
@@ -127,6 +127,7 @@ examples:
st,syscfg = <&pwrcfg 0x00 0x100>;
};
+ - |
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/clock/stm32mp1-clks.h>
rtc@5c004000 {
diff --git a/Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml b/Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
new file mode 100644
index 000000000000..fd1b3e71ff2c
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) Sunplus Co., Ltd. 2021
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/sunplus,sp7021-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sunplus SP7021 Real Time Clock controller
+
+maintainers:
+ - Vincent Shih <vincent.sunplus@gmail.com>
+
+properties:
+ compatible:
+ const: sunplus,sp7021-rtc
+
+ reg:
+ maxItems: 1
+
+ reg-names:
+ items:
+ - const: rtc
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - clocks
+ - resets
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ rtc: serial@9c003a00 {
+ compatible = "sunplus,sp7021-rtc";
+ reg = <0x9c003a00 0x80>;
+ reg-names = "rtc";
+ clocks = <&clkc 0x12>;
+ resets = <&rstc 0x02>;
+ interrupt-parent = <&intc>;
+ interrupts = <163 IRQ_TYPE_EDGE_RISING>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/sound/samsung-i2s.yaml b/Documentation/devicetree/bindings/sound/samsung-i2s.yaml
index 2e3628ef48df..84c4d6cba521 100644
--- a/Documentation/devicetree/bindings/sound/samsung-i2s.yaml
+++ b/Documentation/devicetree/bindings/sound/samsung-i2s.yaml
@@ -110,12 +110,6 @@ properties:
Internal DMA register base address of the audio
subsystem (used in secondary sound source).
- pinctrl-0:
- description: Should specify pin control groups used for this controller.
-
- pinctrl-names:
- const: default
-
power-domains:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 9af1b0f4ecea..091792ba993e 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -31,7 +31,7 @@ properties:
- enum:
# SMBus/I2C Digital Temperature Sensor in 6-Pin SOT with SMBus Alert and Over Temperature Pin
- ad,ad7414
- # ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems
+ # ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems
- ad,adm9240
# AD5110 - Nonvolatile Digital Potentiometer
- adi,ad5110
@@ -43,7 +43,7 @@ properties:
- adi,adp5589
# AMS iAQ-Core VOC Sensor
- ams,iaq-core
- # i2c serial eeprom (24cxx)
+ # i2c serial eeprom (24cxx)
- at,24c08
# i2c trusted platform module (TPM)
- atmel,at97sc3204t
@@ -303,9 +303,9 @@ properties:
- skyworks,sky81452
# Socionext SynQuacer TPM MMIO module
- socionext,synquacer-tpm-mmio
- # i2c serial eeprom (24cxx)
- - sparkfun,qwiic-joystick
# SparkFun Qwiic Joystick (COM-15168) with i2c interface
+ - sparkfun,qwiic-joystick
+ # i2c serial eeprom (24cxx)
- st,24c256
# Ambient Light Sensor with SMBUS/Two Wire Serial Interface
- taos,tsl2550
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index c48ce3c54951..294093d45a23 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -25,6 +25,8 @@ patternProperties:
# Keep list in alphabetical order.
"^70mai,.*":
description: 70mai Co., Ltd.
+ "^8dev,.*":
+ description: 8devices, UAB
"^abb,.*":
description: ABB
"^abilis,.*":
@@ -441,6 +443,8 @@ patternProperties:
description: Freescale Semiconductor
"^fujitsu,.*":
description: Fujitsu Ltd.
+ "^fxtec,.*":
+ description: FX Technology Ltd.
"^gardena,.*":
description: GARDENA GmbH
"^gateworks,.*":
@@ -515,6 +519,8 @@ patternProperties:
description: HannStar Display Co.
"^holtek,.*":
description: Holtek Semiconductor, Inc.
+ "^huawei,.*":
+ description: Huawei Technologies Co., Ltd.
"^hugsun,.*":
description: Shenzhen Hugsun Technology Co. Ltd.
"^hwacom,.*":
@@ -1207,6 +1213,8 @@ patternProperties:
description: THine Electronics, Inc.
"^thingyjp,.*":
description: thingy.jp
+ "^thundercomm,.*":
+ description: Thundercomm Technology Co., Ltd.
"^ti,.*":
description: Texas Instruments
"^tianma,.*":
@@ -1334,6 +1342,8 @@ patternProperties:
description: Wiligear, Ltd.
"^winbond,.*":
description: Winbond Electronics corp.
+ "^wingtech,.*":
+ description: Wingtech Technology Co., Ltd.
"^winlink,.*":
description: WinLink Co., Ltd
"^winstar,.*":
diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst
index 94a2d7f01d99..d3cfa73cbb2b 100644
--- a/Documentation/driver-api/firewire.rst
+++ b/Documentation/driver-api/firewire.rst
@@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module
Firewire char device data structures
====================================
-.. include:: /ABI/stable/firewire-cdev
+.. include:: ../ABI/stable/firewire-cdev
:literal:
.. kernel-doc:: include/uapi/linux/firewire-cdev.h
@@ -28,7 +28,7 @@ Firewire char device data structures
Firewire device probing and sysfs interfaces
============================================
-.. include:: /ABI/stable/sysfs-bus-firewire
+.. include:: ../ABI/stable/sysfs-bus-firewire
:literal:
.. kernel-doc:: drivers/firewire/core-device.c
diff --git a/Documentation/filesystems/ceph.rst b/Documentation/filesystems/ceph.rst
index 7d2ef4e27273..4942e018db85 100644
--- a/Documentation/filesystems/ceph.rst
+++ b/Documentation/filesystems/ceph.rst
@@ -82,7 +82,7 @@ Mount Syntax
The basic mount syntax is::
- # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
+ # mount -t ceph user@fsid.fs_name=/[subdir] mnt -o mon_addr=monip1[:port][/monip2[:port]]
You only need to specify a single monitor, as the client will get the
full list when it connects. (However, if the monitor you specify
@@ -90,16 +90,35 @@ happens to be down, the mount won't succeed.) The port can be left
off if the monitor is using the default. So if the monitor is at
1.2.3.4::
- # mount -t ceph 1.2.3.4:/ /mnt/ceph
+ # mount -t ceph cephuser@07fe3187-00d9-42a3-814b-72a4d5e7d5be.cephfs=/ /mnt/ceph -o mon_addr=1.2.3.4
is sufficient. If /sbin/mount.ceph is installed, a hostname can be
-used instead of an IP address.
+used instead of an IP address and the cluster FSID can be left out
+(as the mount helper will fill it in by reading the ceph configuration
+file)::
+ # mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=mon-addr
+Multiple monitor addresses can be passed by separating each address with a slash (`/`)::
+
+ # mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=192.168.1.100/192.168.1.101
+
+When using the mount helper, the monitor address can be read from the ceph
+configuration file if available. Note that the cluster FSID (passed as part
+of the device string) is validated by checking it against the FSID reported by
+the monitor.
Mount Options
=============
+ mon_addr=ip_address[:port][/ip_address[:port]]
+ Monitor address to the cluster. This is used to bootstrap the
+ connection to the cluster. Once connection is established, the
+ monitor addresses in the monitor map are followed.
+
+ fsid=cluster-id
+ FSID of the cluster (from `ceph fsid` command).
+
ip=A.B.C.D[:N]
Specify the IP and/or port the client should bind to locally.
There is normally not much reason to do this. If the IP is not
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 2b4de3926858..b58692d687f6 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -166,6 +166,7 @@ to ReStructured Text format, or are simply too old.
.. toctree::
:maxdepth: 2
+ tools/index
staging/index
watch_queue
diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index e6cd40663ea5..4cbd50edf277 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -295,7 +295,7 @@ Pete Zaitcev gives the following summary:
- If you are in a process context (any syscall) and want to lock other
process out, use a mutex. You can take a mutex and sleep
- (``copy_from_user*(`` or ``kmalloc(x,GFP_KERNEL)``).
+ (``copy_from_user()`` or ``kmalloc(x,GFP_KERNEL)``).
- Otherwise (== data can be touched in an interrupt), use
spin_lock_irqsave() and
diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst
index b7f98930d38d..1bd687b97104 100644
--- a/Documentation/riscv/vm-layout.rst
+++ b/Documentation/riscv/vm-layout.rst
@@ -47,12 +47,12 @@ RISC-V Linux Kernel SV39
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
- ffffffc000000000 | -256 GB | ffffffc7ffffffff | 32 GB | kasan
- ffffffcefee00000 | -196 GB | ffffffcefeffffff | 2 MB | fixmap
- ffffffceff000000 | -196 GB | ffffffceffffffff | 16 MB | PCI io
- ffffffcf00000000 | -196 GB | ffffffcfffffffff | 4 GB | vmemmap
- ffffffd000000000 | -192 GB | ffffffdfffffffff | 64 GB | vmalloc/ioremap space
- ffffffe000000000 | -128 GB | ffffffff7fffffff | 124 GB | direct mapping of all physical memory
+ ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap
+ ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io
+ ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap
+ ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space
+ ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory
+ fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan
__________________|____________|__________________|_________|____________________________________________________________
|
|
diff --git a/Documentation/staging/tee.rst b/Documentation/staging/tee.rst
index 3c63d8dcd61e..498343c7ab08 100644
--- a/Documentation/staging/tee.rst
+++ b/Documentation/staging/tee.rst
@@ -255,7 +255,7 @@ The following picture shows a high level overview of AMD-TEE::
+--------------------------+ +---------+--------------------+
At the lowest level (in x86), the AMD Secure Processor (ASP) driver uses the
-CPU to PSP mailbox regsister to submit commands to the PSP. The format of the
+CPU to PSP mailbox register to submit commands to the PSP. The format of the
command buffer is opaque to the ASP driver. Its role is to submit commands to
the secure processor and return results to AMD-TEE driver. The interface
between AMD-TEE driver and AMD Secure Processor driver can be found in [6].
@@ -290,7 +290,7 @@ cancel_req driver callback is not supported by AMD-TEE.
The GlobalPlatform TEE Client API [5] can be used by the user space (client) to
talk to AMD's TEE. AMD's TEE provides a secure environment for loading, opening
-a session, invoking commands and clossing session with TA.
+a session, invoking commands and closing session with TA.
References
==========
diff --git a/Documentation/tools/index.rst b/Documentation/tools/index.rst
new file mode 100644
index 000000000000..0bb1e61bdcc0
--- /dev/null
+++ b/Documentation/tools/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Kernel tools
+============
+
+This book covers user-space tools that are shipped with the kernel source;
+more additions are needed here:
+
+.. toctree::
+ :maxdepth: 1
+
+ rtla/index
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/tools/rtla/index.rst b/Documentation/tools/rtla/index.rst
new file mode 100644
index 000000000000..840f0bf3e803
--- /dev/null
+++ b/Documentation/tools/rtla/index.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+The realtime Linux analysis tool
+================================
+
+RTLA provides a set of tools for the analysis of the kernel's realtime
+behavior on specific hardware.
+
+.. toctree::
+ :maxdepth: 1
+
+ rtla
+ rtla-osnoise
+ rtla-osnoise-hist
+ rtla-osnoise-top
+ rtla-timerlat
+ rtla-timerlat-hist
+ rtla-timerlat-top
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index b3166c4a7867..45b8c56af67a 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -3370,7 +3370,7 @@ one of the latency tracers, you will get the following results.
Instances
---------
-In the tracefs tracing directory is a directory called "instances".
+In the tracefs tracing directory, there is a directory called "instances".
This directory can have new directories created inside of it using
mkdir, and removing directories with rmdir. The directory created
with mkdir in this directory will already contain files and other
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index d3791a14eb9a..a4267104db50 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -3268,6 +3268,7 @@ number.
:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+ KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set)
:Type: device ioctl, vm ioctl, vcpu ioctl
:Parameters: struct kvm_device_attr
:Returns: 0 on success, -1 on error
@@ -3302,7 +3303,8 @@ transferred is defined by the particular attribute.
------------------------
:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
- KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+ KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+ KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device
:Type: device ioctl, vm ioctl, vcpu ioctl
:Parameters: struct kvm_device_attr
:Returns: 0 on success, -1 on error
@@ -5545,8 +5547,8 @@ the trailing ``'\0'``, is indicated by ``name_size`` in the header.
The Stats Data block contains an array of 64-bit values in the same order
as the descriptors in Descriptors block.
-4.42 KVM_GET_XSAVE2
-------------------
+4.134 KVM_GET_XSAVE2
+--------------------
:Capability: KVM_CAP_XSAVE2
:Architectures: x86
@@ -7363,7 +7365,7 @@ trap and emulate MSRs that are outside of the scope of KVM as well as
limit the attack surface on KVM's MSR emulation code.
8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
------------------------------
+-------------------------------------
Architectures: x86
diff --git a/Documentation/vm/cleancache.rst b/Documentation/vm/cleancache.rst
deleted file mode 100644
index 68cba9131c31..000000000000
--- a/Documentation/vm/cleancache.rst
+++ /dev/null
@@ -1,296 +0,0 @@
-.. _cleancache:
-
-==========
-Cleancache
-==========
-
-Motivation
-==========
-
-Cleancache is a new optional feature provided by the VFS layer that
-potentially dramatically increases page cache effectiveness for
-many workloads in many environments at a negligible cost.
-
-Cleancache can be thought of as a page-granularity victim cache for clean
-pages that the kernel's pageframe replacement algorithm (PFRA) would like
-to keep around, but can't since there isn't enough memory. So when the
-PFRA "evicts" a page, it first attempts to use cleancache code to
-put the data contained in that page into "transcendent memory", memory
-that is not directly accessible or addressable by the kernel and is
-of unknown and possibly time-varying size.
-
-Later, when a cleancache-enabled filesystem wishes to access a page
-in a file on disk, it first checks cleancache to see if it already
-contains it; if it does, the page of data is copied into the kernel
-and a disk access is avoided.
-
-Transcendent memory "drivers" for cleancache are currently implemented
-in Xen (using hypervisor memory) and zcache (using in-kernel compressed
-memory) and other implementations are in development.
-
-:ref:`FAQs <faq>` are included below.
-
-Implementation Overview
-=======================
-
-A cleancache "backend" that provides transcendent memory registers itself
-to the kernel's cleancache "frontend" by calling cleancache_register_ops,
-passing a pointer to a cleancache_ops structure with funcs set appropriately.
-The functions provided must conform to certain semantics as follows:
-
-Most important, cleancache is "ephemeral". Pages which are copied into
-cleancache have an indefinite lifetime which is completely unknowable
-by the kernel and so may or may not still be in cleancache at any later time.
-Thus, as its name implies, cleancache is not suitable for dirty pages.
-Cleancache has complete discretion over what pages to preserve and what
-pages to discard and when.
-
-Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a
-pool id which, if positive, must be saved in the filesystem's superblock;
-a negative return value indicates failure. A "put_page" will copy a
-(presumably about-to-be-evicted) page into cleancache and associate it with
-the pool id, a file key, and a page index into the file. (The combination
-of a pool id, a file key, and an index is sometimes called a "handle".)
-A "get_page" will copy the page, if found, from cleancache into kernel memory.
-An "invalidate_page" will ensure the page no longer is present in cleancache;
-an "invalidate_inode" will invalidate all pages associated with the specified
-file; and, when a filesystem is unmounted, an "invalidate_fs" will invalidate
-all pages in all files specified by the given pool id and also surrender
-the pool id.
-
-An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache
-to treat the pool as shared using a 128-bit UUID as a key. On systems
-that may run multiple kernels (such as hard partitioned or virtualized
-systems) that may share a clustered filesystem, and where cleancache
-may be shared among those kernels, calls to init_shared_fs that specify the
-same UUID will receive the same pool id, thus allowing the pages to
-be shared. Note that any security requirements must be imposed outside
-of the kernel (e.g. by "tools" that control cleancache). Or a
-cleancache implementation can simply disable shared_init by always
-returning a negative value.
-
-If a get_page is successful on a non-shared pool, the page is invalidated
-(thus making cleancache an "exclusive" cache). On a shared pool, the page
-is NOT invalidated on a successful get_page so that it remains accessible to
-other sharers. The kernel is responsible for ensuring coherency between
-cleancache (shared or not), the page cache, and the filesystem, using
-cleancache invalidate operations as required.
-
-Note that cleancache must enforce put-put-get coherency and get-get
-coherency. For the former, if two puts are made to the same handle but
-with different data, say AAA by the first put and BBB by the second, a
-subsequent get can never return the stale data (AAA). For get-get coherency,
-if a get for a given handle fails, subsequent gets for that handle will
-never succeed unless preceded by a successful put with that handle.
-
-Last, cleancache provides no SMP serialization guarantees; if two
-different Linux threads are simultaneously putting and invalidating a page
-with the same handle, the results are indeterminate. Callers must
-lock the page to ensure serial behavior.
-
-Cleancache Performance Metrics
-==============================
-
-If properly configured, monitoring of cleancache is done via debugfs in
-the `/sys/kernel/debug/cleancache` directory. The effectiveness of cleancache
-can be measured (across all filesystems) with:
-
-``succ_gets``
- number of gets that were successful
-
-``failed_gets``
- number of gets that failed
-
-``puts``
- number of puts attempted (all "succeed")
-
-``invalidates``
- number of invalidates attempted
-
-A backend implementation may provide additional metrics.
-
-.. _faq:
-
-FAQ
-===
-
-* Where's the value? (Andrew Morton)
-
-Cleancache provides a significant performance benefit to many workloads
-in many environments with negligible overhead by improving the
-effectiveness of the pagecache. Clean pagecache pages are
-saved in transcendent memory (RAM that is otherwise not directly
-addressable to the kernel); fetching those pages later avoids "refaults"
-and thus disk reads.
-
-Cleancache (and its sister code "frontswap") provide interfaces for
-this transcendent memory (aka "tmem"), which conceptually lies between
-fast kernel-directly-addressable RAM and slower DMA/asynchronous devices.
-Disallowing direct kernel or userland reads/writes to tmem
-is ideal when data is transformed to a different form and size (such
-as with compression) or secretly moved (as might be useful for write-
-balancing for some RAM-like devices). Evicted page-cache pages (and
-swap pages) are a great use for this kind of slower-than-RAM-but-much-
-faster-than-disk transcendent memory, and the cleancache (and frontswap)
-"page-object-oriented" specification provides a nice way to read and
-write -- and indirectly "name" -- the pages.
-
-In the virtual case, the whole point of virtualization is to statistically
-multiplex physical resources across the varying demands of multiple
-virtual machines. This is really hard to do with RAM and efforts to
-do it well with no kernel change have essentially failed (except in some
-well-publicized special-case workloads). Cleancache -- and frontswap --
-with a fairly small impact on the kernel, provide a huge amount
-of flexibility for more dynamic, flexible RAM multiplexing.
-Specifically, the Xen Transcendent Memory backend allows otherwise
-"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
-virtual machines, but the pages can be compressed and deduplicated to
-optimize RAM utilization. And when guest OS's are induced to surrender
-underutilized RAM (e.g. with "self-ballooning"), page cache pages
-are the first to go, and cleancache allows those pages to be
-saved and reclaimed if overall host system memory conditions allow.
-
-And the identical interface used for cleancache can be used in
-physical systems as well. The zcache driver acts as a memory-hungry
-device that stores pages of data in a compressed state. And
-the proposed "RAMster" driver shares RAM across multiple physical
-systems.
-
-* Why does cleancache have its sticky fingers so deep inside the
- filesystems and VFS? (Andrew Morton and Christoph Hellwig)
-
-The core hooks for cleancache in VFS are in most cases a single line
-and the minimum set are placed precisely where needed to maintain
-coherency (via cleancache_invalidate operations) between cleancache,
-the page cache, and disk. All hooks compile into nothingness if
-cleancache is config'ed off and turn into a function-pointer-
-compare-to-NULL if config'ed on but no backend claims the ops
-functions, or to a compare-struct-element-to-negative if a
-backend claims the ops functions but a filesystem doesn't enable
-cleancache.
-
-Some filesystems are built entirely on top of VFS and the hooks
-in VFS are sufficient, so don't require an "init_fs" hook; the
-initial implementation of cleancache didn't provide this hook.
-But for some filesystems (such as btrfs), the VFS hooks are
-incomplete and one or more hooks in fs-specific code are required.
-And for some other filesystems, such as tmpfs, cleancache may
-be counterproductive. So it seemed prudent to require a filesystem
-to "opt in" to use cleancache, which requires adding a hook in
-each filesystem. Not all filesystems are supported by cleancache
-only because they haven't been tested. The existing set should
-be sufficient to validate the concept, the opt-in approach means
-that untested filesystems are not affected, and the hooks in the
-existing filesystems should make it very easy to add more
-filesystems in the future.
-
-The total impact of the hooks to existing fs and mm files is only
-about 40 lines added (not counting comments and blank lines).
-
-* Why not make cleancache asynchronous and batched so it can more
- easily interface with real devices with DMA instead of copying each
- individual page? (Minchan Kim)
-
-The one-page-at-a-time copy semantics simplifies the implementation
-on both the frontend and backend and also allows the backend to
-do fancy things on-the-fly like page compression and
-page deduplication. And since the data is "gone" (copied into/out
-of the pageframe) before the cleancache get/put call returns,
-a great deal of race conditions and potential coherency issues
-are avoided. While the interface seems odd for a "real device"
-or for real kernel-addressable RAM, it makes perfect sense for
-transcendent memory.
-
-* Why is non-shared cleancache "exclusive"? And where is the
- page "invalidated" after a "get"? (Minchan Kim)
-
-The main reason is to free up space in transcendent memory and
-to avoid unnecessary cleancache_invalidate calls. If you want inclusive,
-the page can be "put" immediately following the "get". If
-put-after-get for inclusive becomes common, the interface could
-be easily extended to add a "get_no_invalidate" call.
-
-The invalidate is done by the cleancache backend implementation.
-
-* What's the performance impact?
-
-Performance analysis has been presented at OLS'09 and LCA'10.
-Briefly, performance gains can be significant on most workloads,
-especially when memory pressure is high (e.g. when RAM is
-overcommitted in a virtual workload); and because the hooks are
-invoked primarily in place of or in addition to a disk read/write,
-overhead is negligible even in worst case workloads. Basically
-cleancache replaces I/O with memory-copy-CPU-overhead; on older
-single-core systems with slow memory-copy speeds, cleancache
-has little value, but in newer multicore machines, especially
-consolidated/virtualized machines, it has great value.
-
-* How do I add cleancache support for filesystem X? (Boaz Harrash)
-
-Filesystems that are well-behaved and conform to certain
-restrictions can utilize cleancache simply by making a call to
-cleancache_init_fs at mount time. Unusual, misbehaving, or
-poorly layered filesystems must either add additional hooks
-and/or undergo extensive additional testing... or should just
-not enable the optional cleancache.
-
-Some points for a filesystem to consider:
-
- - The FS should be block-device-based (e.g. a ram-based FS such
- as tmpfs should not enable cleancache)
- - To ensure coherency/correctness, the FS must ensure that all
- file removal or truncation operations either go through VFS or
- add hooks to do the equivalent cleancache "invalidate" operations
- - To ensure coherency/correctness, either inode numbers must
- be unique across the lifetime of the on-disk file OR the
- FS must provide an "encode_fh" function.
- - The FS must call the VFS superblock alloc and deactivate routines
- or add hooks to do the equivalent cleancache calls done there.
- - To maximize performance, all pages fetched from the FS should
- go through the do_mpag_readpage routine or the FS should add
- hooks to do the equivalent (cf. btrfs)
- - Currently, the FS blocksize must be the same as PAGESIZE. This
- is not an architectural restriction, but no backends currently
- support anything different.
- - A clustered FS should invoke the "shared_init_fs" cleancache
- hook to get best performance for some backends.
-
-* Why not use the KVA of the inode as the key? (Christoph Hellwig)
-
-If cleancache would use the inode virtual address instead of
-inode/filehandle, the pool id could be eliminated. But, this
-won't work because cleancache retains pagecache data pages
-persistently even when the inode has been pruned from the
-inode unused list, and only invalidates the data page if the file
-gets removed/truncated. So if cleancache used the inode kva,
-there would be potential coherency issues if/when the inode
-kva is reused for a different file. Alternately, if cleancache
-invalidated the pages when the inode kva was freed, much of the value
-of cleancache would be lost because the cache of pages in cleanache
-is potentially much larger than the kernel pagecache and is most
-useful if the pages survive inode cache removal.
-
-* Why is a global variable required?
-
-The cleancache_enabled flag is checked in all of the frequently-used
-cleancache hooks. The alternative is a function call to check a static
-variable. Since cleancache is enabled dynamically at runtime, systems
-that don't enable cleancache would suffer thousands (possibly
-tens-of-thousands) of unnecessary function calls per second. So the
-global variable allows cleancache to be enabled by default at compile
-time, but have insignificant performance impact when cleancache remains
-disabled at runtime.
-
-* Does cleanache work with KVM?
-
-The memory model of KVM is sufficiently different that a cleancache
-backend may have less value for KVM. This remains to be tested,
-especially in an overcommitted system.
-
-* Does cleancache work in userspace? It sounds useful for
- memory hungry caches like web browsers. (Jamie Lokier)
-
-No plans yet, though we agree it sounds useful, at least for
-apps that bypass the page cache (e.g. O_DIRECT).
-
-Last updated: Dan Magenheimer, April 13 2011
diff --git a/Documentation/vm/frontswap.rst b/Documentation/vm/frontswap.rst
index 1979f430c1c5..feecc5e24477 100644
--- a/Documentation/vm/frontswap.rst
+++ b/Documentation/vm/frontswap.rst
@@ -8,12 +8,6 @@ Frontswap provides a "transcendent memory" interface for swap pages.
In some environments, dramatic performance savings may be obtained because
swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk.
-(Note, frontswap -- and :ref:`cleancache` (merged at 3.0) -- are the "frontends"
-and the only necessary changes to the core kernel for transcendent memory;
-all other supporting code -- the "backends" -- is implemented as drivers.
-See the LWN.net article `Transcendent memory in a nutshell`_
-for a detailed overview of frontswap and related kernel parts)
-
.. _Transcendent memory in a nutshell: https://lwn.net/Articles/454795/
Frontswap is so named because it can be thought of as the opposite of
@@ -45,12 +39,6 @@ a disk write and, if the data is later read back, a disk read are avoided.
If a store returns failure, transcendent memory has rejected the data, and the
page can be written to swap as usual.
-If a backend chooses, frontswap can be configured as a "writethrough
-cache" by calling frontswap_writethrough(). In this mode, the reduction
-in swap device writes is lost (and also a non-trivial performance advantage)
-in order to allow the backend to arbitrarily "reclaim" space used to
-store frontswap pages to more completely manage its memory usage.
-
Note that if a page is stored and the page already exists in transcendent memory
(a "duplicate" store), either the store succeeds and the data is overwritten,
or the store fails AND the page is invalidated. This ensures stale data may
@@ -87,11 +75,9 @@ This interface is ideal when data is transformed to a different form
and size (such as with compression) or secretly moved (as might be
useful for write-balancing for some RAM-like devices). Swap pages (and
evicted page-cache pages) are a great use for this kind of slower-than-RAM-
-but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and
-cleancache) interface to transcendent memory provides a nice way to read
-and write -- and indirectly "name" -- the pages.
+but-much-faster-than-disk "pseudo-RAM device".
-Frontswap -- and cleancache -- with a fairly small impact on the kernel,
+Frontswap, with a fairly small impact on the kernel,
provides a huge amount of flexibility for more dynamic, flexible RAM
utilization in various system configurations:
@@ -269,19 +255,6 @@ the old data and ensure that it is no longer accessible. Since the
swap subsystem then writes the new data to the read swap device,
this is the correct course of action to ensure coherency.
-* What is frontswap_shrink for?
-
-When the (non-frontswap) swap subsystem swaps out a page to a real
-swap device, that page is only taking up low-value pre-allocated disk
-space. But if frontswap has placed a page in transcendent memory, that
-page may be taking up valuable real estate. The frontswap_shrink
-routine allows code outside of the swap subsystem to force pages out
-of the memory managed by frontswap and back into kernel-addressable memory.
-For example, in RAMster, a "suction driver" thread will attempt
-to "repatriate" pages sent to a remote machine back to the local machine;
-this is driven using the frontswap_shrink mechanism when memory pressure
-subsides.
-
* Why does the frontswap patch create the new include file swapfile.h?
The frontswap code depends on some swap-subsystem-internal data
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
index 932440805453..44365c4574a3 100644
--- a/Documentation/vm/index.rst
+++ b/Documentation/vm/index.rst
@@ -15,7 +15,6 @@ algorithms. If you are looking for advice on simply allocating memory, see the
active_mm
arch_pgtable_helpers
balance
- cleancache
damon/index
free_page_reporting
frontswap
diff --git a/Documentation/vm/page_table_check.rst b/Documentation/vm/page_table_check.rst
index 81f521ff7ea7..1a09472f10a3 100644
--- a/Documentation/vm/page_table_check.rst
+++ b/Documentation/vm/page_table_check.rst
@@ -9,7 +9,7 @@ Page Table Check
Introduction
============
-Page table check allows to hardern the kernel by ensuring that some types of
+Page table check allows hardening the kernel by ensuring that some types of
the memory corruptions are prevented.
Page table check performs extra verifications at the time when new pages become
diff --git a/MAINTAINERS b/MAINTAINERS
index b84e2d5642bc..f41088418aae 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -190,8 +190,9 @@ M: Johannes Berg <johannes@sipsolutions.net>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
+Q: https://patchwork.kernel.org/project/linux-wireless/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git
F: Documentation/driver-api/80211/cfg80211.rst
F: Documentation/networking/regulatory.rst
F: include/linux/ieee80211.h
@@ -3410,14 +3411,14 @@ M: Yury Norov <yury.norov@gmail.com>
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
R: Rasmus Villemoes <linux@rasmusvillemoes.dk>
S: Maintained
-F: include/asm-generic/bitops/find.h
F: include/linux/bitmap.h
+F: include/linux/find.h
F: lib/bitmap.c
F: lib/find_bit.c
F: lib/find_bit_benchmark.c
F: lib/test_bitmap.c
-F: tools/include/asm-generic/bitops/find.h
F: tools/include/linux/bitmap.h
+F: tools/include/linux/find.h
F: tools/lib/bitmap.c
F: tools/lib/find_bit.c
@@ -4705,13 +4706,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/cla
F: include/linux/cfi.h
F: kernel/cfi.c
-CLEANCACHE API
-M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-L: linux-kernel@vger.kernel.org
-S: Maintained
-F: include/linux/cleancache.h
-F: mm/cleancache.c
-
CLK API
M: Russell King <linux@armlinux.org.uk>
L: linux-clk@vger.kernel.org
@@ -7215,8 +7209,10 @@ F: drivers/net/mdio/of_mdio.c
F: drivers/net/pcs/
F: drivers/net/phy/
F: include/dt-bindings/net/qca-ar803x.h
+F: include/linux/linkmode.h
F: include/linux/*mdio*.h
F: include/linux/mdio/*.h
+F: include/linux/mii.h
F: include/linux/of_net.h
F: include/linux/phy.h
F: include/linux/phy_fixed.h
@@ -11373,8 +11369,9 @@ M: Johannes Berg <johannes@sipsolutions.net>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
+Q: https://patchwork.kernel.org/project/linux-wireless/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git
F: Documentation/networking/mac80211-injection.rst
F: Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst
F: drivers/net/wireless/mac80211_hwsim.[ch]
@@ -13381,9 +13378,10 @@ NETWORKING DRIVERS (WIRELESS)
M: Kalle Valo <kvalo@kernel.org>
L: linux-wireless@vger.kernel.org
S: Maintained
-Q: http://patchwork.kernel.org/project/linux-wireless/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git
+W: https://wireless.wiki.kernel.org/
+Q: https://patchwork.kernel.org/project/linux-wireless/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git
F: Documentation/devicetree/bindings/net/wireless/
F: drivers/net/wireless/
@@ -13456,7 +13454,11 @@ L: netdev@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
F: arch/x86/net/*
+F: include/linux/ip.h
+F: include/linux/ipv6*
+F: include/net/fib*
F: include/net/ip*
+F: include/net/route.h
F: net/ipv4/
F: net/ipv6/
@@ -13517,10 +13519,6 @@ F: include/net/tls.h
F: include/uapi/linux/tls.h
F: net/tls/*
-NETWORKING [WIRELESS]
-L: linux-wireless@vger.kernel.org
-Q: http://patchwork.kernel.org/project/linux-wireless/list/
-
NETXEN (1/10) GbE SUPPORT
M: Manish Chopra <manishc@marvell.com>
M: Rahul Verma <rahulv@marvell.com>
@@ -16539,8 +16537,9 @@ M: Johannes Berg <johannes@sipsolutions.net>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
+Q: https://patchwork.kernel.org/project/linux-wireless/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git
F: Documentation/ABI/stable/sysfs-class-rfkill
F: Documentation/driver-api/rfkill.rst
F: include/linux/rfkill.h
@@ -16807,6 +16806,7 @@ M: Heiko Carstens <hca@linux.ibm.com>
M: Vasily Gorbik <gor@linux.ibm.com>
M: Christian Borntraeger <borntraeger@linux.ibm.com>
R: Alexander Gordeev <agordeev@linux.ibm.com>
+R: Sven Schnelle <svens@linux.ibm.com>
L: linux-s390@vger.kernel.org
S: Supported
W: http://www.ibm.com/developerworks/linux/linux390/
@@ -18490,6 +18490,13 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/dlink/sundance.c
+SUNPLUS RTC DRIVER
+M: Vincent Shih <vincent.sunplus@gmail.com>
+L: linux-rtc@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
+F: drivers/rtc/rtc-sunplus.c
+
SUPERH
M: Yoshinori Sato <ysato@users.sourceforge.jp>
M: Rich Felker <dalias@libc.org>
diff --git a/Makefile b/Makefile
index 3f07f0f04475..0fb4f94a6885 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
-PATCHLEVEL = 16
+PATCHLEVEL = 17
SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
NAME = Gobble Gobble
# *DOCUMENTATION*
@@ -778,7 +778,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong
KBUILD_CFLAGS += $(stackp-flags-y)
KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror
-KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH:"%"=%)
+KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
ifdef CONFIG_CC_IS_CLANG
KBUILD_CPPFLAGS += -Qunused-arguments
diff --git a/arch/Kconfig b/arch/Kconfig
index 874c65d9e963..678a80713b21 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -997,6 +997,10 @@ config PAGE_SIZE_LESS_THAN_64KB
depends on !PAGE_SIZE_64KB
depends on !PARISC_PAGE_SIZE_64KB
depends on !PPC_64K_PAGES
+ depends on PAGE_SIZE_LESS_THAN_256KB
+
+config PAGE_SIZE_LESS_THAN_256KB
+ def_bool y
depends on !PPC_256K_PAGES
depends on !PAGE_SIZE_256KB
diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h
index 5adca78830b5..e1d8483a45f2 100644
--- a/arch/alpha/include/asm/bitops.h
+++ b/arch/alpha/include/asm/bitops.h
@@ -430,8 +430,6 @@ static inline unsigned int __arch_hweight8(unsigned int w)
#endif /* __KERNEL__ */
-#include <asm-generic/bitops/find.h>
-
#ifdef __KERNEL__
/*
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
index ce3077946e1d..fb3025396ac9 100644
--- a/arch/alpha/kernel/rtc.c
+++ b/arch/alpha/kernel/rtc.c
@@ -80,7 +80,12 @@ init_rtc_epoch(void)
static int
alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
- mc146818_get_time(tm);
+ int ret = mc146818_get_time(tm);
+
+ if (ret < 0) {
+ dev_err_ratelimited(dev, "unable to read current time\n");
+ return ret;
+ }
/* Adjust for non-default epochs. It's easier to depend on the
generic __get_rtc_time and adjust the epoch here than create
diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c
index 528d2be58182..217b4dca51dd 100644
--- a/arch/alpha/kernel/srm_env.c
+++ b/arch/alpha/kernel/srm_env.c
@@ -83,14 +83,14 @@ static int srm_env_proc_show(struct seq_file *m, void *v)
static int srm_env_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, srm_env_proc_show, PDE_DATA(inode));
+ return single_open(file, srm_env_proc_show, pde_data(inode));
}
static ssize_t srm_env_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *pos)
{
int res;
- unsigned long id = (unsigned long)PDE_DATA(file_inode(file));
+ unsigned long id = (unsigned long)pde_data(file_inode(file));
char *buf = (char *) __get_free_page(GFP_USER);
unsigned long ret1, ret2;
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index f74d9860a442..3c2a4753d09b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -20,7 +20,6 @@ config ARC
select COMMON_CLK
select DMA_DIRECT_REMAP
select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
- select GENERIC_FIND_FIRST_BIT
# for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
select GENERIC_IRQ_SHOW
select GENERIC_PCI_IOMAP
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index a7daaf64ae34..bdb7e190a294 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -189,7 +189,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
#include <asm-generic/bitops/atomic.h>
#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fabe39169b12..4c97cb40eebb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -83,6 +83,7 @@ config ARM
select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
select HAVE_CONTEXT_TRACKING
select HAVE_C_RECORDMCOUNT
+ select HAVE_BUILDTIME_MCOUNT_SORT
select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig
index 383c632eba7b..a9ed79b7f871 100644
--- a/arch/arm/configs/bcm2835_defconfig
+++ b/arch/arm/configs/bcm2835_defconfig
@@ -31,7 +31,6 @@ CONFIG_ARCH_BCM2835=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_AEABI=y
CONFIG_KSM=y
-CONFIG_CLEANCACHE=y
CONFIG_CMA=y
CONFIG_SECCOMP=y
CONFIG_KEXEC=y
diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig
index 0daa9c0d298e..9981566f2096 100644
--- a/arch/arm/configs/qcom_defconfig
+++ b/arch/arm/configs/qcom_defconfig
@@ -27,7 +27,6 @@ CONFIG_PCIE_QCOM=y
CONFIG_SMP=y
CONFIG_PREEMPT=y
CONFIG_HIGHMEM=y
-CONFIG_CLEANCACHE=y
CONFIG_ARM_APPENDED_DTB=y
CONFIG_ARM_ATAG_DTB_COMPAT=y
CONFIG_CPU_IDLE=y
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 7d23d4bb2168..6fe67963ba5a 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -288,6 +288,7 @@
*/
#define ALT_UP(instr...) \
.pushsection ".alt.smp.init", "a" ;\
+ .align 2 ;\
.long 9998b - . ;\
9997: instr ;\
.if . - 9997b == 2 ;\
@@ -299,6 +300,7 @@
.popsection
#define ALT_UP_B(label) \
.pushsection ".alt.smp.init", "a" ;\
+ .align 2 ;\
.long 9998b - . ;\
W(b) . + (label - 9998b) ;\
.popsection
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index c92e42a5c8f7..8e94fe7ab5eb 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -264,7 +264,6 @@ static inline int find_next_bit_le(const void *p, int size, int offset)
#endif
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
/*
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 6af68edfa53a..bdc35c0e8dfb 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -96,6 +96,7 @@ unsigned long __get_wchan(struct task_struct *p);
#define __ALT_SMP_ASM(smp, up) \
"9998: " smp "\n" \
" .pushsection \".alt.smp.init\", \"a\"\n" \
+ " .align 2\n" \
" .long 9998b - .\n" \
" " up "\n" \
" .popsection\n"
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 36fbc3329252..32dbfd81f42a 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <asm/memory.h>
#include <asm/domain.h>
+#include <asm/unaligned.h>
#include <asm/unified.h>
#include <asm/compiler.h>
@@ -497,7 +498,10 @@ do { \
} \
default: __err = __get_user_bad(); break; \
} \
- *(type *)(dst) = __val; \
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) \
+ put_unaligned(__val, (type *)(dst)); \
+ else \
+ *(type *)(dst) = __val; /* aligned by caller */ \
if (__err) \
goto err_label; \
} while (0)
@@ -507,7 +511,9 @@ do { \
const type *__pk_ptr = (dst); \
unsigned long __dst = (unsigned long)__pk_ptr; \
int __err = 0; \
- type __val = *(type *)src; \
+ type __val = IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \
+ ? get_unaligned((type *)(src)) \
+ : *(type *)(src); /* aligned by caller */ \
switch (sizeof(type)) { \
case 1: __put_user_asm_byte(__val, __dst, __err, ""); break; \
case 2: __put_user_asm_half(__val, __dst, __err, ""); break; \
diff --git a/arch/arm/kernel/atags_proc.c b/arch/arm/kernel/atags_proc.c
index 3c2faf2bd124..3ec2afe78423 100644
--- a/arch/arm/kernel/atags_proc.c
+++ b/arch/arm/kernel/atags_proc.c
@@ -13,7 +13,7 @@ struct buffer {
static ssize_t atags_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct buffer *b = PDE_DATA(file_inode(file));
+ struct buffer *b = pde_data(file_inode(file));
return simple_read_from_buffer(buf, count, ppos, b->data, b->size);
}
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index adbb3817d0be..6f499559d193 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -1005,7 +1005,7 @@ static int __init noalign_setup(char *__unused)
__setup("noalign", noalign_setup);
/*
- * This needs to be done after sysctl_init, otherwise sys/ will be
+ * This needs to be done after sysctl_init_bases(), otherwise sys/ will be
* overwritten. Actually, this shouldn't be in sys/ at all since
* it isn't a sysctl, and it doesn't contain sysctl information.
* We now locate it in /proc/cpu/alignment instead.
diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile
index 14db56f49f0a..6159010dac4a 100644
--- a/arch/arm/probes/kprobes/Makefile
+++ b/arch/arm/probes/kprobes/Makefile
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+KASAN_SANITIZE_actions-common.o := n
+KASAN_SANITIZE_actions-arm.o := n
+KASAN_SANITIZE_actions-thumb.o := n
obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o
obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o
test-kprobes-objs := test-core.o
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f6e333b59314..f2b5a4abef21 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -120,7 +120,6 @@ config ARM64
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
- select GENERIC_FIND_FIRST_BIT
select GENERIC_IDLE_POLL_SETUP
select GENERIC_IRQ_IPI
select GENERIC_IRQ_PROBE
@@ -671,15 +670,25 @@ config ARM64_ERRATUM_1508412
config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
bool
+config ARM64_ERRATUM_2051678
+ bool "Cortex-A510: 2051678: disable Hardware Update of the page table dirty bit"
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2051678.
+ Affected Cortex-A510 might not respect the ordering rules for
+ hardware update of the page table's dirty bit. The workaround
+ is to not enable the feature on affected CPUs.
+
+ If unsure, say Y.
+
config ARM64_ERRATUM_2119858
- bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode"
+ bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
default y
depends on CORESIGHT_TRBE
select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
help
- This option adds the workaround for ARM Cortex-A710 erratum 2119858.
+ This option adds the workaround for ARM Cortex-A710/X2 erratum 2119858.
- Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace
+ Affected Cortex-A710/X2 cores could overwrite up to 3 cache lines of trace
data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
the event of a WRAP event.
@@ -762,14 +771,14 @@ config ARM64_ERRATUM_2253138
If unsure, say Y.
config ARM64_ERRATUM_2224489
- bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range"
+ bool "Cortex-A710/X2: 2224489: workaround TRBE writing to address out-of-range"
depends on CORESIGHT_TRBE
default y
select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
help
- This option adds the workaround for ARM Cortex-A710 erratum 2224489.
+ This option adds the workaround for ARM Cortex-A710/X2 erratum 2224489.
- Affected Cortex-A710 cores might write to an out-of-range address, not reserved
+ Affected Cortex-A710/X2 cores might write to an out-of-range address, not reserved
for TRBE. Under some conditions, the TRBE might generate a write to the next
virtually addressed page following the last page of the TRBE address space
(i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
@@ -779,6 +788,65 @@ config ARM64_ERRATUM_2224489
If unsure, say Y.
+config ARM64_ERRATUM_2064142
+ bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled"
+ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2064142.
+
+ Affected Cortex-A510 core might fail to write into system registers after the
+ TRBE has been disabled. Under some conditions after the TRBE has been disabled
+ writes into TRBE registers TRBLIMITR_EL1, TRBPTR_EL1, TRBBASER_EL1, TRBSR_EL1,
+ and TRBTRG_EL1 will be ignored and will not take effect.
+
+ Work around this in the driver by executing TSB CSYNC and DSB after collection
+ is stopped and before performing a system register write to one of the affected
+ registers.
+
+ If unsure, say Y.
+
+config ARM64_ERRATUM_2038923
+ bool "Cortex-A510: 2038923: workaround TRBE corruption with enable"
+ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2038923.
+
+ Affected Cortex-A510 core might cause an inconsistent view on whether trace is
+ prohibited within the CPU. As a result, the trace buffer or trace buffer state
+ might be corrupted. This happens after TRBE buffer has been enabled by setting
+ TRBLIMITR_EL1.E, followed by just a single context synchronization event before
+ execution changes from a context, in which trace is prohibited to one where it
+ isn't, or vice versa. In these mentioned conditions, the view of whether trace
+ is prohibited is inconsistent between parts of the CPU, and the trace buffer or
+ the trace buffer state might be corrupted.
+
+ Work around this in the driver by preventing an inconsistent view of whether the
+ trace is prohibited or not based on TRBLIMITR_EL1.E by immediately following a
+ change to TRBLIMITR_EL1.E with at least one ISB instruction before an ERET, or
+ two ISB instructions if no ERET is to take place.
+
+ If unsure, say Y.
+
+config ARM64_ERRATUM_1902691
+ bool "Cortex-A510: 1902691: workaround TRBE trace corruption"
+ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 1902691.
+
+ Affected Cortex-A510 core might cause trace data corruption, when being written
+ into the memory. Effectively TRBE is broken and hence cannot be used to capture
+ trace data.
+
+ Work around this problem in the driver by just preventing TRBE initialization on
+ affected cpus. The firmware must have disabled the access to TRBE for the kernel
+ on such implementations. This will cover the kernel for any firmware that doesn't
+ do this already.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
@@ -1136,6 +1204,10 @@ config NUMA
select GENERIC_ARCH_NUMA
select ACPI_NUMA if ACPI
select OF_NUMA
+ select HAVE_SETUP_PER_CPU_AREA
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK
+ select USE_PERCPU_NUMA_NODE_ID
help
Enable NUMA (Non-Uniform Memory Access) support.
@@ -1152,22 +1224,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
source "kernel/Kconfig.hz"
config ARCH_SPARSEMEM_ENABLE
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index d955ade5df7c..5d460f6b7675 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -249,7 +249,7 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \
" mov %" #w "[tmp], %" #w "[old]\n" \
" cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \
" mov %" #w "[ret], %" #w "[tmp]" \
- : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr), \
+ : [ret] "+r" (x0), [v] "+Q" (*(u##sz *)ptr), \
[tmp] "=&r" (tmp) \
: [old] "r" (x1), [new] "r" (x2) \
: cl); \
diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h
index 81a3e519b07d..9b3c787132d2 100644
--- a/arch/arm64/include/asm/bitops.h
+++ b/arch/arm64/include/asm/bitops.h
@@ -18,7 +18,6 @@
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index f9bef42c1411..497acf134d99 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -243,7 +243,7 @@ static inline void __cmpwait_case_##sz(volatile void *ptr, \
" cbnz %" #w "[tmp], 1f\n" \
" wfe\n" \
"1:" \
- : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \
+ : [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr) \
: [val] "r" (val)); \
}
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 19b8441aa8f2..999b9149f856 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -73,7 +73,9 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A510 0xD46
#define ARM_CPU_PART_CORTEX_A710 0xD47
+#define ARM_CPU_PART_CORTEX_X2 0xD48
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define APM_CPU_PART_POTENZA 0x000
@@ -115,7 +117,9 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
+#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 9e1c1aef9ebd..066098198c24 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -347,6 +347,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
#endif
#ifdef CONFIG_ARM64_ERRATUM_2119858
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0),
#endif
{},
};
@@ -371,6 +372,7 @@ static struct midr_range trbe_write_out_of_range_cpus[] = {
#endif
#ifdef CONFIG_ARM64_ERRATUM_2224489
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0),
#endif
{},
};
@@ -598,6 +600,33 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2064142
+ {
+ .desc = "ARM erratum 2064142",
+ .capability = ARM64_WORKAROUND_2064142,
+
+ /* Cortex-A510 r0p0 - r0p2 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2038923
+ {
+ .desc = "ARM erratum 2038923",
+ .capability = ARM64_WORKAROUND_2038923,
+
+ /* Cortex-A510 r0p0 - r0p2 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1902691
+ {
+ .desc = "ARM erratum 1902691",
+ .capability = ARM64_WORKAROUND_1902691,
+
+ /* Cortex-A510 r0p0 - r0p1 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1)
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a46ab3b1c4d5..e5f23dab1c8d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1646,6 +1646,9 @@ static bool cpu_has_broken_dbm(void)
/* Kryo4xx Silver (rdpe => r1p0) */
MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2051678
+ MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
+#endif
{},
};
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 0fb58fed54cb..e4103e085681 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -33,8 +33,8 @@
*/
-static void start_backtrace(struct stackframe *frame, unsigned long fp,
- unsigned long pc)
+static notrace void start_backtrace(struct stackframe *frame, unsigned long fp,
+ unsigned long pc)
{
frame->fp = fp;
frame->pc = pc;
@@ -55,6 +55,7 @@ static void start_backtrace(struct stackframe *frame, unsigned long fp,
frame->prev_fp = 0;
frame->prev_type = STACK_TYPE_UNKNOWN;
}
+NOKPROBE_SYMBOL(start_backtrace);
/*
* Unwind from one frame record (A) to the next frame record (B).
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 60813497a381..172452f79e46 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -29,8 +29,11 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+# -Wmissing-prototypes and -Wmissing-declarations are removed from
+# the CFLAGS of vgettimeofday.c to make it possible to build the
+# kernel with CONFIG_WERROR enabled.
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \
- $(CC_FLAGS_LTO)
+ $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 0418399e0a20..c5d009715402 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
{
- write_sysreg_el1(val, SYS_SPSR);
+ if (has_vhe())
+ write_sysreg_el1(val, SYS_SPSR);
+ else
+ __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
}
static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 844a6f003fd5..2cb3867eb7c2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -983,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
*/
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
- if (need_flush) {
- kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
-
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
- }
+ if (need_flush && mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
if (childp)
mm_ops->put_page(childp);
@@ -1151,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct kvm_pgtable *pgt = arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep;
- kvm_pte_t *pte_follow;
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
return 0;
- pte_follow = kvm_pte_follow(pte, mm_ops);
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
+ if (mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
return 0;
}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 20db2f281cf2..4fb419f7b8b6 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
+ /* SEIS */
+ if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK)
+ val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT);
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index a33d4366b326..b549af8b1dc2 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -609,6 +609,18 @@ static int __init early_gicv4_enable(char *buf)
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
+static const struct midr_range broken_seis[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ {},
+};
+
+static bool vgic_v3_broken_seis(void)
+{
+ return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
+ is_midr_in_range_list(read_cpuid_id(), broken_seis));
+}
+
/**
* vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
* @info: pointer to the GIC description
@@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
group1_trap = true;
}
- if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
- kvm_info("GICv3 with locally generated SEI\n");
+ if (vgic_v3_broken_seis()) {
+ kvm_info("GICv3 with broken locally generated SEI\n");
+ kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK;
group0_trap = true;
group1_trap = true;
if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index c0181e60cc98..489455309695 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -40,8 +40,8 @@ static bool
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
- int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type);
- int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type);
+ int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
+ int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
unsigned long data, addr, offset;
addr = pt_regs_read_reg(regs, reg_addr);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a8834434af99..db63cc885771 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -172,7 +172,7 @@ int pfn_is_map_memory(unsigned long pfn)
}
EXPORT_SYMBOL(pfn_is_map_memory);
-static phys_addr_t memory_limit = PHYS_ADDR_MAX;
+static phys_addr_t memory_limit __ro_after_init = PHYS_ADDR_MAX;
/*
* Limit the memory size that was specified via FDT.
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 870c39537dd0..e7719e8f18de 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -55,6 +55,9 @@ WORKAROUND_1418040
WORKAROUND_1463225
WORKAROUND_1508412
WORKAROUND_1542419
+WORKAROUND_2064142
+WORKAROUND_2038923
+WORKAROUND_1902691
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
diff --git a/arch/csky/include/asm/bitops.h b/arch/csky/include/asm/bitops.h
index 02b72a000767..72e1b2aa29a0 100644
--- a/arch/csky/include/asm/bitops.h
+++ b/arch/csky/include/asm/bitops.h
@@ -59,7 +59,6 @@ static __always_inline unsigned long __fls(unsigned long x)
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index c867a80cab5b..4489e3d6edd3 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -168,7 +168,6 @@ static inline unsigned long __ffs(unsigned long word)
return result;
}
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index 71429f756af0..75d6ba3643b8 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -271,7 +271,6 @@ static inline unsigned long __fls(unsigned long word)
}
#include <asm-generic/bitops/lock.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 1e33666fa679..a7e01573abd8 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -32,6 +32,7 @@ config IA64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
select HAVE_FUNCTION_TRACER
+ select HAVE_SETUP_PER_CPU_AREA
select TTY
select HAVE_ARCH_TRACEHOOK
select HAVE_VIRT_CPU_ACCOUNTING
@@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY
bool
default y
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
-
config DMI
bool
default y
@@ -292,6 +290,7 @@ config NUMA
bool "NUMA support"
depends on !FLATMEM
select SMP
+ select USE_PERCPU_NUMA_NODE_ID
help
Say Y to compile the kernel to support NUMA (Non-Uniform Memory
Access). This option is for configuring high-end multiprocessor
@@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION
def_bool y
depends on NUMA
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
config HAVE_MEMORYLESS_NODES
def_bool NUMA
@@ -323,7 +318,7 @@ config ARCH_PROC_KCORE_TEXT
depends on PROC_KCORE
config IA64_MCA_RECOVERY
- tristate "MCA recovery from errors other than TLB."
+ bool "MCA recovery from errors other than TLB."
config IA64_PALINFO
tristate "/proc/pal support"
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index 2f24ee6459d2..577be93c0818 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -441,8 +441,6 @@ static __inline__ unsigned long __arch_hweight64(unsigned long x)
#endif /* __KERNEL__ */
-#include <asm-generic/bitops/find.h>
-
#ifdef __KERNEL__
#include <asm-generic/bitops/le.h>
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index a25ab9b37953..bd3ba276e69c 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -282,7 +282,7 @@ salinfo_event_open(struct inode *inode, struct file *file)
static ssize_t
salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
- struct salinfo_data *data = PDE_DATA(file_inode(file));
+ struct salinfo_data *data = pde_data(file_inode(file));
char cmd[32];
size_t size;
int i, n, cpu = -1;
@@ -340,7 +340,7 @@ static const struct proc_ops salinfo_event_proc_ops = {
static int
salinfo_log_open(struct inode *inode, struct file *file)
{
- struct salinfo_data *data = PDE_DATA(inode);
+ struct salinfo_data *data = pde_data(inode);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -365,7 +365,7 @@ salinfo_log_open(struct inode *inode, struct file *file)
static int
salinfo_log_release(struct inode *inode, struct file *file)
{
- struct salinfo_data *data = PDE_DATA(inode);
+ struct salinfo_data *data = pde_data(inode);
if (data->state == STATE_NO_DATA) {
vfree(data->log_buffer);
@@ -433,7 +433,7 @@ retry:
static ssize_t
salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
- struct salinfo_data *data = PDE_DATA(file_inode(file));
+ struct salinfo_data *data = pde_data(file_inode(file));
u8 *buf;
u64 bufsize;
@@ -494,7 +494,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu)
static ssize_t
salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
{
- struct salinfo_data *data = PDE_DATA(file_inode(file));
+ struct salinfo_data *data = pde_data(file_inode(file));
char cmd[32];
size_t size;
u32 offset;
diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c
index acb55a41260d..2bcdd7d3a1ad 100644
--- a/arch/ia64/pci/fixup.c
+++ b/arch/ia64/pci/fixup.c
@@ -76,5 +76,5 @@ static void pci_fixup_video(struct pci_dev *pdev)
}
}
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index a4b6c7108465..bc9952f8be66 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -45,7 +45,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 2db721965520..a77269c6e5ba 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -41,7 +41,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index c266a704eecd..7a74efa6b9a1 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -48,7 +48,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index f644f08dd6ed..a5323bf2eb33 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -38,7 +38,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index e4924650b687..5e80aa0869d5 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -40,7 +40,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 24113871ea76..e84326a3f62d 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -39,7 +39,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 6a7e4be70eea..337552f43339 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -59,7 +59,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 1d223247aff0..7b688f7d272a 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -37,7 +37,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 961f789f96c9..7c2cb31d63dd 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -38,7 +38,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index ff4b5e469390..ca43897af26d 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -39,7 +39,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 5f228621d0cc..e3d515f37144 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -35,7 +35,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index a600cb9e68c2..d601606c969b 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -35,7 +35,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index 7b93e1fd8ffa..51283db53667 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -529,6 +529,4 @@ static inline int __fls(int x)
#include <asm-generic/bitops/le.h>
#endif /* __KERNEL__ */
-#include <asm-generic/bitops/find.h>
-
#endif /* _M68K_BITOPS_H */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 3dd8c4618293..058446f01487 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -32,7 +32,6 @@ config MIPS
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
- select GENERIC_FIND_FIRST_BIT
select GENERIC_GETTIMEOFDAY
select GENERIC_IOMAP
select GENERIC_IRQ_PROBE
@@ -2674,6 +2673,8 @@ config NUMA
bool "NUMA Support"
depends on SYS_SUPPORTS_NUMA
select SMP
+ select HAVE_SETUP_PER_CPU_AREA
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
help
Say Y to compile the kernel to support NUMA (Non-Uniform Memory
Access). This option improves performance on systems with more
@@ -2684,14 +2685,6 @@ config NUMA
config SYS_SUPPORTS_NUMA
bool
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
config RELOCATABLE
bool "Relocatable kernel"
depends on SYS_SUPPORTS_RELOCATABLE
diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h
index 6ffdd4b5e1d0..336ac9b65235 100644
--- a/arch/mips/include/asm/asm.h
+++ b/arch/mips/include/asm/asm.h
@@ -285,7 +285,7 @@ symbol = value
#define PTR_SCALESHIFT 2
-#define PTR .word
+#define PTR_WD .word
#define PTRSIZE 4
#define PTRLOG 2
#endif
@@ -310,7 +310,7 @@ symbol = value
#define PTR_SCALESHIFT 3
-#define PTR .dword
+#define PTR_WD .dword
#define PTRSIZE 8
#define PTRLOG 3
#endif
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 3812082b8295..b4bf754f7db3 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -444,7 +444,6 @@ static inline int ffs(int word)
}
#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/find.h>
#ifdef __KERNEL__
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index b463f2aa5a61..db497a8167da 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -32,7 +32,7 @@ do { \
".previous\n" \
\
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR) "\t1b, 3b\n\t" \
+ STR(PTR_WD) "\t1b, 3b\n\t" \
".previous\n" \
\
: [tmp_dst] "=&r" (dst), [tmp_err] "=r" (error)\
@@ -54,7 +54,7 @@ do { \
".previous\n" \
\
".section\t__ex_table,\"a\"\n\t"\
- STR(PTR) "\t1b, 3b\n\t" \
+ STR(PTR_WD) "\t1b, 3b\n\t" \
".previous\n" \
\
: [tmp_err] "=r" (error) \
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index af3788589ee6..431a1c9d53fc 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -119,7 +119,7 @@ static inline void flush_scache_line(unsigned long addr)
" j 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
- " "STR(PTR)" 1b, 3b \n" \
+ " "STR(PTR_WD)" 1b, 3b \n" \
" .previous" \
: "+r" (__err) \
: "i" (op), "r" (addr), "i" (-EFAULT)); \
@@ -142,7 +142,7 @@ static inline void flush_scache_line(unsigned long addr)
" j 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
- " "STR(PTR)" 1b, 3b \n" \
+ " "STR(PTR_WD)" 1b, 3b \n" \
" .previous" \
: "+r" (__err) \
: "i" (op), "r" (addr), "i" (-EFAULT)); \
diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h
index 2022b18944b9..9af0f4d3d288 100644
--- a/arch/mips/include/asm/unaligned-emul.h
+++ b/arch/mips/include/asm/unaligned-emul.h
@@ -20,8 +20,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -41,8 +41,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -74,10 +74,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -102,8 +102,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -125,8 +125,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -145,8 +145,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -178,10 +178,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -223,14 +223,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -255,8 +255,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT));\
@@ -276,8 +276,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -296,8 +296,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -325,10 +325,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
@@ -365,14 +365,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
@@ -398,8 +398,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -419,8 +419,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -452,10 +452,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -481,8 +481,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -504,8 +504,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -524,8 +524,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -557,10 +557,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -602,14 +602,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -632,8 +632,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT));\
@@ -653,8 +653,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -673,8 +673,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -703,10 +703,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
@@ -743,14 +743,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index a39ec755e4c2..750fe569862b 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -1258,10 +1258,10 @@ fpu_emul:
" j 10b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1333,10 +1333,10 @@ fpu_emul:
" j 10b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1404,10 +1404,10 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1474,10 +1474,10 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1589,14 +1589,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1708,14 +1708,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1827,14 +1827,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1945,14 +1945,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -2007,7 +2007,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "=&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV)
@@ -2065,7 +2065,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "+&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV));
@@ -2126,7 +2126,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "=&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV)
@@ -2189,7 +2189,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "+&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV));
diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
index cbf6db98cfb3..2748c55820c2 100644
--- a/arch/mips/kernel/r2300_fpu.S
+++ b/arch/mips/kernel/r2300_fpu.S
@@ -23,14 +23,14 @@
#define EX(a,b) \
9: a,##b; \
.section __ex_table,"a"; \
- PTR 9b,fault; \
+ PTR_WD 9b,fault; \
.previous
#define EX2(a,b) \
9: a,##b; \
.section __ex_table,"a"; \
- PTR 9b,fault; \
- PTR 9b+4,fault; \
+ PTR_WD 9b,fault; \
+ PTR_WD 9b+4,fault; \
.previous
.set mips1
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index b91e91106475..2e687c60bc4f 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -31,7 +31,7 @@
.ex\@: \insn \reg, \src
.set pop
.section __ex_table,"a"
- PTR .ex\@, fault
+ PTR_WD .ex\@, fault
.previous
.endm
diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S
index f3c908abdbb8..cfde14b48fd8 100644
--- a/arch/mips/kernel/relocate_kernel.S
+++ b/arch/mips/kernel/relocate_kernel.S
@@ -147,10 +147,10 @@ LEAF(kexec_smp_wait)
kexec_args:
EXPORT(kexec_args)
-arg0: PTR 0x0
-arg1: PTR 0x0
-arg2: PTR 0x0
-arg3: PTR 0x0
+arg0: PTR_WD 0x0
+arg1: PTR_WD 0x0
+arg2: PTR_WD 0x0
+arg3: PTR_WD 0x0
.size kexec_args,PTRSIZE*4
#ifdef CONFIG_SMP
@@ -161,10 +161,10 @@ arg3: PTR 0x0
*/
secondary_kexec_args:
EXPORT(secondary_kexec_args)
-s_arg0: PTR 0x0
-s_arg1: PTR 0x0
-s_arg2: PTR 0x0
-s_arg3: PTR 0x0
+s_arg0: PTR_WD 0x0
+s_arg1: PTR_WD 0x0
+s_arg2: PTR_WD 0x0
+s_arg3: PTR_WD 0x0
.size secondary_kexec_args,PTRSIZE*4
kexec_flag:
LONG 0x1
@@ -173,17 +173,17 @@ kexec_flag:
kexec_start_address:
EXPORT(kexec_start_address)
- PTR 0x0
+ PTR_WD 0x0
.size kexec_start_address, PTRSIZE
kexec_indirection_page:
EXPORT(kexec_indirection_page)
- PTR 0
+ PTR_WD 0
.size kexec_indirection_page, PTRSIZE
relocate_new_kernel_end:
relocate_new_kernel_size:
EXPORT(relocate_new_kernel_size)
- PTR relocate_new_kernel_end - relocate_new_kernel
+ PTR_WD relocate_new_kernel_end - relocate_new_kernel
.size relocate_new_kernel_size, PTRSIZE
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index b1b2e106f711..9bfce5f75f60 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -72,10 +72,10 @@ loads_done:
.set pop
.section __ex_table,"a"
- PTR load_a4, bad_stack_a4
- PTR load_a5, bad_stack_a5
- PTR load_a6, bad_stack_a6
- PTR load_a7, bad_stack_a7
+ PTR_WD load_a4, bad_stack_a4
+ PTR_WD load_a5, bad_stack_a5
+ PTR_WD load_a6, bad_stack_a6
+ PTR_WD load_a7, bad_stack_a7
.previous
lw t0, TI_FLAGS($28) # syscall tracing enabled?
@@ -216,7 +216,7 @@ einval: li v0, -ENOSYS
#endif /* CONFIG_MIPS_MT_FPAFF */
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.align 2
.type sys_call_table, @object
EXPORT(sys_call_table)
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index f650c55a17dc..97456b2ca7dc 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -101,7 +101,7 @@ not_n32_scall:
END(handle_sysn32)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.type sysn32_call_table, @object
EXPORT(sysn32_call_table)
#include <asm/syscall_table_n32.h>
diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S
index 5d7bfc65e4d0..5f6ed4b4c399 100644
--- a/arch/mips/kernel/scall64-n64.S
+++ b/arch/mips/kernel/scall64-n64.S
@@ -109,7 +109,7 @@ illegal_syscall:
j n64_syscall_exit
END(handle_sys64)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.align 3
.type sys_call_table, @object
EXPORT(sys_call_table)
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index cedc8bd88804..d3c2616cba22 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -73,10 +73,10 @@ load_a7: lw a7, 28(t0) # argument #8 from usp
loads_done:
.section __ex_table,"a"
- PTR load_a4, bad_stack_a4
- PTR load_a5, bad_stack_a5
- PTR load_a6, bad_stack_a6
- PTR load_a7, bad_stack_a7
+ PTR_WD load_a4, bad_stack_a4
+ PTR_WD load_a5, bad_stack_a5
+ PTR_WD load_a6, bad_stack_a6
+ PTR_WD load_a7, bad_stack_a7
.previous
li t1, _TIF_WORK_SYSCALL_ENTRY
@@ -214,7 +214,7 @@ einval: li v0, -ENOSYS
END(sys32_syscall)
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.align 3
.type sys32_call_table,@object
EXPORT(sys32_call_table)
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 5512cd586e6e..ae93a607ddf7 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -122,8 +122,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
- " "STR(PTR)" 1b, 4b \n"
- " "STR(PTR)" 2b, 4b \n"
+ " "STR(PTR_WD)" 1b, 4b \n"
+ " "STR(PTR_WD)" 2b, 4b \n"
" .previous \n"
" .set pop \n"
: [old] "=&r" (old),
@@ -152,8 +152,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
- " "STR(PTR)" 1b, 5b \n"
- " "STR(PTR)" 2b, 5b \n"
+ " "STR(PTR_WD)" 1b, 5b \n"
+ " "STR(PTR_WD)" 2b, 5b \n"
" .previous \n"
" .set pop \n"
: [old] "=&r" (old),
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index a46db0807195..7767137c3e49 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -347,7 +347,7 @@ EXPORT_SYMBOL(csum_partial)
.if \mode == LEGACY_MODE; \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, .L_exc; \
+ PTR_WD 9b, .L_exc; \
.previous; \
/* This is enabled in EVA mode */ \
.else; \
@@ -356,7 +356,7 @@ EXPORT_SYMBOL(csum_partial)
((\to == USEROP) && (type == ST_INSN)); \
9: __BUILD_EVA_INSN(insn##e, reg, addr); \
.section __ex_table,"a"; \
- PTR 9b, .L_exc; \
+ PTR_WD 9b, .L_exc; \
.previous; \
.else; \
/* EVA without exception */ \
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 277c32296636..18a43f2e29c8 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -116,7 +116,7 @@
.if \mode == LEGACY_MODE; \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous; \
/* This is assembled in EVA mode */ \
.else; \
@@ -125,7 +125,7 @@
((\to == USEROP) && (type == ST_INSN)); \
9: __BUILD_EVA_INSN(insn##e, reg, addr); \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous; \
.else; \
/* \
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index b0baa3c79fad..0b342bae9a98 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -52,7 +52,7 @@
9: ___BUILD_EVA_INSN(insn, reg, addr); \
.endif; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
.macro f_fill64 dst, offset, val, fixup, mode
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index 556acf684d7b..13aaa9927ad1 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -15,7 +15,7 @@
#define EX(insn,reg,addr,handler) \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
/*
@@ -59,7 +59,7 @@ LEAF(__strncpy_from_user_asm)
jr ra
.section __ex_table,"a"
- PTR 1b, .Lfault
+ PTR_WD 1b, .Lfault
.previous
EXPORT_SYMBOL(__strncpy_from_user_asm)
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index 92b63f20ec05..6de31b616f9c 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -14,7 +14,7 @@
#define EX(insn,reg,addr,handler) \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
/*
diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c
index 9a29e94d3db1..3115d4de982c 100644
--- a/arch/mips/loongson64/vbios_quirk.c
+++ b/arch/mips/loongson64/vbios_quirk.c
@@ -3,7 +3,7 @@
#include <linux/pci.h>
#include <loongson.h>
-static void pci_fixup_radeon(struct pci_dev *pdev)
+static void pci_fixup_video(struct pci_dev *pdev)
{
struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
@@ -22,8 +22,7 @@ static void pci_fixup_radeon(struct pci_dev *pdev)
res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
IORESOURCE_PCI_FIXED;
- dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n",
- PCI_ROM_RESOURCE, res);
+ dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", res);
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, 0x9615,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, 0x9615,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 325e1552cbea..5a8002839550 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -519,17 +519,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return node_distance(cpu_to_node(from), cpu_to_node(to));
}
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
+static int __init pcpu_cpu_to_node(int cpu)
{
- return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
- MEMBLOCK_ALLOC_ACCESSIBLE,
- cpu_to_node(cpu));
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
+ return cpu_to_node(cpu);
}
void __init setup_per_cpu_areas(void)
@@ -545,7 +537,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("Failed to initialize percpu areas.");
diff --git a/arch/openrisc/include/asm/bitops.h b/arch/openrisc/include/asm/bitops.h
index 7f1ca35213d8..d773ed938acb 100644
--- a/arch/openrisc/include/asm/bitops.h
+++ b/arch/openrisc/include/asm/bitops.h
@@ -30,7 +30,6 @@
#include <asm/bitops/fls.h>
#include <asm/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h
index daa2afd974fb..0ec9cfc5131f 100644
--- a/arch/parisc/include/asm/bitops.h
+++ b/arch/parisc/include/asm/bitops.h
@@ -203,7 +203,6 @@ static __inline__ int fls(unsigned int x)
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index b669f4b9040b..3a3d05438408 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -289,6 +289,7 @@ extern int _parisc_requires_coherency;
extern int running_on_qemu;
+extern void __noreturn toc_intr(struct pt_regs *regs);
extern void toc_handler(void);
extern unsigned int toc_handler_size;
extern unsigned int toc_handler_csum;
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index cceb09855e03..b91cb45ffd4e 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -48,6 +48,7 @@ struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL;
void __init setup_cmdline(char **cmdline_p)
{
extern unsigned int boot_args[];
+ char *p;
/* Collect stuff passed in from the boot loader */
@@ -56,9 +57,19 @@ void __init setup_cmdline(char **cmdline_p)
/* called from hpux boot loader */
boot_command_line[0] = '\0';
} else {
- strlcpy(boot_command_line, (char *)__va(boot_args[1]),
+ strscpy(boot_command_line, (char *)__va(boot_args[1]),
COMMAND_LINE_SIZE);
+ /* autodetect console type (if not done by palo yet) */
+ p = boot_command_line;
+ if (!str_has_prefix(p, "console=") && !strstr(p, " console=")) {
+ strlcat(p, " console=", COMMAND_LINE_SIZE);
+ if (PAGE0->mem_cons.cl_class == CL_DUPLEX)
+ strlcat(p, "ttyS0", COMMAND_LINE_SIZE);
+ else
+ strlcat(p, "tty0", COMMAND_LINE_SIZE);
+ }
+
#ifdef CONFIG_BLK_DEV_INITRD
if (boot_args[2] != 0) /* did palo pass us a ramdisk? */
{
@@ -68,7 +79,7 @@ void __init setup_cmdline(char **cmdline_p)
#endif
}
- strcpy(command_line, boot_command_line);
+ strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
}
diff --git a/arch/parisc/kernel/toc.c b/arch/parisc/kernel/toc.c
index be9a0bebe61e..e4b48d07afbd 100644
--- a/arch/parisc/kernel/toc.c
+++ b/arch/parisc/kernel/toc.c
@@ -10,9 +10,10 @@
#include <asm/pdc.h>
#include <asm/pdc_chassis.h>
#include <asm/ldcw.h>
+#include <asm/processor.h>
static unsigned int __aligned(16) toc_lock = 1;
-DEFINE_PER_CPU_PAGE_ALIGNED(char [16384], toc_stack);
+DEFINE_PER_CPU_PAGE_ALIGNED(char [16384], toc_stack) __visible;
static void toc20_to_pt_regs(struct pt_regs *regs, struct pdc_toc_pim_20 *toc)
{
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0631c9241af3..b779603978e1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
default 9 if PPC_16K_PAGES # 9 = 23 (8MB) - 14 (16K)
default 11 # 11 = 23 (8MB) - 12 (4K)
-config HAVE_SETUP_PER_CPU_AREA
- def_bool PPC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y if PPC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y if PPC64
-
config NR_IRQS
int "Number of virtual interrupt numbers"
range 32 1048576
@@ -241,6 +232,7 @@ config PPC
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
select HAVE_RSEQ
+ select HAVE_SETUP_PER_CPU_AREA if PPC64
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
@@ -255,6 +247,8 @@ config PPC
select MMU_GATHER_RCU_TABLE_FREE
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK if PPC64
select NEED_SG_DMA_LENGTH
select OF
select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
@@ -660,6 +654,7 @@ config NUMA
bool "NUMA Memory Allocation and Scheduler Support"
depends on PPC64 && SMP
default y if PPC_PSERIES || PPC_POWERNV
+ select USE_PERCPU_NUMA_NODE_ID
help
Enable NUMA (Non-Uniform Memory Access) support.
@@ -673,10 +668,6 @@ config NODES_SHIFT
default "4"
depends on NUMA
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
config HAVE_MEMORYLESS_NODES
def_bool y
depends on NUMA
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
index c90702b04a53..48e5cd61599c 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
@@ -79,6 +79,7 @@ fman0: fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xfc000 0x1000>;
+ fsl,erratum-a009885;
};
xmdio0: mdio@fd000 {
@@ -86,6 +87,7 @@ fman0: fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xfd000 0x1000>;
+ fsl,erratum-a009885;
};
};
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index e9c945b123c6..e46143c32308 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -168,6 +168,11 @@
interrupts = <14>;
};
+ srnprot@d800060 {
+ compatible = "nintendo,hollywood-srnprot";
+ reg = <0x0d800060 0x4>;
+ };
+
GPIO: gpio@d8000c0 {
#gpio-cells = <2>;
compatible = "nintendo,hollywood-gpio";
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index 24c0e0ea5aeb..91a1b99f4e8f 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -68,7 +68,7 @@ CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
+CONFIG_RTC_DRV_GAMECUBE=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index a0c45bf2bfb1..0ab78c51455d 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -98,7 +98,7 @@ CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_HEARTBEAT=y
CONFIG_LEDS_TRIGGER_PANIC=y
CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
+CONFIG_RTC_DRV_GAMECUBE=y
CONFIG_NVMEM_NINTENDO_OTP=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index a05d8c62cbea..ea5d27dda8cf 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -328,8 +328,6 @@ unsigned long __arch_hweight64(__u64 w);
#include <asm-generic/bitops/hweight.h>
#endif
-#include <asm-generic/bitops/find.h>
-
/* wrappers that deal with KASAN instrumentation */
#include <asm-generic/bitops/instrumented-atomic.h>
#include <asm-generic/bitops/instrumented-lock.h>
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 7be27862329f..78c6a5fde1d6 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -223,6 +223,8 @@ static __always_inline void update_user_segments(u32 val)
update_user_segment(15, val);
}
+int __init find_free_bat(void);
+unsigned int bat_block_size(unsigned long base, unsigned long top);
#endif /* !__ASSEMBLY__ */
/* We happily ignore the smaller BATs on 601, we don't actually use
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 609c80f67194..f8b94f78403f 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -178,6 +178,7 @@ static inline bool pte_user(pte_t pte)
#ifndef __ASSEMBLY__
int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 33e073d6b0c4..875730d5af40 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1082,6 +1082,8 @@ static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t p
return hash__map_kernel_page(ea, pa, prot);
}
+void unmap_kernel_page(unsigned long va);
+
static inline int __meminit vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys)
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 947b5b9c4424..a832aeafe560 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -111,8 +111,10 @@ static inline void __set_fixmap(enum fixed_addresses idx,
BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
else if (WARN_ON(idx >= __end_of_fixed_addresses))
return;
-
- map_kernel_page(__fix_to_virt(idx), phys, flags);
+ if (pgprot_val(flags))
+ map_kernel_page(__fix_to_virt(idx), phys, flags);
+ else
+ unmap_kernel_page(__fix_to_virt(idx));
}
#define __early_set_fixmap __set_fixmap
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index a58fb4aa6c81..674e5aaafcbd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -473,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
return !(regs->msr & MSR_EE);
}
-static inline bool should_hard_irq_enable(void)
+static __always_inline bool should_hard_irq_enable(void)
{
return false;
}
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index fe07558173ef..827038a33064 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -39,7 +39,6 @@ struct kvm_nested_guest {
pgd_t *shadow_pgtable; /* our page table for this guest */
u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
u64 process_table; /* process table entry for this guest */
- u64 hfscr; /* HFSCR that the L1 requested for this nested guest */
long refcnt; /* number of pointers to this struct */
struct mutex tlb_lock; /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index a770443cd6e0..d9bf60bf0816 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -818,6 +818,7 @@ struct kvm_vcpu_arch {
/* For support of nested guests */
struct kvm_nested_guest *nested;
+ u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */
u32 nested_vcpu_id;
gpa_t nested_io_gpr;
#endif
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index b67742e2a9b2..d959c2a73fbf 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -64,6 +64,7 @@ extern int icache_44x_need_flush;
#ifndef __ASSEMBLY__
int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index a3313e853e5e..2816d158280a 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -308,6 +308,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define __swp_entry_to_pte(x) __pte((x).val)
int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
extern int __meminit vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys);
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index efad07081cc0..9675303b724e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -500,6 +500,7 @@
#define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
#define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i))
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 52d05b465e3e..25fc8ad9a27a 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,7 +90,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
unsigned long val, mask = -1UL;
unsigned int n = 6;
- if (is_32bit_task())
+ if (is_tsk_32bit_task(task))
mask = 0xffffffff;
while (n--) {
@@ -105,7 +105,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline int syscall_get_arch(struct task_struct *task)
{
- if (is_32bit_task())
+ if (is_tsk_32bit_task(task))
return AUDIT_ARCH_PPC;
else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
return AUDIT_ARCH_PPC64LE;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 5725029aaa29..d6e649b3c70b 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -168,8 +168,10 @@ static inline bool test_thread_local_flags(unsigned int flags)
#ifdef CONFIG_COMPAT
#define is_32bit_task() (test_thread_flag(TIF_32BIT))
+#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT))
#else
#define is_32bit_task() (IS_ENABLED(CONFIG_PPC32))
+#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32))
#endif
#if defined(CONFIG_PPC64)
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 92088f848266..7bab2d7de372 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -30,6 +30,7 @@ COMPAT_SYS_CALL_TABLE:
.ifc \srr,srr
mfspr r11,SPRN_SRR0
ld r12,_NIP(r1)
+ clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
@@ -40,6 +41,7 @@ COMPAT_SYS_CALL_TABLE:
.else
mfspr r11,SPRN_HSRR0
ld r12,_NIP(r1)
+ clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index 877817471e3c..6a029f2378e1 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -25,7 +25,7 @@ static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes
loff_t *ppos)
{
return simple_read_from_buffer(buf, nbytes, ppos,
- PDE_DATA(file_inode(file)), PAGE_SIZE);
+ pde_data(file_inode(file)), PAGE_SIZE);
}
static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
@@ -34,7 +34,7 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
return -EINVAL;
remap_pfn_range(vma, vma->vm_start,
- __pa(PDE_DATA(file_inode(file))) >> PAGE_SHIFT,
+ __pa(pde_data(file_inode(file))) >> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot);
return 0;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index d87f7c1103ce..be8577ac9397 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -771,50 +771,6 @@ void __init emergency_stack_init(void)
}
#ifdef CONFIG_SMP
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
- size_t align)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = early_cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE, node);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (early_cpu_to_node(from) == early_cpu_to_node(to))
@@ -823,53 +779,13 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static void __init pcpu_populate_pte(unsigned long addr)
+static __init int pcpu_cpu_to_node(int cpu)
{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ return early_cpu_to_node(cpu);
}
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
void __init setup_per_cpu_areas(void)
{
@@ -900,7 +816,7 @@ void __init setup_per_cpu_areas(void)
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
- pcpu_alloc_bootmem, pcpu_free_bootmem);
+ pcpu_cpu_to_node);
if (rc)
pr_warn("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
@@ -908,8 +824,7 @@ void __init setup_per_cpu_areas(void)
}
if (rc < 0)
- rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
- pcpu_populate_pte);
+ rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 62361cc7281c..cd0b8b71ecdd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -649,8 +649,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
__this_cpu_inc(irq_stat.timer_irqs_event);
} else {
now = *next_tb - now;
- if (now <= decrementer_max)
- set_dec_or_work(now);
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d1817cd9a691..84c89f08ae9a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1816,7 +1816,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
{
- struct kvm_nested_guest *nested = vcpu->arch.nested;
int r;
int srcu_idx;
@@ -1922,7 +1921,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
* it into a HEAI.
*/
if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
- (nested->hfscr & (1UL << cause))) {
+ (vcpu->arch.nested_hfscr & (1UL << cause))) {
vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
/*
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 8f8daaeeb3b7..9d373f8963ee 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -363,7 +363,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
/* set L1 state to L2 state */
vcpu->arch.nested = l2;
vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
- l2->hfscr = l2_hv.hfscr;
+ vcpu->arch.nested_hfscr = l2_hv.hfscr;
vcpu->arch.regs = l2_regs;
/* Guest must always run with ME enabled, HV disabled. */
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 94045b265b6b..203735caf691 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
-static int __init find_free_bat(void)
+int __init find_free_bat(void)
{
int b;
int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@@ -100,7 +100,7 @@ static int __init find_free_bat(void)
* - block size has to be a power of two. This is calculated by finding the
* highest bit set to 1.
*/
-static unsigned int block_size(unsigned long base, unsigned long top)
+unsigned int bat_block_size(unsigned long base, unsigned long top)
{
unsigned int max_size = SZ_256M;
unsigned int base_shift = (ffs(base) - 1) & 31;
@@ -145,7 +145,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
int idx;
while ((idx = find_free_bat()) != -1 && base != top) {
- unsigned int size = block_size(base, top);
+ unsigned int size = bat_block_size(base, top);
if (size < 128 << 10)
break;
@@ -201,12 +201,12 @@ void mmu_mark_initmem_nx(void)
unsigned long size;
for (i = 0; i < nb - 1 && base < top;) {
- size = block_size(base, top);
+ size = bat_block_size(base, top);
setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
base += size;
}
if (base < top) {
- size = block_size(base, top);
+ size = bat_block_size(base, top);
if ((top - base) > size) {
size <<= 1;
if (strict_kernel_rwx_enabled() && base + size > border)
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
index 35b287b0a8da..450a67ef0bbe 100644
--- a/arch/powerpc/mm/kasan/book3s_32.c
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -10,48 +10,51 @@ int __init kasan_init_region(void *start, size_t size)
{
unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
- unsigned long k_cur = k_start;
- int k_size = k_end - k_start;
- int k_size_base = 1 << (ffs(k_size) - 1);
+ unsigned long k_nobat = k_start;
+ unsigned long k_cur;
+ phys_addr_t phys;
int ret;
- void *block;
- block = memblock_alloc(k_size, k_size_base);
-
- if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) {
- int shift = ffs(k_size - k_size_base);
- int k_size_more = shift ? 1 << (shift - 1) : 0;
-
- setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL);
- if (k_size_more >= SZ_128K)
- setbat(-1, k_start + k_size_base, __pa(block) + k_size_base,
- k_size_more, PAGE_KERNEL);
- if (v_block_mapped(k_start))
- k_cur = k_start + k_size_base;
- if (v_block_mapped(k_start + k_size_base))
- k_cur = k_start + k_size_base + k_size_more;
-
- update_bats();
+ while (k_nobat < k_end) {
+ unsigned int k_size = bat_block_size(k_nobat, k_end);
+ int idx = find_free_bat();
+
+ if (idx == -1)
+ break;
+ if (k_size < SZ_128K)
+ break;
+ phys = memblock_phys_alloc_range(k_size, k_size, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ break;
+
+ setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL);
+ k_nobat += k_size;
}
+ if (k_nobat != k_start)
+ update_bats();
- if (!block)
- block = memblock_alloc(k_size, PAGE_SIZE);
- if (!block)
- return -ENOMEM;
+ if (k_nobat < k_end) {
+ phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ return -ENOMEM;
+ }
ret = kasan_init_shadow_page_tables(k_start, k_end);
if (ret)
return ret;
- kasan_update_early_region(k_start, k_cur, __pte(0));
+ kasan_update_early_region(k_start, k_nobat, __pte(0));
- for (; k_cur < k_end; k_cur += PAGE_SIZE) {
+ for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_off_k(k_cur);
- void *va = block + k_cur - k_start;
- pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+ pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL);
__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
}
flush_tlb_kernel_range(k_start, k_end);
+ memset(kasan_mem_to_shadow(start), 0, k_end - k_start);
+
return 0;
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index abb3198bd277..6ec5a7dd7913 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -206,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
__set_pte_at(mm, addr, ptep, pte, 0);
}
+void unmap_kernel_page(unsigned long va)
+{
+ pmd_t *pmdp = pmd_off_k(va);
+ pte_t *ptep = pte_offset_kernel(pmdp, va);
+
+ pte_clear(&init_mm, va, ptep);
+ flush_tlb_kernel_range(va, va + PAGE_SIZE);
+}
+
/*
* This is called when relaxing access to a PTE. It's also called in the page
* fault path when we don't hit any of the major fault cases, ie, a minor
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d6ffdd0f2309..56dd1f4e3e44 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}
-/* Fix the branch target addresses for subprog calls */
-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx, u32 *addrs)
+/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
+static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
+ struct codegen_context *ctx, u32 *addrs)
{
const struct bpf_insn *insn = fp->insnsi;
bool func_addr_fixed;
u64 func_addr;
u32 tmp_idx;
- int i, ret;
+ int i, j, ret;
for (i = 0; i < fp->len; i++) {
/*
@@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
* of the JITed sequence remains unchanged.
*/
ctx->idx = tmp_idx;
+ } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+ tmp_idx = ctx->idx;
+ ctx->idx = addrs[i] / 4;
+#ifdef CONFIG_PPC32
+ PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm);
+ PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm);
+ for (j = ctx->idx - addrs[i] / 4; j < 4; j++)
+ EMIT(PPC_RAW_NOP());
+#else
+ func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32);
+ PPC_LI64(b2p[insn[i].dst_reg], func_addr);
+ /* overwrite rest with nops */
+ for (j = ctx->idx - addrs[i] / 4; j < 5; j++)
+ EMIT(PPC_RAW_NOP());
+#endif
+ ctx->idx = tmp_idx;
+ i++;
}
}
@@ -200,13 +217,13 @@ skip_init_ctx:
/*
* Do not touch the prologue and epilogue as they will remain
* unchanged. Only fix the branch target address for subprog
- * calls in the body.
+ * calls in the body, and ldimm64 instructions.
*
* This does not change the offsets and lengths of the subprog
* call instruction sequences and hence, the size of the JITed
* image as well.
*/
- bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
+ bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs);
/* There is no need to perform the usual passes. */
goto skip_codegen_passes;
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index faaebd446cad..cf8dd8aea386 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
if (image && rel < 0x2000000 && rel >= -0x2000000) {
PPC_BL_ABS(func);
+ EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_NOP());
} else {
/* Load function address into r0 */
EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
@@ -290,6 +293,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
bool func_addr_fixed;
u64 func_addr;
u32 true_cond;
+ u32 tmp_idx;
+ int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -905,8 +910,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
* 16 byte instruction that uses two 'struct bpf_insn'
*/
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+ tmp_idx = ctx->idx;
PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
PPC_LI32(dst_reg, (u32)insn[i].imm);
+ /* padding to allow full 4 instructions for later patching */
+ for (j = ctx->idx - tmp_idx; j < 4; j++)
+ EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 9eae8d8ed340..e1e8c934308a 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -319,6 +319,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
u64 imm64;
u32 true_cond;
u32 tmp_idx;
+ int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -633,17 +634,21 @@ bpf_alu32_trunc:
EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
break;
case 64:
- /*
- * Way easier and faster(?) to store the value
- * into stack and then use ldbrx
- *
- * ctx->seen will be reliable in pass2, but
- * the instructions generated will remain the
- * same across all passes
- */
+ /* Store the value to stack and then use byte-reverse loads */
PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
- EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
+ } else {
+ EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1]));
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
+ EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4));
+ EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1]));
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2]));
+ }
break;
}
break;
@@ -848,9 +853,13 @@ emit_clear:
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
imm64 = ((u64)(u32) insn[i].imm) |
(((u64)(u32) insn[i+1].imm) << 32);
+ tmp_idx = ctx->idx;
+ PPC_LI64(dst_reg, imm64);
+ /* padding to allow full 5 instructions for later patching */
+ for (j = ctx->idx - tmp_idx; j < 5; j++)
+ EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
- PPC_LI64(dst_reg, imm64);
break;
/*
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index a684901b6965..b5b42cf0a703 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -776,6 +776,34 @@ static void pmao_restore_workaround(bool ebb)
mtspr(SPRN_PMC6, pmcs[5]);
}
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers) that allows PMI interrupts
+ * through to improve accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+ struct cpu_hw_events *cpuhw;
+
+ /*
+ * This could simply test local_paca->pmcregs_in_use if that were not
+ * under ifdef KVM.
+ */
+ if (!ppmu)
+ return false;
+
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+ return cpuhw->n_events;
+}
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
@@ -1327,9 +1355,20 @@ static void power_pmu_disable(struct pmu *pmu)
* Otherwise provide a warning if there is PMI pending, but
* no counter is found overflown.
*/
- if (any_pmc_overflown(cpuhw))
- clear_pmi_irq_pending();
- else
+ if (any_pmc_overflown(cpuhw)) {
+ /*
+ * Since power_pmu_disable runs under local_irq_save, it
+ * could happen that code hits a PMC overflow without PMI
+ * pending in paca. Hence only clear PMI pending if it was
+ * set.
+ *
+ * If a PMI is pending, then MSR[EE] must be disabled (because
+ * the masked PMI handler disables EE). So it is safe to
+ * call clear_pmi_irq_pending().
+ */
+ if (pmi_irq_pending())
+ clear_pmi_irq_pending();
+ } else
WARN_ON(pmi_irq_pending());
val = mmcra = cpuhw->mmcr.mmcra;
@@ -2438,36 +2477,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
perf_sample_event_took(sched_clock() - start_clock);
}
-/*
- * If the perf subsystem wants performance monitor interrupts as soon as
- * possible (e.g., to sample the instruction address and stack chain),
- * this should return true. The IRQ masking code can then enable MSR[EE]
- * in some places (e.g., interrupt handlers) that allows PMI interrupts
- * though to improve accuracy of profiles, at the cost of some performance.
- *
- * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
- * access), but in that case there is no need for prompt PMI handling.
- *
- * This currently returns true if any perf counter is being used. It
- * could possibly return false if only events are being counted rather than
- * samples being taken, but for now this is good enough.
- */
-bool power_pmu_wants_prompt_pmi(void)
-{
- struct cpu_hw_events *cpuhw;
-
- /*
- * This could simply test local_paca->pmcregs_in_use if that were not
- * under ifdef KVM.
- */
-
- if (!ppmu)
- return false;
-
- cpuhw = this_cpu_ptr(&cpu_hw_events);
- return cpuhw->n_events;
-}
-
static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index 270fa3c0d372..26427311fc72 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -375,7 +375,7 @@ int pasemi_dma_alloc_flag(void)
int bit;
retry:
- bit = find_next_bit(flags_free, MAX_FLAGS, 0);
+ bit = find_first_bit(flags_free, MAX_FLAGS);
if (bit >= MAX_FLAGS)
return -ENOSPC;
if (!test_and_clear_bit(bit, flags_free))
@@ -440,7 +440,7 @@ int pasemi_dma_alloc_fun(void)
int bit;
retry:
- bit = find_next_bit(fun_free, MAX_FLAGS, 0);
+ bit = find_first_bit(fun_free, MAX_FLAGS);
if (bit >= MAX_FLAGS)
return -ENOSPC;
if (!test_and_clear_bit(bit, fun_free))
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 4602cfe92a20..5adcbd9b5e88 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -147,27 +147,16 @@ config MMU
Select if you want MMU-based virtualised addressing space
support by paged memory management. If unsure, say 'Y'.
-config VA_BITS
- int
- default 32 if 32BIT
- default 39 if 64BIT
-
-config PA_BITS
- int
- default 34 if 32BIT
- default 56 if 64BIT
-
config PAGE_OFFSET
hex
- default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB
+ default 0xC0000000 if 32BIT
default 0x80000000 if 64BIT && !MMU
- default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
- default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
+ default 0xffffaf8000000000 if 64BIT
config KASAN_SHADOW_OFFSET
hex
depends on KASAN_GENERIC
- default 0xdfffffc800000000 if 64BIT
+ default 0xdfffffff00000000 if 64BIT
default 0xffffffff if 32BIT
config ARCH_FLATMEM_ENABLE
@@ -213,7 +202,7 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
- default 3 if 64BIT
+ default 4 if 64BIT
default 2
config LOCKDEP_SUPPORT
@@ -271,24 +260,6 @@ config MODULE_SECTIONS
bool
select HAVE_MOD_ARCH_SPECIFIC
-choice
- prompt "Maximum Physical Memory"
- default MAXPHYSMEM_1GB if 32BIT
- default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
- default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY
-
- config MAXPHYSMEM_1GB
- depends on 32BIT
- bool "1GiB"
- config MAXPHYSMEM_2GB
- depends on 64BIT
- bool "2GiB"
- config MAXPHYSMEM_128GB
- depends on 64BIT && CMODEL_MEDANY
- bool "128GiB"
-endchoice
-
-
config SMP
bool "Symmetric Multi-Processing"
help
@@ -335,6 +306,8 @@ config NUMA
select GENERIC_ARCH_NUMA
select OF_NUMA
select ARCH_SUPPORTS_NUMA_BALANCING
+ select USE_PERCPU_NUMA_NODE_ID
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
help
Enable NUMA (Non-Uniform Memory Access) support.
@@ -350,14 +323,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
config RISCV_ISA_C
bool "Emit compressed instructions when building Linux"
default y
@@ -398,12 +363,25 @@ source "kernel/Kconfig.hz"
config RISCV_SBI_V01
bool "SBI v0.1 support"
- default y
depends on RISCV_SBI
help
This config allows kernel to use SBI v0.1 APIs. This will be
deprecated in future once legacy M-mode software are no longer in use.
+config RISCV_BOOT_SPINWAIT
+ bool "Spinwait booting method"
+ depends on SMP
+ default y
+ help
+ This enables support for booting Linux via the spinwait method. In the
+ spinwait method, all cores randomly jump to Linux. One of the cores
+ gets chosen via lottery and all others keep spinning on a percpu
+ variable. This method cannot support CPU hotplug or a sparse hartid
+ scheme. It should only be enabled for M-mode Linux or platforms relying
+ on older firmware without the SBI HSM extension. All other platforms should
+ rely on ordered booting via the SBI HSM extension, which gets chosen
+ dynamically at runtime if the firmware supports it.
+
config KEXEC
bool "Kexec system call"
select KEXEC_CORE
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
index 6bfa1f24d3de..c4ed9efdff03 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -39,6 +39,11 @@
clock-frequency = <RTCCLK_FREQ>;
clock-output-names = "rtcclk";
};
+
+ gpio-poweroff {
+ compatible = "gpio-poweroff";
+ gpios = <&gpio 2 GPIO_ACTIVE_LOW>;
+ };
};
&uart0 {
diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig
index e8ceab678e8b..3f42ed87dde8 100644
--- a/arch/riscv/configs/nommu_k210_defconfig
+++ b/arch/riscv/configs/nommu_k210_defconfig
@@ -29,7 +29,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
-CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0"
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
index 46aa3879f19c..2a82a3b2992b 100644
--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -21,7 +21,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
-CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
index 385cca741b01..e1c9864b6237 100644
--- a/arch/riscv/configs/nommu_virt_defconfig
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -24,10 +24,8 @@ CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
-# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_MMU is not set
CONFIG_SOC_VIRT=y
-CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
CONFIG_CMDLINE_FORCE=y
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 396a3303c537..3540b690944b 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -20,7 +20,6 @@
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/ffs.h>
diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h
index a8ec3c5c1bd2..134590f1b843 100644
--- a/arch/riscv/include/asm/cpu_ops.h
+++ b/arch/riscv/include/asm/cpu_ops.h
@@ -40,7 +40,5 @@ struct cpu_operations {
extern const struct cpu_operations *cpu_ops[NR_CPUS];
void __init cpu_set_ops(int cpu);
-void cpu_update_secondary_bootdata(unsigned int cpuid,
- struct task_struct *tidle);
#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/riscv/include/asm/cpu_ops_sbi.h b/arch/riscv/include/asm/cpu_ops_sbi.h
new file mode 100644
index 000000000000..56e4b76d09ff
--- /dev/null
+++ b/arch/riscv/include/asm/cpu_ops_sbi.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 by Rivos Inc.
+ */
+#ifndef __ASM_CPU_OPS_SBI_H
+#define __ASM_CPU_OPS_SBI_H
+
+#ifndef __ASSEMBLY__
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/threads.h>
+
+/**
+ * struct sbi_hart_boot_data - Hart specific boot data used during booting
+ * and cpu hotplug.
+ * @task_ptr: A pointer to the hart specific tp
+ * @stack_ptr: A pointer to the hart specific sp
+ */
+struct sbi_hart_boot_data {
+ void *task_ptr;
+ void *stack_ptr;
+};
+#endif
+
+#endif /* ifndef __ASM_CPU_OPS_SBI_H */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 5046f431645c..ae711692eec9 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -40,14 +40,13 @@
#ifndef CONFIG_64BIT
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
-#define SATP_MODE SATP_MODE_32
#define SATP_ASID_BITS 9
#define SATP_ASID_SHIFT 22
#define SATP_ASID_MASK _AC(0x1FF, UL)
#else
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
-#define SATP_MODE SATP_MODE_39
+#define SATP_MODE_48 _AC(0x9000000000000000, UL)
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 54cbf07fb4e9..58a718573ad6 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -24,6 +24,7 @@ enum fixed_addresses {
FIX_HOLE,
FIX_PTE,
FIX_PMD,
+ FIX_PUD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h
index b00f503ec124..0b85e363e778 100644
--- a/arch/riscv/include/asm/kasan.h
+++ b/arch/riscv/include/asm/kasan.h
@@ -27,13 +27,18 @@
*/
#define KASAN_SHADOW_SCALE_SHIFT 3
-#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
-#define KASAN_SHADOW_START KERN_VIRT_START
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
+#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
+/*
+ * Depending on the size of the virtual address space, the region may not be
+ * aligned on PGDIR_SIZE, so force its alignment to ease its population.
+ */
+#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK)
+#define KASAN_SHADOW_END MODULES_LOWEST_VADDR
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
void kasan_init(void);
asmlinkage void kasan_early_init(void);
+void kasan_swapper_init(void);
#endif
#endif
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index b3e5ff0125fe..160e3a1e8f8b 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -31,9 +31,20 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_MMU
+#define PAGE_OFFSET kernel_map.page_offset
+#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
-
-#define KERN_VIRT_SIZE (-PAGE_OFFSET)
+#endif
+/*
+ * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
+ * define the PAGE_OFFSET value for SV39.
+ */
+#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
+#else
+#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
+#endif /* CONFIG_64BIT */
#ifndef __ASSEMBLY__
@@ -86,6 +97,7 @@ extern unsigned long riscv_pfn_base;
#endif /* CONFIG_MMU */
struct kernel_mapping {
+ unsigned long page_offset;
unsigned long virt_addr;
uintptr_t phys_addr;
uintptr_t size;
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 0af6933a7100..11823004b87a 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -11,6 +11,8 @@
#include <asm/tlb.h>
#ifdef CONFIG_MMU
+#define __HAVE_ARCH_PUD_ALLOC_ONE
+#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -36,6 +38,44 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
+ pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d_safe(p4d,
+ __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+#define pud_alloc_one pud_alloc_one
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ if (pgtable_l4_enabled)
+ return __pud_alloc_one(mm, addr);
+
+ return NULL;
+}
+
+#define pud_free pud_free
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (pgtable_l4_enabled)
+ __pud_free(mm, pud);
+}
+
+#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 228261aa9628..bbbdd66e5e2f 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -8,16 +8,36 @@
#include <linux/const.h>
-#define PGDIR_SHIFT 30
+extern bool pgtable_l4_enabled;
+
+#define PGDIR_SHIFT_L3 30
+#define PGDIR_SHIFT_L4 39
+#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
+
+#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+/* pud is folded into pgd in case of 3-level page table */
+#define PUD_SHIFT 30
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+
#define PMD_SHIFT 21
/* Size of region mapped by a page middle directory */
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
+/* Page Upper Directory entry */
+typedef struct {
+ unsigned long pud;
+} pud_t;
+
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) })
+#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))
+
/* Page Middle Directory entry */
typedef struct {
unsigned long pmd;
@@ -59,6 +79,16 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
+static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
+{
+ return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _pud_pfn(pud_t pud)
+{
+ return pud_val(pud) >> _PAGE_PFN_SHIFT;
+}
+
static inline pmd_t *pud_pgtable(pud_t pud)
{
return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
@@ -69,6 +99,17 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
+#define mm_pud_folded mm_pud_folded
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ if (pgtable_l4_enabled)
+ return false;
+
+ return true;
+}
+
+#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
{
return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
@@ -84,4 +125,69 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ *p4dp = p4d;
+ else
+ set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) == 0);
+
+ return 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) & _PAGE_PRESENT);
+
+ return 1;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return !p4d_present(p4d);
+
+ return 0;
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+ if (pgtable_l4_enabled)
+ set_p4d(p4d, __p4d(0));
+}
+
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+
+ return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
+}
+
+static inline struct page *p4d_page(p4d_t p4d)
+{
+ return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+}
+
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+#define pud_offset pud_offset
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if (pgtable_l4_enabled)
+ return p4d_pgtable(*p4d) + pud_index(address);
+
+ return (pud_t *)p4d;
+}
+
#endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 67f687aee673..7e949f25c933 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -24,6 +24,17 @@
#define KERNEL_LINK_ADDR PAGE_OFFSET
#endif
+/* Number of entries in the page global directory */
+#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
+/* Number of entries in the page table */
+#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+
+/*
+ * Half of the kernel address space (half of the entries of the page global
+ * directory) is for the direct mapping.
+ */
+#define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
+
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END PAGE_OFFSET
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
@@ -39,8 +50,10 @@
/* Modules always live before the kernel */
#ifdef CONFIG_64BIT
-#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
-#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
+/* This is used to define the end of the KASAN shadow region */
+#define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G)
+#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
+#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
#endif
/*
@@ -48,8 +61,14 @@
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
+#ifdef CONFIG_64BIT
+#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
+#else
+#define VA_BITS 32
+#endif
+
#define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
#define VMEMMAP_END VMALLOC_START
#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
@@ -83,8 +102,7 @@
#ifndef __ASSEMBLY__
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
+#include <asm-generic/pgtable-nop4d.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
@@ -107,12 +125,20 @@
#define XIP_FIXUP(addr) (addr)
#endif /* CONFIG_XIP_KERNEL */
-#ifdef CONFIG_MMU
-/* Number of entries in the page global directory */
-#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
-/* Number of entries in the page table */
-#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+struct pt_alloc_ops {
+ pte_t *(*get_pte_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pmd)(uintptr_t va);
+ pud_t *(*get_pud_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pud)(uintptr_t va);
+#endif
+};
+
+extern struct pt_alloc_ops pt_ops __initdata;
+#ifdef CONFIG_MMU
/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
@@ -659,7 +685,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
* and give the kernel the other (upper) half.
*/
#ifdef CONFIG_64BIT
-#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE)
+#define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE)
#else
#define KERN_VIRT_START FIXADDR_START
#endif
@@ -667,11 +693,22 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
/*
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
+ * Task size is:
+ * - 0x9fc00000 (~2.5GB) for RV32.
+ * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
+ * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
+ *
+ * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
+ * Instruction Set Manual Volume II: Privileged Architecture" states that
+ * "load and store effective addresses, which are 64bits, must have bits
+ * 63–48 all equal to bit 47, or else a page-fault exception will occur."
*/
#ifdef CONFIG_64BIT
-#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
#else
-#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE_MIN TASK_SIZE
#endif
#else /* CONFIG_MMU */
@@ -697,6 +734,8 @@ extern uintptr_t _dtb_early_pa;
#define dtb_early_va _dtb_early_va
#define dtb_early_pa _dtb_early_pa
#endif /* CONFIG_XIP_KERNEL */
+extern u64 satp_mode;
+extern bool pgtable_l4_enabled;
void paging_init(void);
void misc_mem_init(void);
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 26ba6f2d7a40..d1c37479d828 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -8,6 +8,7 @@
#define _ASM_RISCV_SBI_H
#include <linux/types.h>
+#include <linux/cpumask.h>
#ifdef CONFIG_RISCV_SBI
enum sbi_ext_id {
@@ -128,27 +129,27 @@ long sbi_get_mimpid(void);
void sbi_set_timer(uint64_t stime_value);
void sbi_shutdown(void);
void sbi_clear_ipi(void);
-int sbi_send_ipi(const unsigned long *hart_mask);
-int sbi_remote_fence_i(const unsigned long *hart_mask);
-int sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_send_ipi(const struct cpumask *cpu_mask);
+int sbi_remote_fence_i(const struct cpumask *cpu_mask);
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid);
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
@@ -183,7 +184,7 @@ static inline unsigned long sbi_mk_version(unsigned long major,
int sbi_err_map_linux_errno(int err);
#else /* CONFIG_RISCV_SBI */
-static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; }
+static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
static inline void sbi_init(void) {}
#endif /* CONFIG_RISCV_SBI */
#endif /* _ASM_RISCV_SBI_H */
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index 6ad749f42807..23170c933d73 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -92,8 +92,6 @@ static inline void riscv_clear_ipi(void)
#endif /* CONFIG_SMP */
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
-
#if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP)
bool cpu_has_hotplug(unsigned int cpu);
#else
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h
index 45a7018a8118..63acaecc3374 100644
--- a/arch/riscv/include/asm/sparsemem.h
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -4,7 +4,11 @@
#define _ASM_RISCV_SPARSEMEM_H
#ifdef CONFIG_SPARSEMEM
-#define MAX_PHYSMEM_BITS CONFIG_PA_BITS
+#ifdef CONFIG_64BIT
+#define MAX_PHYSMEM_BITS 56
+#else
+#define MAX_PHYSMEM_BITS 34
+#endif /* CONFIG_64BIT */
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 3397ddac1a30..612556faa527 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -43,7 +43,8 @@ obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
-obj-$(CONFIG_SMP) += cpu_ops_spinwait.o
+
+obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 253126e4beef..df0519a64eaf 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
#include <asm/kvm_host.h>
#include <asm/thread_info.h>
#include <asm/ptrace.h>
+#include <asm/cpu_ops_sbi.h>
void asm_offsets(void);
@@ -468,4 +469,6 @@ void asm_offsets(void)
DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN));
OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr);
+ OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr);
+ OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
}
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index f13b2c9ea912..ad0a7e9f828b 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -7,6 +7,7 @@
#include <linux/seq_file.h>
#include <linux/of.h>
#include <asm/smp.h>
+#include <asm/pgtable.h>
/*
* Returns the hart ID of the given device tree node, or -ENODEV if the node
@@ -71,18 +72,19 @@ static void print_isa(struct seq_file *f, const char *isa)
seq_puts(f, "\n");
}
-static void print_mmu(struct seq_file *f, const char *mmu_type)
+static void print_mmu(struct seq_file *f)
{
+ char sv_type[16];
+
#if defined(CONFIG_32BIT)
- if (strcmp(mmu_type, "riscv,sv32") != 0)
- return;
+ strncpy(sv_type, "sv32", 5);
#elif defined(CONFIG_64BIT)
- if (strcmp(mmu_type, "riscv,sv39") != 0 &&
- strcmp(mmu_type, "riscv,sv48") != 0)
- return;
+ if (pgtable_l4_enabled)
+ strncpy(sv_type, "sv48", 5);
+ else
+ strncpy(sv_type, "sv39", 5);
#endif
-
- seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
+ seq_printf(f, "mmu\t\t: %s\n", sv_type);
}
static void *c_start(struct seq_file *m, loff_t *pos)
@@ -107,14 +109,13 @@ static int c_show(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
struct device_node *node = of_get_cpu_node(cpu_id, NULL);
- const char *compat, *isa, *mmu;
+ const char *compat, *isa;
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
if (!of_property_read_string(node, "riscv,isa", &isa))
print_isa(m, isa);
- if (!of_property_read_string(node, "mmu-type", &mmu))
- print_mmu(m, mmu);
+ print_mmu(m);
if (!of_property_read_string(node, "compatible", &compat)
&& strcmp(compat, "riscv"))
seq_printf(m, "uarch\t\t: %s\n", compat);
diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c
index 1985884fe829..170d07e57721 100644
--- a/arch/riscv/kernel/cpu_ops.c
+++ b/arch/riscv/kernel/cpu_ops.c
@@ -8,37 +8,29 @@
#include <linux/of.h>
#include <linux/string.h>
#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-void *__cpu_up_stack_pointer[NR_CPUS] __section(".data");
-void *__cpu_up_task_pointer[NR_CPUS] __section(".data");
-
extern const struct cpu_operations cpu_ops_sbi;
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
extern const struct cpu_operations cpu_ops_spinwait;
-
-void cpu_update_secondary_bootdata(unsigned int cpuid,
- struct task_struct *tidle)
-{
- int hartid = cpuid_to_hartid_map(cpuid);
-
- /* Make sure tidle is updated */
- smp_mb();
- WRITE_ONCE(__cpu_up_stack_pointer[hartid],
- task_stack_page(tidle) + THREAD_SIZE);
- WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
-}
+#else
+const struct cpu_operations cpu_ops_spinwait = {
+ .name = "",
+ .cpu_prepare = NULL,
+ .cpu_start = NULL,
+};
+#endif
void __init cpu_set_ops(int cpuid)
{
#if IS_ENABLED(CONFIG_RISCV_SBI)
if (sbi_probe_extension(SBI_EXT_HSM) > 0) {
if (!cpuid)
- pr_info("SBI v0.2 HSM extension detected\n");
+ pr_info("SBI HSM extension detected\n");
cpu_ops[cpuid] = &cpu_ops_sbi;
} else
#endif
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index 685fae72b7f5..dae29cbfe550 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -7,13 +7,22 @@
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
+#include <asm/cpu_ops_sbi.h>
#include <asm/sbi.h>
#include <asm/smp.h>
extern char secondary_start_sbi[];
const struct cpu_operations cpu_ops_sbi;
+/*
+ * Ordered booting via HSM brings up one cpu at a time. However, cpu hotplug
+ * can be invoked from multiple threads in parallel. Define per-cpu data
+ * to handle that.
+ */
+DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
+
static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
unsigned long priv)
{
@@ -55,14 +64,19 @@ static int sbi_hsm_hart_get_status(unsigned long hartid)
static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
{
- int rc;
unsigned long boot_addr = __pa_symbol(secondary_start_sbi);
int hartid = cpuid_to_hartid_map(cpuid);
-
- cpu_update_secondary_bootdata(cpuid, tidle);
- rc = sbi_hsm_hart_start(hartid, boot_addr, 0);
-
- return rc;
+ unsigned long hsm_data;
+ struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid);
+
+ /* Make sure tidle is updated */
+ smp_mb();
+ bdata->task_ptr = tidle;
+ bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+ /* Make sure boot data is updated */
+ smp_mb();
+ hsm_data = __pa(bdata);
+ return sbi_hsm_hart_start(hartid, boot_addr, hsm_data);
}
static int sbi_cpu_prepare(unsigned int cpuid)
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index b2c957bb68c1..346847f6c41c 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -6,11 +6,36 @@
#include <linux/errno.h>
#include <linux/of.h>
#include <linux/string.h>
+#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations cpu_ops_spinwait;
+void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data");
+void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data");
+
+static void cpu_update_secondary_bootdata(unsigned int cpuid,
+ struct task_struct *tidle)
+{
+ int hartid = cpuid_to_hartid_map(cpuid);
+
+ /*
+ * The hartid must be less than NR_CPUS to avoid out-of-bounds accesses
+ * to __cpu_spinwait_stack/task_pointer. That is not always possible
+ * for platforms with a discontiguous hartid numbering scheme. That's why
+ * spinwait booting is not the recommended approach for any platform
+ * booting Linux in S-mode and can be disabled in the future.
+ */
+ if (hartid == INVALID_HARTID || hartid >= NR_CPUS)
+ return;
+
+ /* Make sure tidle is updated */
+ smp_mb();
+ WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
+ task_stack_page(tidle) + THREAD_SIZE);
+ WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
+}
static int spinwait_cpu_prepare(unsigned int cpuid)
{
@@ -28,7 +53,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle)
* selects the first cpu to boot the kernel and causes the remainder
* of the cpus to spin in a loop waiting for their stack pointer to be
* setup by that main cpu. Writing to bootdata
- * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they
+ * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they
* can continue the boot process.
*/
cpu_update_secondary_bootdata(cpuid, tidle);
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 604d60292dd8..2363b43312fc 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -11,6 +11,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/csr.h>
+#include <asm/cpu_ops_sbi.h>
#include <asm/hwcap.h>
#include <asm/image.h>
#include "efi-header.S"
@@ -105,7 +106,8 @@ relocate:
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
- li a1, SATP_MODE
+ la a1, satp_mode
+ REG_L a1, 0(a1)
or a2, a2, a1
/*
@@ -167,15 +169,15 @@ secondary_start_sbi:
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
- slli a3, a0, LGREG
- la a4, __cpu_up_stack_pointer
- XIP_FIXUP_OFFSET a4
- la a5, __cpu_up_task_pointer
- XIP_FIXUP_OFFSET a5
- add a4, a3, a4
- add a5, a3, a5
- REG_L sp, (a4)
- REG_L tp, (a5)
+ /* a0 contains the hartid & a1 contains boot data */
+ li a2, SBI_HART_BOOT_TASK_PTR_OFFSET
+ XIP_FIXUP_OFFSET a2
+ add a2, a2, a1
+ REG_L tp, (a2)
+ li a3, SBI_HART_BOOT_STACK_PTR_OFFSET
+ XIP_FIXUP_OFFSET a3
+ add a3, a3, a1
+ REG_L sp, (a3)
.Lsecondary_start_common:
@@ -257,13 +259,13 @@ pmp_done:
li t0, SR_FS
csrc CSR_STATUS, t0
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
li t0, CONFIG_NR_CPUS
blt a0, t0, .Lgood_cores
tail .Lsecondary_park
.Lgood_cores:
-#endif
+ /* The lottery system is only required for spinwait booting method */
#ifndef CONFIG_XIP_KERNEL
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
@@ -282,6 +284,10 @@ pmp_done:
/* first time here if hart_lottery in RAM is not set */
beq t0, t1, .Lsecondary_start
+#endif /* CONFIG_XIP */
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
+
+#ifdef CONFIG_XIP_KERNEL
la sp, _end + THREAD_SIZE
XIP_FIXUP_OFFSET sp
mv s0, a0
@@ -338,16 +344,16 @@ clear_bss_done:
call soc_early_init
tail start_kernel
+#if CONFIG_RISCV_BOOT_SPINWAIT
.Lsecondary_start:
-#ifdef CONFIG_SMP
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
slli a3, a0, LGREG
- la a1, __cpu_up_stack_pointer
+ la a1, __cpu_spinwait_stack_pointer
XIP_FIXUP_OFFSET a1
- la a2, __cpu_up_task_pointer
+ la a2, __cpu_spinwait_task_pointer
XIP_FIXUP_OFFSET a2
add a1, a3, a1
add a2, a3, a2
@@ -365,7 +371,7 @@ clear_bss_done:
fence
tail .Lsecondary_start_common
-#endif
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
END(_start_kernel)
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
index aabbc3ac3e48..726731ada534 100644
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -16,7 +16,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa);
asmlinkage void __init __copy_data(void);
#endif
-extern void *__cpu_up_stack_pointer[];
-extern void *__cpu_up_task_pointer[];
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
+extern void *__cpu_spinwait_stack_pointer[];
+extern void *__cpu_spinwait_task_pointer[];
+#endif
#endif /* __ASM_HEAD_H */
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 9c0511119bad..a89243730153 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -42,12 +42,10 @@ static int riscv_gpr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
struct pt_regs *regs;
regs = task_pt_regs(target);
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
}
#ifdef CONFIG_FPU
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 9a84f0cb5175..f72527fcb347 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -16,8 +16,8 @@ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
EXPORT_SYMBOL(sbi_spec_version);
static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init;
-static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init;
-static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask,
+static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init;
+static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5) __ro_after_init;
@@ -67,6 +67,30 @@ int sbi_err_map_linux_errno(int err)
EXPORT_SYMBOL(sbi_err_map_linux_errno);
#ifdef CONFIG_RISCV_SBI_V01
+static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask)
+{
+ unsigned long cpuid, hartid;
+ unsigned long hmask = 0;
+
+ /*
+ * There is no maximum hartid concept in RISC-V and NR_CPUS must not be
+ * associated with hartid. As SBI v0.1 is only kept for backward compatibility
+ * and will be removed in the future, there is no point in supporting hartids
+ * at or above BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2
+ * should be used for platforms with hartids at or above BITS_PER_LONG.
+ */
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hartid >= BITS_PER_LONG) {
+ pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n");
+ break;
+ }
+ hmask |= 1 << hartid;
+ }
+
+ return hmask;
+}
+
/**
* sbi_console_putchar() - Writes given character to the console device.
* @ch: The data to be written to the console.
@@ -132,33 +156,44 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
#endif
}
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
- sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask,
+ unsigned long hart_mask;
+
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
+ hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
+
+ sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask),
0, 0, 0, 0, 0);
return 0;
}
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
int result = 0;
+ unsigned long hart_mask;
+
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
+ hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
/* v0.2 function IDs are equivalent to v0.1 extension IDs */
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0,
- (unsigned long)hart_mask, 0, 0, 0, 0, 0);
+ (unsigned long)&hart_mask, 0, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0,
- (unsigned long)hart_mask, start, size,
+ (unsigned long)&hart_mask, start, size,
0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0,
- (unsigned long)hart_mask, start, size,
+ (unsigned long)&hart_mask, start, size,
arg4, 0, 0);
break;
default:
@@ -180,7 +215,7 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
sbi_major_version(), sbi_minor_version());
}
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
pr_warn("IPI extension is not available in SBI v%lu.%lu\n",
sbi_major_version(), sbi_minor_version());
@@ -188,7 +223,7 @@ static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
return 0;
}
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
@@ -212,37 +247,33 @@ static void __sbi_set_timer_v02(uint64_t stime_value)
#endif
}
-static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask)
{
- unsigned long hartid, hmask_val, hbase;
- struct cpumask tmask;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0;
struct sbiret ret = {0};
int result;
- if (!hart_mask || !(*hart_mask)) {
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
- hart_mask = cpumask_bits(&tmask);
- }
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
- hmask_val = 0;
- hbase = 0;
- for_each_set_bit(hartid, hart_mask, NR_CPUS) {
- if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask_val, hbase, 0, 0, 0, 0);
+ hmask, hbase, 0, 0, 0, 0);
if (ret.error)
goto ecall_failed;
- hmask_val = 0;
+ hmask = 0;
hbase = 0;
}
- if (!hmask_val)
+ if (!hmask)
hbase = hartid;
- hmask_val |= 1UL << (hartid - hbase);
+ hmask |= 1UL << (hartid - hbase);
}
- if (hmask_val) {
+ if (hmask) {
ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask_val, hbase, 0, 0, 0, 0);
+ hmask, hbase, 0, 0, 0, 0);
if (ret.error)
goto ecall_failed;
}
@@ -252,11 +283,11 @@ static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
ecall_failed:
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
- __func__, hbase, hmask_val, result);
+ __func__, hbase, hmask, result);
return result;
}
-static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
+static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask,
unsigned long hbase, unsigned long start,
unsigned long size, unsigned long arg4,
unsigned long arg5)
@@ -267,31 +298,31 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0);
+ ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
default:
@@ -303,43 +334,39 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
if (ret.error) {
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
- __func__, hbase, hmask_val, result);
+ __func__, hbase, hmask, result);
}
return result;
}
-static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
- unsigned long hmask_val, hartid, hbase;
- struct cpumask tmask;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0;
int result;
- if (!hart_mask || !(*hart_mask)) {
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
- hart_mask = cpumask_bits(&tmask);
- }
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
- hmask_val = 0;
- hbase = 0;
- for_each_set_bit(hartid, hart_mask, NR_CPUS) {
- if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
- result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
+ result = __sbi_rfence_v02_call(fid, hmask, hbase,
start, size, arg4, arg5);
if (result)
return result;
- hmask_val = 0;
+ hmask = 0;
hbase = 0;
}
- if (!hmask_val)
+ if (!hmask)
hbase = hartid;
- hmask_val |= 1UL << (hartid - hbase);
+ hmask |= 1UL << (hartid - hbase);
}
- if (hmask_val) {
- result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+ if (hmask) {
+ result = __sbi_rfence_v02_call(fid, hmask, hbase,
start, size, arg4, arg5);
if (result)
return result;
@@ -361,44 +388,44 @@ void sbi_set_timer(uint64_t stime_value)
/**
* sbi_send_ipi() - Send an IPI to any hart.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_send_ipi(const unsigned long *hart_mask)
+int sbi_send_ipi(const struct cpumask *cpu_mask)
{
- return __sbi_send_ipi(hart_mask);
+ return __sbi_send_ipi(cpu_mask);
}
EXPORT_SYMBOL(sbi_send_ipi);
/**
* sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_fence_i(const unsigned long *hart_mask)
+int sbi_remote_fence_i(const struct cpumask *cpu_mask)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
- hart_mask, 0, 0, 0, 0);
+ cpu_mask, 0, 0, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_fence_i);
/**
* sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
* harts for the specified virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma);
@@ -406,38 +433,38 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma);
* sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
* remote harts for a virtual address range belonging to a specific ASID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
* @asid: The value of address space identifier (ASID).
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
- hart_mask, start, size, asid, 0);
+ cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
/**
* sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote
* harts for the specified guest physical address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
@@ -445,38 +472,38 @@ EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
* sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given
* remote harts for a guest physical address range belonging to a specific VMID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
* @vmid: The value of guest ID (VMID).
*
* Return: 0 if success, Error otherwise.
*/
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
- hart_mask, start, size, vmid, 0);
+ cpu_mask, start, size, vmid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid);
/**
* sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote
* harts for the current guest virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma);
@@ -485,20 +512,20 @@ EXPORT_SYMBOL(sbi_remote_hfence_vvma);
* remote harts for current guest virtual address range belonging to a specific
* ASID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
* @asid: The value of address space identifier (ASID).
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
- hart_mask, start, size, asid, 0);
+ cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid);
@@ -591,11 +618,7 @@ long sbi_get_mimpid(void)
static void sbi_send_cpumask_ipi(const struct cpumask *target)
{
- struct cpumask hartid_mask;
-
- riscv_cpuid_to_hartid_mask(target, &hartid_mask);
-
- sbi_send_ipi(cpumask_bits(&hartid_mask));
+ sbi_send_ipi(target);
}
static const struct riscv_ipi_ops sbi_ipi_ops = {
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 63241abe84eb..b42bfdc67482 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -59,16 +59,6 @@ atomic_t hart_lottery __section(".sdata")
unsigned long boot_cpu_hartid;
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
-{
- int cpu;
-
- cpumask_clear(out);
- for_each_cpu(cpu, in)
- cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
-}
-EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
-
/*
* Place kernel memory regions on the resource tree so that
* kexec-tools can retrieve them from /proc/iomem. While there
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index bd82375db51a..622f226454d5 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -96,7 +96,7 @@ void __init setup_smp(void)
if (cpuid >= NR_CPUS) {
pr_warn("Invalid cpuid [%d] for hartid [%d]\n",
cpuid, hart);
- break;
+ continue;
}
cpuid_to_hartid_map(cpuid) = hart;
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 9af67dbdc66a..f80a34fbf102 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -114,7 +114,6 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
{
- struct cpumask hmask;
unsigned long size = PAGE_SIZE;
struct kvm_vmid *vmid = &kvm->arch.vmid;
@@ -127,8 +126,7 @@ static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
* where the Guest/VM is running.
*/
preempt_disable();
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
- sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size,
+ sbi_remote_hfence_gvma_vmid(cpu_online_mask, addr, size,
READ_ONCE(vmid->vmid));
preempt_enable();
}
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 00036b7f83b9..1bc0608a5bfd 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -82,7 +82,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
{
int ret = 0;
unsigned long i;
- struct cpumask cm, hm;
+ struct cpumask cm;
struct kvm_vcpu *tmp;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
unsigned long hmask = cp->a0;
@@ -90,7 +90,6 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
unsigned long funcid = cp->a6;
cpumask_clear(&cm);
- cpumask_clear(&hm);
kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
if (hbase != -1UL) {
if (tmp->vcpu_id < hbase)
@@ -103,17 +102,15 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
cpumask_set_cpu(tmp->cpu, &cm);
}
- riscv_cpuid_to_hartid_mask(&cm, &hm);
-
switch (funcid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
- ret = sbi_remote_fence_i(cpumask_bits(&hm));
+ ret = sbi_remote_fence_i(&cm);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- ret = sbi_remote_hfence_vvma(cpumask_bits(&hm), cp->a2, cp->a3);
+ ret = sbi_remote_hfence_vvma(&cm, cp->a2, cp->a3);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm), cp->a2,
+ ret = sbi_remote_hfence_vvma_asid(&cm, cp->a2,
cp->a3, cp->a4);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
index 4c7e13ec9ccc..07e2de14433a 100644
--- a/arch/riscv/kvm/vcpu_sbi_v01.c
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -38,7 +38,7 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
int i, ret = 0;
u64 next_cycle;
struct kvm_vcpu *rvcpu;
- struct cpumask cm, hm;
+ struct cpumask cm;
struct kvm *kvm = vcpu->kvm;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -101,15 +101,12 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
continue;
cpumask_set_cpu(rvcpu->cpu, &cm);
}
- riscv_cpuid_to_hartid_mask(&cm, &hm);
if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
- ret = sbi_remote_fence_i(cpumask_bits(&hm));
+ ret = sbi_remote_fence_i(&cm);
else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
- ret = sbi_remote_hfence_vvma(cpumask_bits(&hm),
- cp->a1, cp->a2);
+ ret = sbi_remote_hfence_vvma(&cm, cp->a1, cp->a2);
else
- ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
- cp->a1, cp->a2, cp->a3);
+ ret = sbi_remote_hfence_vvma_asid(&cm, cp->a1, cp->a2, cp->a3);
break;
default:
ret = -EINVAL;
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index 807228f8f409..2fa4f7b1813d 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -67,7 +67,6 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
{
unsigned long i;
struct kvm_vcpu *v;
- struct cpumask hmask;
struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
@@ -102,8 +101,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
* running, we force VM exits on all host CPUs using IPI and
* flush all Guest TLBs.
*/
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
- sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0);
+ sbi_remote_hfence_gvma(cpu_online_mask, 0, 0);
}
vmid->vmid = vmid_next;
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 89f81067e09e..6cb7d96ad9c7 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -67,10 +67,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
*/
smp_mb();
} else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
- cpumask_t hartid_mask;
-
- riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
- sbi_remote_fence_i(cpumask_bits(&hartid_mask));
+ sbi_remote_fence_i(&others);
} else {
on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
}
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ea54cc0c9106..7acbfbd14557 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -192,7 +192,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
switch_mm_fast:
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
((cntx & asid_mask) << SATP_ASID_SHIFT) |
- SATP_MODE);
+ satp_mode);
if (need_flush_tlb)
local_flush_tlb_all();
@@ -201,7 +201,7 @@ switch_mm_fast:
static void set_mm_noasid(struct mm_struct *mm)
{
/* Switch the page table and blindly nuke entire local TLB */
- csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE);
+ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
local_flush_tlb_all();
}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 0624c68331d8..cf4d018b7d66 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -37,13 +37,19 @@ EXPORT_SYMBOL(kernel_map);
#define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
#endif
+#ifdef CONFIG_64BIT
+u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
+#else
+u64 satp_mode = SATP_MODE_32;
+#endif
+EXPORT_SYMBOL(satp_mode);
+
+bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
+EXPORT_SYMBOL(pgtable_l4_enabled);
+
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
-#ifdef CONFIG_XIP_KERNEL
-extern char _xiprom[], _exiprom[], __data_loc;
-#endif
-
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -53,15 +59,6 @@ extern char _start[];
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;
-struct pt_alloc_ops {
- pte_t *(*get_pte_virt)(phys_addr_t pa);
- phys_addr_t (*alloc_pte)(uintptr_t va);
-#ifndef __PAGETABLE_PMD_FOLDED
- pmd_t *(*get_pmd_virt)(phys_addr_t pa);
- phys_addr_t (*alloc_pmd)(uintptr_t va);
-#endif
-};
-
static phys_addr_t dma32_phys_limit __initdata;
static void __init zone_sizes_init(void)
@@ -102,10 +99,14 @@ static void __init print_vm_layout(void)
(unsigned long)VMALLOC_END);
print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
(unsigned long)high_memory);
-#ifdef CONFIG_64BIT
- print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
- (unsigned long)ADDRESS_SPACE_END);
+ if (IS_ENABLED(CONFIG_64BIT)) {
+#ifdef CONFIG_KASAN
+ print_mlm("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif
+
+ print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
+ (unsigned long)ADDRESS_SPACE_END);
+ }
}
#else
static void print_vm_layout(void) { }
@@ -130,18 +131,8 @@ void __init mem_init(void)
print_vm_layout();
}
-/*
- * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel,
- * whereas for 64-bit kernel, the end of the virtual address space is occupied
- * by the modules/BPF/kernel mappings which reduces the available size of the
- * linear mapping.
- * Limit the memory size via mem.
- */
-#ifdef CONFIG_64BIT
-static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G;
-#else
-static phys_addr_t memory_limit = -PAGE_OFFSET;
-#endif
+/* Limit the memory size via mem. */
+static phys_addr_t memory_limit;
static int __init early_mem(char *p)
{
@@ -162,35 +153,31 @@ early_param("mem", early_mem);
static void __init setup_bootmem(void)
{
phys_addr_t vmlinux_end = __pa_symbol(&_end);
- phys_addr_t vmlinux_start = __pa_symbol(&_start);
- phys_addr_t __maybe_unused max_mapped_addr;
- phys_addr_t phys_ram_end;
+ phys_addr_t max_mapped_addr;
+ phys_addr_t phys_ram_end, vmlinux_start;
-#ifdef CONFIG_XIP_KERNEL
- vmlinux_start = __pa_symbol(&_sdata);
-#endif
+ if (IS_ENABLED(CONFIG_XIP_KERNEL))
+ vmlinux_start = __pa_symbol(&_sdata);
+ else
+ vmlinux_start = __pa_symbol(&_start);
memblock_enforce_memory_limit(memory_limit);
/*
- * Reserve from the start of the kernel to the end of the kernel
- */
-#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
- /*
* Make sure we align the reservation on PMD_SIZE since we will
* map the kernel in the linear mapping as read-only: we do not want
* any allocation to happen between _end and the next pmd aligned page.
*/
- vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
-#endif
+ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
+ /*
+ * Reserve from the start of the kernel to the end of the kernel
+ */
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
-
phys_ram_end = memblock_end_of_DRAM();
-#ifndef CONFIG_XIP_KERNEL
- phys_ram_base = memblock_start_of_DRAM();
-#endif
-#ifndef CONFIG_64BIT
+ if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+ phys_ram_base = memblock_start_of_DRAM();
/*
* memblock allocator is not aware of the fact that last 4K bytes of
* the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -200,10 +187,11 @@ static void __init setup_bootmem(void)
* address space is occupied by the kernel mapping then this check must
* be done as soon as the kernel mapping base address is determined.
*/
- max_mapped_addr = __pa(~(ulong)0);
- if (max_mapped_addr == (phys_ram_end - 1))
- memblock_set_current_limit(max_mapped_addr - 4096);
-#endif
+ if (!IS_ENABLED(CONFIG_64BIT)) {
+ max_mapped_addr = __pa(~(ulong)0);
+ if (max_mapped_addr == (phys_ram_end - 1))
+ memblock_set_current_limit(max_mapped_addr - 4096);
+ }
min_low_pfn = PFN_UP(phys_ram_base);
max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
@@ -229,13 +217,7 @@ static void __init setup_bootmem(void)
}
#ifdef CONFIG_MMU
-static struct pt_alloc_ops _pt_ops __initdata;
-
-#ifdef CONFIG_XIP_KERNEL
-#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops))
-#else
-#define pt_ops _pt_ops
-#endif
+struct pt_alloc_ops pt_ops __initdata;
unsigned long riscv_pfn_base __ro_after_init;
EXPORT_SYMBOL(riscv_pfn_base);
@@ -245,9 +227,11 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
+#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
@@ -333,6 +317,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */
+static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
+#define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud))
+#define early_pud ((pud_t *)XIP_FIXUP(early_pud))
+#endif /* CONFIG_XIP_KERNEL */
+
static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
/* Before MMU is enabled */
@@ -352,7 +346,7 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
- BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+ BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
return (uintptr_t)early_pmd;
}
@@ -399,21 +393,97 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
create_pte_mapping(ptep, va, pa, sz, prot);
}
-#define pgd_next_t pmd_t
-#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va)
-#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa)
+static pud_t *__init get_pud_virt_early(phys_addr_t pa)
+{
+ return (pud_t *)((uintptr_t)pa);
+}
+
+static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PUD);
+ return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
+}
+
+static pud_t *__init get_pud_virt_late(phys_addr_t pa)
+{
+ return (pud_t *)__va(pa);
+}
+
+static phys_addr_t __init alloc_pud_early(uintptr_t va)
+{
+ /* Only one PUD is available for early mapping */
+ BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_pud;
+}
+
+static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_pud_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+}
+
+static void __init create_pud_mapping(pud_t *pudp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pmd_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t pud_index = pud_index(va);
+
+ if (sz == PUD_SIZE) {
+ if (pud_val(pudp[pud_index]) == 0)
+ pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (pud_val(pudp[pud_index]) == 0) {
+ next_phys = pt_ops.alloc_pmd(va);
+ pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = pt_ops.get_pmd_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
+ nextp = pt_ops.get_pmd_virt(next_phys);
+ }
+
+ create_pmd_mapping(nextp, va, pa, sz, prot);
+}
+
+#define pgd_next_t pud_t
+#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \
+ pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
+#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \
+ pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
- create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pmd
+ (pgtable_l4_enabled ? \
+ create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \
+ create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
+#define fixmap_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
+#define trampoline_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
+#define early_dtb_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pte
+#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
+#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
+#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
void __init create_pgd_mapping(pgd_t *pgdp,
uintptr_t va, phys_addr_t pa,
@@ -452,6 +522,8 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
}
#ifdef CONFIG_XIP_KERNEL
+extern char _xiprom[], _exiprom[], __data_loc;
+
/* called from head.S with MMU off */
asmlinkage void __init __copy_data(void)
{
@@ -500,6 +572,57 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
+#ifdef CONFIG_64BIT
+static void __init disable_pgtable_l4(void)
+{
+ pgtable_l4_enabled = false;
+ kernel_map.page_offset = PAGE_OFFSET_L3;
+ satp_mode = SATP_MODE_39;
+}
+
+/*
+ * There is a simple way to determine if 4-level is supported by the
+ * underlying hardware: establish 1:1 mapping in 4-level page table mode
+ * then read SATP to see if the configuration was taken into account
+ * meaning sv48 is supported.
+ */
+static __init void set_satp_mode(void)
+{
+ u64 identity_satp, hw_satp;
+ uintptr_t set_satp_mode_pmd;
+
+ set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
+ create_pgd_mapping(early_pg_dir,
+ set_satp_mode_pmd, (uintptr_t)early_pud,
+ PGDIR_SIZE, PAGE_TABLE);
+ create_pud_mapping(early_pud,
+ set_satp_mode_pmd, (uintptr_t)early_pmd,
+ PUD_SIZE, PAGE_TABLE);
+ /* Handle the case where set_satp_mode straddles 2 PMDs */
+ create_pmd_mapping(early_pmd,
+ set_satp_mode_pmd, set_satp_mode_pmd,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+ create_pmd_mapping(early_pmd,
+ set_satp_mode_pmd + PMD_SIZE,
+ set_satp_mode_pmd + PMD_SIZE,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+
+ identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
+
+ local_flush_tlb_all();
+ csr_write(CSR_SATP, identity_satp);
+ hw_satp = csr_swap(CSR_SATP, 0ULL);
+ local_flush_tlb_all();
+
+ if (hw_satp != identity_satp)
+ disable_pgtable_l4();
+
+ memset(early_pg_dir, 0, PAGE_SIZE);
+ memset(early_pud, 0, PAGE_SIZE);
+ memset(early_pmd, 0, PAGE_SIZE);
+}
+#endif
+
/*
* setup_vm() is called from head.S with MMU-off.
*
@@ -564,10 +687,15 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
- IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa,
+ IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
PGDIR_SIZE,
IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
+ if (pgtable_l4_enabled) {
+ create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
+ }
+
if (IS_ENABLED(CONFIG_64BIT)) {
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
pa, PMD_SIZE, PAGE_KERNEL);
@@ -589,11 +717,64 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
dtb_early_pa = dtb_pa;
}
+/*
+ * Point pt_ops at the *_early helpers: MMU is not enabled, page tables come
+ * directly from early_pmd/pud/p4d and the address returned is the physical one.
+ */
+void __init pt_ops_set_early(void)
+{
+ pt_ops.alloc_pte = alloc_pte_early;
+ pt_ops.get_pte_virt = get_pte_virt_early;
+#ifndef __PAGETABLE_PMD_FOLDED /* PMD and PUD helpers are only set when the PMD level is not folded */
+ pt_ops.alloc_pmd = alloc_pmd_early;
+ pt_ops.get_pmd_virt = get_pmd_virt_early;
+ pt_ops.alloc_pud = alloc_pud_early;
+ pt_ops.get_pud_virt = get_pud_virt_early;
+#endif /* !__PAGETABLE_PMD_FOLDED */
+}
+
+/*
+ * Install the *_fixmap helpers, used while the MMU is enabled but page table
+ * setup is not complete yet: they temporarily map the allocated physical
+ * pages through the fixmap since the linear mapping does not exist yet.
+ *
+ * Note that this function itself is called with MMU disabled, hence each
+ * helper address goes through kernel_mapping_pa_to_va().
+ */
+void __init pt_ops_set_fixmap(void)
+{
+ pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
+ pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
+#ifndef __PAGETABLE_PMD_FOLDED /* PMD and PUD helpers are only set when the PMD level is not folded */
+ pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
+ pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
+ pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
+ pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
+#endif /* !__PAGETABLE_PMD_FOLDED */
+}
+
+/*
+ * MMU is enabled and page table setup is complete, so from now on the
+ * generic page allocation functions (*_late helpers) set up page tables.
+ */
+void __init pt_ops_set_late(void)
+{
+ pt_ops.alloc_pte = alloc_pte_late;
+ pt_ops.get_pte_virt = get_pte_virt_late;
+#ifndef __PAGETABLE_PMD_FOLDED /* PMD and PUD helpers are only set when the PMD level is not folded */
+ pt_ops.alloc_pmd = alloc_pmd_late;
+ pt_ops.get_pmd_virt = get_pmd_virt_late;
+ pt_ops.alloc_pud = alloc_pud_late;
+ pt_ops.get_pud_virt = get_pud_virt_late;
+#endif /* !__PAGETABLE_PMD_FOLDED */
+}
+
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
kernel_map.virt_addr = KERNEL_LINK_ADDR;
+ kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
#ifdef CONFIG_XIP_KERNEL
kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
@@ -608,11 +789,24 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.phys_addr = (uintptr_t)(&_start);
kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
#endif
+
+#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+ set_satp_mode();
+#endif
+
kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
+ /*
+ * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
+ * kernel, whereas for 64-bit kernel, the end of the virtual address
+ * space is occupied by the modules/BPF/kernel mappings which reduces
+ * the available size of the linear mapping.
+ */
+ memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);
+
/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
@@ -625,23 +819,25 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif
- pt_ops.alloc_pte = alloc_pte_early;
- pt_ops.get_pte_virt = get_pte_virt_early;
-#ifndef __PAGETABLE_PMD_FOLDED
- pt_ops.alloc_pmd = alloc_pmd_early;
- pt_ops.get_pmd_virt = get_pmd_virt_early;
-#endif
+ pt_ops_set_early();
+
/* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START,
- (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
- /* Setup fixmap PMD */
+ /* Setup fixmap PUD and PMD */
+ if (pgtable_l4_enabled)
+ create_pud_mapping(fixmap_pud, FIXADDR_START,
+ (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
/* Setup trampoline PGD and PMD */
create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
- (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
+ trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ if (pgtable_l4_enabled)
+ create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
+ (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
#ifdef CONFIG_XIP_KERNEL
create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
@@ -669,7 +865,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
* Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
* range can not span multiple pmds.
*/
- BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#ifndef __PAGETABLE_PMD_FOLDED
@@ -694,6 +890,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
}
#endif
+
+ pt_ops_set_fixmap();
}
static void __init setup_vm_final(void)
@@ -702,16 +900,6 @@ static void __init setup_vm_final(void)
phys_addr_t pa, start, end;
u64 i;
- /**
- * MMU is enabled at this point. But page table setup is not complete yet.
- * fixmap page table alloc functions should be used at this point
- */
- pt_ops.alloc_pte = alloc_pte_fixmap;
- pt_ops.get_pte_virt = get_pte_virt_fixmap;
-#ifndef __PAGETABLE_PMD_FOLDED
- pt_ops.alloc_pmd = alloc_pmd_fixmap;
- pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
-#endif
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
@@ -736,26 +924,24 @@ static void __init setup_vm_final(void)
}
}
-#ifdef CONFIG_64BIT
/* Map the kernel */
- create_kernel_page_table(swapper_pg_dir, false);
+ if (IS_ENABLED(CONFIG_64BIT))
+ create_kernel_page_table(swapper_pg_dir, false);
+
+#ifdef CONFIG_KASAN
+ kasan_swapper_init();
#endif
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
+ clear_fixmap(FIX_PUD);
/* Move to swapper page table */
- csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
+ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
local_flush_tlb_all();
- /* generic page allocation functions must be used to setup page table */
- pt_ops.alloc_pte = alloc_pte_late;
- pt_ops.get_pte_virt = get_pte_virt_late;
-#ifndef __PAGETABLE_PMD_FOLDED
- pt_ops.alloc_pmd = alloc_pmd_late;
- pt_ops.get_pmd_virt = get_pmd_virt_late;
-#endif
+ pt_ops_set_late();
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
@@ -791,12 +977,10 @@ static void __init reserve_crashkernel(void)
* since it doesn't make much sense and we have limited memory
* resources.
*/
-#ifdef CONFIG_CRASH_DUMP
if (is_kdump_kernel()) {
pr_info("crashkernel: ignoring reservation request\n");
return;
}
-#endif
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 54294f83513d..f61f7ca6fe0f 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -11,45 +11,27 @@
#include <asm/fixmap.h>
#include <asm/pgalloc.h>
-extern pgd_t early_pg_dir[PTRS_PER_PGD];
-asmlinkage void __init kasan_early_init(void)
-{
- uintptr_t i;
- pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
-
- BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
- KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
-
- for (i = 0; i < PTRS_PER_PTE; ++i)
- set_pte(kasan_early_shadow_pte + i,
- mk_pte(virt_to_page(kasan_early_shadow_page),
- PAGE_KERNEL));
-
- for (i = 0; i < PTRS_PER_PMD; ++i)
- set_pmd(kasan_early_shadow_pmd + i,
- pfn_pmd(PFN_DOWN
- (__pa((uintptr_t) kasan_early_shadow_pte)),
- __pgprot(_PAGE_TABLE)));
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
-
- /* init for swapper_pg_dir */
- pgd = pgd_offset_k(KASAN_SHADOW_START);
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
+/*
+ * Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57
+ * which is right before the kernel.
+ *
+ * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate
+ * the page global directory with kasan_early_shadow_pmd.
+ *
+ * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping
+ * must be divided as follows:
+ * - the first PGD entry, although incomplete, is populated with
+ * kasan_early_shadow_pud/p4d
+ * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d
+ * - the last PGD entry is shared with the kernel mapping so populated at the
+ * lower levels pud/p4d
+ *
+ * In addition, when shallow populating a kasan region (for example vmalloc),
+ * this region may also not be aligned on PGDIR size, so we must go down to the
+ * pud level too.
+ */
- local_flush_tlb_all();
-}
+extern pgd_t early_pg_dir[PTRS_PER_PGD];
static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
{
@@ -73,15 +55,19 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
}
-static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
+static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
pmd_t *pmdp, *base_pmd;
unsigned long next;
- base_pmd = (pmd_t *)pgd_page_vaddr(*pgd);
- if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+ if (pud_none(*pud)) {
base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ } else {
+ base_pmd = (pmd_t *)pud_pgtable(*pud);
+ if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+ base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ }
pmdp = base_pmd + pmd_index(vaddr);
@@ -105,59 +91,207 @@ static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned
* it entirely, memblock could allocate a page at a physical address
* where KASAN is not populated yet and then we'd get a page fault.
*/
- set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+ set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+}
+
+static void __init kasan_populate_pud(pgd_t *pgd,
+ unsigned long vaddr, unsigned long end,
+ bool early) /* Populate the PUD entries covering [vaddr, end) below *pgd */
+{
+ phys_addr_t phys_addr;
+ pud_t *pudp, *base_pud;
+ unsigned long next;
+
+ if (early) {
+ /*
+ * pgd_page_vaddr can't be used here: it would return a linear
+ * mapping address, which is not mapped yet. Populating
+ * early_pg_dir needs the physical address whereas populating
+ * swapper_pg_dir needs the kernel virtual address, so rely on
+ * the pt_ops facility to get the right one.
+ */
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+ } else {
+ base_pud = (pud_t *)pgd_page_vaddr(*pgd);
+ if (base_pud == lm_alias(kasan_early_shadow_pud)) /* still the shared early shadow PUD: use a fresh table */
+ base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); /* NOTE(review): result is not checked */
+ }
+
+ pudp = base_pud + pud_index(vaddr);
+
+ do {
+ next = pud_addr_end(vaddr, end);
+
+ if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
+ if (early) { /* early: point the entry at the shared kasan_early_shadow_pmd */
+ phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd));
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ } else { /* late: try to back the whole entry with one PUD_SIZE allocation */
+ phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
+ if (phys_addr) {
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+ }
+
+ kasan_populate_pmd(pudp, vaddr, next); /* otherwise populate at the PMD level */
+ } while (pudp++, vaddr = next, vaddr != end);
+
+ /*
+ * Wait for the whole PGD to be populated before setting the PGD in
+ * the page table, otherwise, if we did set the PGD before populating
+ * it entirely, memblock could allocate a page at a physical address
+ * where KASAN is not populated yet and then we'd get a page fault.
+ */
+ if (!early)
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
}
-static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end)
+#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)kasan_early_shadow_pud : \
+ (uintptr_t)kasan_early_shadow_pmd) /* early shadow table one level below the PGD: PUD when pgtable_l4_enabled, PMD otherwise */
+#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \
+ (pgtable_l4_enabled ? \
+ kasan_populate_pud(pgdp, vaddr, next, early) : \
+ kasan_populate_pmd((pud_t *)pgdp, vaddr, next)) /* populate one level below the PGD; without 4 levels the PGD entry is handled as a pud_t */
+
+static void __init kasan_populate_pgd(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end,
+ bool early) /* Populate PGD entries from pgdp covering [vaddr, end); early selects the shared early shadow tables */
{
phys_addr_t phys_addr;
- pgd_t *pgdp = pgd_offset_k(vaddr);
unsigned long next;
do {
next = pgd_addr_end(vaddr, end);
- /*
- * pgdp can't be none since kasan_early_init initialized all KASAN
- * shadow region with kasan_early_shadow_pmd: if this is stillthe case,
- * that means we can try to allocate a hugepage as a replacement.
- */
- if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) &&
- IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
- phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
- if (phys_addr) {
- set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
+ if (early) { /* early: a fully covered PGD entry points at the shared early shadow table */
+ phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next);
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
continue;
+ } else if (pgd_page_vaddr(*pgdp) ==
+ (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) {
+ /*
+ * pgdp can't be none since kasan_early_init
+ * initialized all KASAN shadow region with
+ * kasan_early_shadow_pgd_next (pud or pmd): if
+ * this is still the case, that means we can try
+ * to allocate a hugepage as a replacement.
+ */
+ phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
+ if (phys_addr) {
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
}
}
- kasan_populate_pmd(pgdp, vaddr, next);
+ kasan_populate_pgd_next(pgdp, vaddr, next, early); /* otherwise populate one level below the PGD */
} while (pgdp++, vaddr = next, vaddr != end);
}
+asmlinkage void __init kasan_early_init(void) /* Build the early shadow tables and install them in early_pg_dir */
+{
+ uintptr_t i;
+
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+ KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
+
+ for (i = 0; i < PTRS_PER_PTE; ++i) /* every early shadow PTE maps kasan_early_shadow_page */
+ set_pte(kasan_early_shadow_pte + i,
+ mk_pte(virt_to_page(kasan_early_shadow_page),
+ PAGE_KERNEL));
+
+ for (i = 0; i < PTRS_PER_PMD; ++i) /* every early shadow PMD entry points to kasan_early_shadow_pte */
+ set_pmd(kasan_early_shadow_pmd + i,
+ pfn_pmd(PFN_DOWN
+ (__pa((uintptr_t)kasan_early_shadow_pte)),
+ PAGE_TABLE));
+
+ if (pgtable_l4_enabled) { /* 4-level only: every early shadow PUD entry points to kasan_early_shadow_pmd */
+ for (i = 0; i < PTRS_PER_PUD; ++i)
+ set_pud(kasan_early_shadow_pud + i,
+ pfn_pud(PFN_DOWN
+ (__pa(((uintptr_t)kasan_early_shadow_pmd))),
+ PAGE_TABLE));
+ }
+
+ kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END, true); /* early population of the whole shadow region in early_pg_dir */
+
+ local_flush_tlb_all();
+}
+
+void __init kasan_swapper_init(void) /* Early population of the whole shadow region, starting at the PGD entry given by pgd_offset_k() */
+{
+ kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END, true); /* true: use the shared early shadow tables */
+
+ local_flush_tlb_all();
+}
+
static void __init kasan_populate(void *start, void *end)
{
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
- kasan_populate_pgd(vaddr, vend);
+ kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false); /* false: not early, the populate helpers allocate from memblock */
local_flush_tlb_all();
memset(start, KASAN_SHADOW_INIT, end - start);
}
+static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end,
+ bool kasan_populate) /* Give each PUD entry in [vaddr, end) its own PMD page if it still uses the early shadow PMD */
+{
+ unsigned long next;
+ pud_t *pudp, *base_pud;
+ pmd_t *base_pmd;
+ bool is_kasan_pmd;
+
+ base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
+ pudp = base_pud + pud_index(vaddr);
+
+ if (kasan_populate) /* seed the PUD table with the early shadow entries */
+ memcpy(base_pud, (void *)kasan_early_shadow_pgd_next,
+ sizeof(pud_t) * PTRS_PER_PUD);
+
+ do {
+ next = pud_addr_end(vaddr, end);
+ is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
+
+ if (is_kasan_pmd) { /* entry still uses the shared early shadow PMD */
+ base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); /* NOTE(review): result is not checked */
+ set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+ }
+ } while (pudp++, vaddr = next, vaddr != end);
+}
+
static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
{
unsigned long next;
void *p;
pgd_t *pgd_k = pgd_offset_k(vaddr);
+ bool is_kasan_pgd_next; /* PGD entry still points at the shared early shadow table */
do {
next = pgd_addr_end(vaddr, end);
- if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) {
+ is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) ==
+ (unsigned long)lm_alias(kasan_early_shadow_pgd_next));
+
+ if (is_kasan_pgd_next) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
+
+ if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
+ continue; /* the range covers this whole PGD entry: no PUD-level work */
+
+ kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next); /* partial PGD entry: go down to the PUD level */
} while (pgd_k++, vaddr = next, vaddr != end);
}
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 64f8201237c2..37ed760d007c 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -32,7 +32,6 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
unsigned long size, unsigned long stride)
{
struct cpumask *cmask = mm_cpumask(mm);
- struct cpumask hmask;
unsigned int cpuid;
bool broadcast;
@@ -46,9 +45,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
unsigned long asid = atomic_long_read(&mm->context.id);
if (broadcast) {
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma_asid(cpumask_bits(&hmask),
- start, size, asid);
+ sbi_remote_sfence_vma_asid(cmask, start, size, asid);
} else if (size <= stride) {
local_flush_tlb_page_asid(start, asid);
} else {
@@ -56,9 +53,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
}
} else {
if (broadcast) {
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma(cpumask_bits(&hmask),
- start, size);
+ sbi_remote_sfence_vma(cmask, start, size);
} else if (size <= stride) {
local_flush_tlb_page(start);
} else {
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 293dd6e171ed..0bcda99d1d68 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -497,7 +497,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
offset = pc - (long)&ex->insn;
if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
return -ERANGE;
- ex->insn = pc;
+ ex->insn = offset;
/*
* Since the extable follows the program, the fixup offset is always
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f6a9475cbc8c..be9f39fd06df 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -127,7 +127,6 @@ config S390
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_ENTRY
- select GENERIC_FIND_FIRST_BIT
select GENERIC_GETTIMEOFDAY
select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
@@ -946,6 +945,9 @@ config S390_GUEST
endmenu
+config S390_MODULES_SANITY_TEST_HELPERS
+ def_bool n
+
menu "Selftests"
config S390_UNWIND_SELFTEST
@@ -972,4 +974,16 @@ config S390_KPROBES_SANITY_TEST
Say N if you are unsure.
+config S390_MODULES_SANITY_TEST
+ def_tristate n
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ prompt "Enable s390 specific modules tests"
+ select S390_MODULES_SANITY_TEST_HELPERS
+ help
+ This option enables an s390 specific modules test. This option is
+ not useful for distributions or general kernels, but only for
+ kernel developers working on architecture code.
+
+ Say N if you are unsure.
endmenu
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 354e51dcb3e2..498bed9b261b 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -63,6 +63,7 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_STATIC_KEYS_SELFTEST=y
@@ -96,8 +97,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CLEANCACHE=y
-CONFIG_FRONTSWAP=y
CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_SYSFS=y
@@ -110,6 +109,7 @@ CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
CONFIG_GUP_TEST=y
+CONFIG_ANON_VMA_NAME=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -117,7 +117,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_NET_SWITCHDEV=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -186,7 +185,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
@@ -392,6 +390,7 @@ CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
@@ -401,6 +400,7 @@ CONFIG_PCI_IOV=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -502,6 +502,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
@@ -512,7 +513,6 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -543,6 +543,7 @@ CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
@@ -593,6 +594,7 @@ CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+# CONFIG_SURFACE_PLATFORMS is not set
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -757,9 +759,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
-CONFIG_CRYPTO_LIB_BLAKE2S=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -775,6 +774,8 @@ CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC32_SELFTEST=y
CONFIG_CRC4=m
CONFIG_CRC7=m
@@ -808,7 +809,6 @@ CONFIG_SLUB_DEBUG_ON=y
CONFIG_SLUB_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_VM_VMACACHE=y
CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
@@ -820,12 +820,11 @@ CONFIG_PANIC_ON_OOPS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_WQ_WATCHDOG=y
CONFIG_TEST_LOCKUP=m
-CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
-CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
+CONFIG_DEBUG_IRQFLAGS=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 8dee6c3782f3..61e36b999f67 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -61,6 +61,7 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
# CONFIG_GCC_PLUGINS is not set
@@ -91,8 +92,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CLEANCACHE=y
-CONFIG_FRONTSWAP=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
@@ -102,6 +101,7 @@ CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
+CONFIG_ANON_VMA_NAME=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -109,7 +109,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_NET_SWITCHDEV=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -178,7 +177,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
@@ -383,6 +381,7 @@ CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
@@ -392,6 +391,7 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_UEVENT_HELPER=y
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -493,6 +493,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
@@ -503,7 +504,6 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -534,6 +534,7 @@ CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
@@ -583,6 +584,7 @@ CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+# CONFIG_SURFACE_PLATFORMS is not set
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -744,9 +746,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
-CONFIG_CRYPTO_LIB_BLAKE2S=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -763,6 +762,8 @@ CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index eed3b9acfa71..c55c668dc3c7 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -1,6 +1,7 @@
# CONFIG_SWAP is not set
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_CPU_ISOLATION is not set
# CONFIG_UTS_NS is not set
# CONFIG_TIME_NS is not set
@@ -34,6 +35,7 @@ CONFIG_NET=y
# CONFIG_PCPU_DEV_REFCNT is not set
# CONFIG_ETHTOOL_NETLINK is not set
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_BLK_DEV_RAM=y
# CONFIG_DCSSBLK is not set
# CONFIG_DASD is not set
@@ -58,6 +60,7 @@ CONFIG_ZFCP=y
# CONFIG_HID is not set
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
+# CONFIG_SURFACE_PLATFORMS is not set
# CONFIG_IOMMU_SUPPORT is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index 33f973ff9744..e8f15dbb89d0 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -20,6 +20,7 @@
static char local_guest[] = " ";
static char all_guests[] = "* ";
+static char *all_groups = all_guests;
static char *guest_query;
struct diag2fc_data {
@@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr)
memcpy(parm_list.userid, query, NAME_LEN);
ASCEBC(parm_list.userid, NAME_LEN);
- parm_list.addr = (unsigned long) addr ;
+ memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
+ ASCEBC(parm_list.aci_grp, NAME_LEN);
+ parm_list.addr = (unsigned long)addr;
parm_list.size = size;
parm_list.fmt = 0x02;
- memset(parm_list.aci_grp, 0x40, NAME_LEN);
rc = -1;
diag_stat_inc(DIAG_STAT_X2FC);
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 5a530c552c23..1d40630128a5 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -387,7 +387,6 @@ static inline int fls(unsigned int word)
#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/le.h>
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 0d90cbeb89b4..e3f12db46cfc 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -109,7 +109,9 @@ struct hws_basic_entry {
unsigned int AS:2; /* 29-30 PSW address-space control */
unsigned int I:1; /* 31 entry valid or invalid */
unsigned int CL:2; /* 32-33 Configuration Level */
- unsigned int:14;
+ unsigned int H:1; /* 34 Host Indicator */
+ unsigned int LS:1; /* 35 Limited Sampling */
+ unsigned int:12;
unsigned int prim_asn:16; /* primary ASN */
unsigned long long ia; /* Instruction Address */
unsigned long long gpp; /* Guest Program Parameter */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index ce550d06abc3..d74e26b48604 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -47,53 +47,87 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
int __put_user_bad(void) __attribute__((noreturn));
int __get_user_bad(void) __attribute__((noreturn));
+union oac { /* control word that __put_get_user_asm() loads into register 0 before mvcos */
+ unsigned int val; /* both 16-bit halves below viewed as one word */
+ struct {
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2; /* set to PSW_BITS_AS_SECONDARY by __put_user_asm() */
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1; /* set to 1 together with as; presumably marks as as valid - TODO confirm */
+ } oac1; /* filled in by __put_user_asm(); presumably governs the first (destination) mvcos operand - TODO confirm */
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2; /* set to PSW_BITS_AS_SECONDARY by __get_user_asm() */
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1; /* set to 1 together with as; presumably marks as as valid - TODO confirm */
+ } oac2; /* filled in by __get_user_asm(); presumably governs the second (source) mvcos operand - TODO confirm */
+ };
+};
+
#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
-#define __put_get_user_asm(to, from, size, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- insn " 0,%[spec]\n" \
- "0: mvcos %[_to],%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- ".pushsection .fixup, \"ax\"\n" \
- "3: lhi %[rc],%[retval]\n" \
- " jg 2b\n" \
- ".popsection\n" \
- EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
- : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [retval] "K" (-EFAULT), [spec] "K" (0x81UL) \
- : "cc", "0"); \
- __rc; \
+#define __put_get_user_asm(to, from, size, oac_spec) \
+({ \
+ int __rc; \
+ \
+ asm volatile( \
+ " lr 0,%[spec]\n" \
+ "0: mvcos %[_to],%[_from],%[_size]\n" \
+ "1: xr %[rc],%[rc]\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: lhi %[rc],%[retval]\n" \
+ " jg 2b\n" \
+ ".popsection\n" \
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
+ : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \
+ : [_size] "d" (size), [_from] "Q" (*(from)), \
+ [retval] "K" (-EFAULT), [spec] "d" (oac_spec.val) \
+ : "cc", "0"); \
+ __rc; \
})
+#define __put_user_asm(to, from, size) \
+ __put_get_user_asm(to, from, size, ((union oac) { \
+ .oac1.as = PSW_BITS_AS_SECONDARY, \
+ .oac1.a = 1 \
+ }))
+
+#define __get_user_asm(to, from, size) \
+ __put_get_user_asm(to, from, size, ((union oac) { \
+ .oac2.as = PSW_BITS_AS_SECONDARY, \
+ .oac2.a = 1 \
+ })) \
+
static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
{
int rc;
switch (size) {
case 1:
- rc = __put_get_user_asm((unsigned char __user *)ptr,
- (unsigned char *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned char __user *)ptr,
+ (unsigned char *)x,
+ size);
break;
case 2:
- rc = __put_get_user_asm((unsigned short __user *)ptr,
- (unsigned short *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned short __user *)ptr,
+ (unsigned short *)x,
+ size);
break;
case 4:
- rc = __put_get_user_asm((unsigned int __user *)ptr,
- (unsigned int *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned int __user *)ptr,
+ (unsigned int *)x,
+ size);
break;
case 8:
- rc = __put_get_user_asm((unsigned long __user *)ptr,
- (unsigned long *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned long __user *)ptr,
+ (unsigned long *)x,
+ size);
break;
default:
__put_user_bad();
@@ -108,24 +142,24 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
switch (size) {
case 1:
- rc = __put_get_user_asm((unsigned char *)x,
- (unsigned char __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned char *)x,
+ (unsigned char __user *)ptr,
+ size);
break;
case 2:
- rc = __put_get_user_asm((unsigned short *)x,
- (unsigned short __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned short *)x,
+ (unsigned short __user *)ptr,
+ size);
break;
case 4:
- rc = __put_get_user_asm((unsigned int *)x,
- (unsigned int __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned int *)x,
+ (unsigned int __user *)ptr,
+ size);
break;
case 8:
- rc = __put_get_user_asm((unsigned long *)x,
- (unsigned long __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned long *)x,
+ (unsigned long __user *)ptr,
+ size);
break;
default:
__get_user_bad();
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index d52d85367bf7..b032e556eeb7 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -33,7 +33,7 @@
#define DEBUGP(fmt , ...)
#endif
-#define PLT_ENTRY_SIZE 20
+#define PLT_ENTRY_SIZE 22
void *module_alloc(unsigned long size)
{
@@ -341,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
- unsigned int insn[5];
- unsigned int *ip = me->core_layout.base +
- me->arch.plt_offset +
- info->plt_offset;
-
- insn[0] = 0x0d10e310; /* basr 1,0 */
- insn[1] = 0x100a0004; /* lg 1,10(1) */
+ unsigned char insn[PLT_ENTRY_SIZE];
+ char *plt_base;
+ char *ip;
+
+ plt_base = me->core_layout.base + me->arch.plt_offset;
+ ip = plt_base + info->plt_offset;
+ *(int *)insn = 0x0d10e310; /* basr 1,0 */
+ *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */
if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
- unsigned int *ij;
- ij = me->core_layout.base +
- me->arch.plt_offset +
- me->arch.plt_size - PLT_ENTRY_SIZE;
- insn[2] = 0xa7f40000 + /* j __jump_r1 */
- (unsigned int)(u16)
- (((unsigned long) ij - 8 -
- (unsigned long) ip) / 2);
+ char *jump_r1;
+
+ jump_r1 = plt_base + me->arch.plt_size -
+ PLT_ENTRY_SIZE;
+ /* brcl 0xf,__jump_r1 */
+ *(short *)&insn[8] = 0xc0f4;
+ *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
} else {
- insn[2] = 0x07f10000; /* br %r1 */
+ *(int *)&insn[8] = 0x07f10000; /* br %r1 */
}
- insn[3] = (unsigned int) (val >> 32);
- insn[4] = (unsigned int) val;
+ *(long *)&insn[14] = val;
write(ip, insn, sizeof(insn));
info->plt_initialized = 1;
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0c9e894913dc..651a51914e34 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -264,7 +264,14 @@ static int notrace s390_validate_registers(union mci mci, int umode)
/* Validate vector registers */
union ctlreg0 cr0;
- if (!mci.vr) {
+ /*
+ * The vector validity must only be checked if not running a
+ * KVM guest. For KVM guests the machine check is forwarded by
+ * KVM and it is the responsibility of the guest to take
+ * appropriate actions. The host vector or FPU values have been
+ * saved by KVM and will be restored by KVM.
+ */
+ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) {
/*
* Vector registers can't be restored. If the kernel
* currently uses vector registers the system is
@@ -307,11 +314,21 @@ static int notrace s390_validate_registers(union mci mci, int umode)
if (cr2.gse) {
if (!mci.gs) {
/*
- * Guarded storage register can't be restored and
- * the current processes uses guarded storage.
- * It has to be terminated.
+ * 2 cases:
+ * - machine check in kernel or userspace
+ * - machine check while running SIE (KVM guest)
+ * For kernel or userspace the userspace values of
+ * guarded storage control can not be recreated, the
+ * process must be terminated.
+ * For SIE the guest values of guarded storage can not
+ * be recreated. This is either due to a bug or due to
+ * GS being disabled in the guest. The guest will be
+ * notified by KVM code and the guest's machine check
+ * handling must take care of this. The host values
+ * are saved by KVM and are not affected.
*/
- kill_task = 1;
+ if (!test_cpu_flag(CIF_MCCK_GUEST))
+ kill_task = 1;
} else {
load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
}
diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c
index 30f0242de4a5..8ee48672233f 100644
--- a/arch/s390/kernel/perf_cpum_cf_common.c
+++ b/arch/s390/kernel/perf_cpum_cf_common.c
@@ -178,7 +178,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
case CPUMF_CTR_SET_CRYPTO:
if (info->csvn >= 1 && info->csvn <= 5)
ctrset_size = 16;
- else if (info->csvn == 6)
+ else if (info->csvn == 6 || info->csvn == 7)
ctrset_size = 20;
break;
case CPUMF_CTR_SET_EXT:
@@ -188,7 +188,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
ctrset_size = 48;
else if (info->csvn >= 3 && info->csvn <= 5)
ctrset_size = 128;
- else if (info->csvn == 6)
+ else if (info->csvn == 6 || info->csvn == 7)
ctrset_size = 160;
break;
case CPUMF_CTR_SET_MT_DIAG:
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 37265f551a11..52c1fe23b823 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -344,7 +344,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
NULL,
};
-static struct attribute *cpumcf_svn_6_pmu_event_attr[] __initdata = {
+static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
@@ -715,8 +715,8 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
case 1 ... 5:
csvn = cpumcf_svn_12345_pmu_event_attr;
break;
- case 6:
- csvn = cpumcf_svn_6_pmu_event_attr;
+ case 6 ... 7:
+ csvn = cpumcf_svn_67_pmu_event_attr;
break;
default:
csvn = none;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index db62def4ef28..332a49965130 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1179,7 +1179,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
sample = (struct hws_basic_entry *) *sdbt;
while ((unsigned long *) sample < (unsigned long *) te) {
/* Check for an empty sample */
- if (!sample->def)
+ if (!sample->def || sample->LS)
break;
/* Update perf event period */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9c6d45d0d345..577f1ead6a51 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1990,7 +1990,7 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
- ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+ ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
}
return ms->base_gfn + ofs;
}
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 707cd4622c13..69feb8ed3312 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -17,4 +17,7 @@ KASAN_SANITIZE_uaccess.o := n
obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
+obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o
+obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
+
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c
new file mode 100644
index 000000000000..d056baa8fbb0
--- /dev/null
+++ b/arch/s390/lib/test_modules.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <kunit/test.h>
+#include <linux/module.h>
+
+#include "test_modules.h"
+
+#define DECLARE_RETURN(i) int test_modules_return_ ## i(void)
+REPEAT_10000(DECLARE_RETURN);
+
+/*
+ * Test that modules with many relocations are loaded properly.
+ */
+static void test_modules_many_vmlinux_relocs(struct kunit *test)
+{
+ int result = 0;
+
+#define CALL_RETURN(i) result += test_modules_return_ ## i()
+ REPEAT_10000(CALL_RETURN);
+ KUNIT_ASSERT_EQ(test, result, 49995000);
+}
+
+static struct kunit_case modules_testcases[] = {
+ KUNIT_CASE(test_modules_many_vmlinux_relocs),
+ {}
+};
+
+static struct kunit_suite modules_test_suite = {
+ .name = "modules_test_s390",
+ .test_cases = modules_testcases,
+};
+
+kunit_test_suites(&modules_test_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h
new file mode 100644
index 000000000000..43b5e4b4af3e
--- /dev/null
+++ b/arch/s390/lib/test_modules.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef TEST_MODULES_H
+#define TEST_MODULES_H
+
+#define __REPEAT_10000_3(f, x) \
+ f(x ## 0); \
+ f(x ## 1); \
+ f(x ## 2); \
+ f(x ## 3); \
+ f(x ## 4); \
+ f(x ## 5); \
+ f(x ## 6); \
+ f(x ## 7); \
+ f(x ## 8); \
+ f(x ## 9)
+#define __REPEAT_10000_2(f, x) \
+ __REPEAT_10000_3(f, x ## 0); \
+ __REPEAT_10000_3(f, x ## 1); \
+ __REPEAT_10000_3(f, x ## 2); \
+ __REPEAT_10000_3(f, x ## 3); \
+ __REPEAT_10000_3(f, x ## 4); \
+ __REPEAT_10000_3(f, x ## 5); \
+ __REPEAT_10000_3(f, x ## 6); \
+ __REPEAT_10000_3(f, x ## 7); \
+ __REPEAT_10000_3(f, x ## 8); \
+ __REPEAT_10000_3(f, x ## 9)
+#define __REPEAT_10000_1(f, x) \
+ __REPEAT_10000_2(f, x ## 0); \
+ __REPEAT_10000_2(f, x ## 1); \
+ __REPEAT_10000_2(f, x ## 2); \
+ __REPEAT_10000_2(f, x ## 3); \
+ __REPEAT_10000_2(f, x ## 4); \
+ __REPEAT_10000_2(f, x ## 5); \
+ __REPEAT_10000_2(f, x ## 6); \
+ __REPEAT_10000_2(f, x ## 7); \
+ __REPEAT_10000_2(f, x ## 8); \
+ __REPEAT_10000_2(f, x ## 9)
+#define REPEAT_10000(f) \
+ __REPEAT_10000_1(f, 0); \
+ __REPEAT_10000_1(f, 1); \
+ __REPEAT_10000_1(f, 2); \
+ __REPEAT_10000_1(f, 3); \
+ __REPEAT_10000_1(f, 4); \
+ __REPEAT_10000_1(f, 5); \
+ __REPEAT_10000_1(f, 6); \
+ __REPEAT_10000_1(f, 7); \
+ __REPEAT_10000_1(f, 8); \
+ __REPEAT_10000_1(f, 9)
+
+#endif
diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c
new file mode 100644
index 000000000000..1670349a03eb
--- /dev/null
+++ b/arch/s390/lib/test_modules_helpers.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/export.h>
+
+#include "test_modules.h"
+
+#define DEFINE_RETURN(i) \
+ int test_modules_return_ ## i(void) \
+ { \
+ return 1 ## i - 10000; \
+ } \
+ EXPORT_SYMBOL_GPL(test_modules_return_ ## i)
+REPEAT_10000(DEFINE_RETURN);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index a596e69d3c47..8a5d21461889 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -62,10 +62,14 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
unsigned long size)
{
unsigned long tmp1, tmp2;
+ union oac spec = {
+ .oac2.as = PSW_BITS_AS_SECONDARY,
+ .oac2.a = 1,
+ };
tmp1 = -4096UL;
asm volatile(
- " lghi 0,%[spec]\n"
+ " lr 0,%[spec]\n"
"0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
"6: jz 4f\n"
"1: algr %0,%3\n"
@@ -84,7 +88,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
"5:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : [spec] "K" (0x81UL)
+ : [spec] "d" (spec.val)
: "cc", "memory", "0");
return size;
}
@@ -135,10 +139,14 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
unsigned long size)
{
unsigned long tmp1, tmp2;
+ union oac spec = {
+ .oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.a = 1,
+ };
tmp1 = -4096UL;
asm volatile(
- " llilh 0,%[spec]\n"
+ " lr 0,%[spec]\n"
"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
"6: jz 4f\n"
"1: algr %0,%3\n"
@@ -157,7 +165,7 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
"5:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : [spec] "K" (0x81UL)
+ : [spec] "d" (spec.val)
: "cc", "memory", "0");
return size;
}
@@ -207,10 +215,14 @@ EXPORT_SYMBOL(raw_copy_to_user);
static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
{
unsigned long tmp1, tmp2;
+ union oac spec = {
+ .oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.a = 1,
+ };
tmp1 = -4096UL;
asm volatile(
- " llilh 0,%[spec]\n"
+ " lr 0,%[spec]\n"
"0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
" jz 4f\n"
"1: algr %0,%2\n"
@@ -228,7 +240,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
"5:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,5b)
: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
- : "a" (empty_zero_page), [spec] "K" (0x81UL)
+ : "a" (empty_zero_page), [spec] "d" (spec.val)
: "cc", "memory", "0");
return size;
}
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index 3b6c7b5b7ec9..10ceb0d6b5a9 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -68,6 +68,5 @@ static inline unsigned long __ffs(unsigned long word)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/le.h>
-#include <asm-generic/bitops/find.h>
#endif /* __ASM_SH_BITOPS_H */
diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c
index fb517b82a87b..3a76a766f423 100644
--- a/arch/sh/mm/alignment.c
+++ b/arch/sh/mm/alignment.c
@@ -140,7 +140,7 @@ static int alignment_proc_open(struct inode *inode, struct file *file)
static ssize_t alignment_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
- int *data = PDE_DATA(file_inode(file));
+ int *data = pde_data(file_inode(file));
char mode;
if (count > 0) {
@@ -161,7 +161,7 @@ static const struct proc_ops alignment_proc_ops = {
};
/*
- * This needs to be done after sysctl_init, otherwise sys/ will be
+ * This needs to be done after sysctl_init_bases(), otherwise sys/ will be
* overwritten. Actually, this shouldn't be in sys/ at all since
* it isn't a sysctl, and it doesn't contain sysctl information.
* We now locate it in /proc/cpu/alignment instead.
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 66fc08646be5..1cab1b284f1a 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -97,6 +97,9 @@ config SPARC64
select PCI_DOMAINS if PCI
select ARCH_HAS_GIGANTIC_PAGE
select HAVE_SOFTIRQ_ON_OWN_STACK
+ select HAVE_SETUP_PER_CPU_AREA
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK
config ARCH_PROC_KCORE_TEXT
def_bool y
@@ -123,15 +126,6 @@ config AUDIT_ARCH
bool
default y
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y if SPARC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y if SPARC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y if SPARC64
-
config MMU
bool
default y
diff --git a/arch/sparc/include/asm/bitops_32.h b/arch/sparc/include/asm/bitops_32.h
index 0ceff3b915a8..889afa9f990f 100644
--- a/arch/sparc/include/asm/bitops_32.h
+++ b/arch/sparc/include/asm/bitops_32.h
@@ -100,7 +100,6 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic.h>
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index ca7ea5913494..005a8ae858f1 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -52,8 +52,6 @@ unsigned int __arch_hweight8(unsigned int w);
#include <asm-generic/bitops/lock.h>
#endif /* __KERNEL__ */
-#include <asm-generic/bitops/find.h>
-
#ifdef __KERNEL__
#include <asm-generic/bitops/le.h>
diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c
index 3a66e62eb2a0..ab657b359789 100644
--- a/arch/sparc/kernel/led.c
+++ b/arch/sparc/kernel/led.c
@@ -114,18 +114,16 @@ static const struct proc_ops led_proc_ops = {
};
#endif
-static struct proc_dir_entry *led;
-
#define LED_VERSION "0.1"
static int __init led_init(void)
{
timer_setup(&led_blink_timer, led_blink, 0);
- led = proc_create("led", 0, NULL, &led_proc_ops);
- if (!led)
+#ifdef CONFIG_PROC_FS
+ if (!proc_create("led", 0, NULL, &led_proc_ops))
return -ENOMEM;
-
+#endif
printk(KERN_INFO
"led: version %s, Lars Kotthoff <metalhead@metalhead.ws>\n",
LED_VERSION);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b98a7bbe6728..a1f78e9ddaf3 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1526,50 +1526,6 @@ void smp_send_stop(void)
smp_call_function(stop_this_cpu, NULL, 0);
}
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
- size_t align)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE, node);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (cpu_to_node(from) == cpu_to_node(to))
@@ -1578,57 +1534,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
-static void __init pcpu_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- if (pgd_none(*pgd)) {
- pud_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pgd_populate(&init_mm, pgd, new);
- }
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ return cpu_to_node(cpu);
}
void __init setup_per_cpu_areas(void)
@@ -1641,8 +1549,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, 4 << 20,
pcpu_cpu_distance,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem);
+ pcpu_cpu_to_node);
if (rc)
pr_warn("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
@@ -1650,9 +1557,7 @@ void __init setup_per_cpu_areas(void)
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem,
- pcpu_populate_pte);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 407533c835fe..9f5bd41bf660 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -78,7 +78,7 @@ config X86
select ARCH_HAS_FILTER_PGPROT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
- select ARCH_HAS_KCOV if X86_64 && STACK_VALIDATION
+ select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
@@ -137,7 +137,6 @@ config X86
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_ENTRY
- select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC
@@ -187,6 +186,7 @@ config X86
select HAVE_CONTEXT_TRACKING_OFFSTACK if HAVE_CONTEXT_TRACKING
select HAVE_C_RECORDMCOUNT
select HAVE_OBJTOOL_MCOUNT if STACK_VALIDATION
+ select HAVE_BUILDTIME_MCOUNT_SORT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
@@ -240,6 +240,7 @@ config X86
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_FUNCTION_ARG_ACCESS_API
+ select HAVE_SETUP_PER_CPU_AREA
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
select HAVE_STACK_VALIDATION if X86_64
@@ -253,6 +254,8 @@ config X86
select HAVE_GENERIC_VDSO
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG if PCI
@@ -333,15 +336,6 @@ config ARCH_HAS_CPU_RELAX
config ARCH_HAS_FILTER_PGPROT
def_bool y
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y
-
config ARCH_HIBERNATION_POSSIBLE
def_bool y
@@ -1575,6 +1569,7 @@ config NUMA
depends on SMP
depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
default y if X86_BIGSMP
+ select USE_PERCPU_NUMA_NODE_ID
help
Enable NUMA (Non-Uniform Memory Access) support.
@@ -2450,10 +2445,6 @@ config ARCH_HAS_ADD_PAGES
config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
def_bool y
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
menu "Power management and ACPI options"
config ARCH_HIBERNATION_HEADER
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fd9f908debe5..c91434056c29 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6236,6 +6236,19 @@ __init int intel_pmu_init(void)
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
}
+
+ /*
+ * Quirk: For some Alder Lake machines, when all E-cores are disabled in
+ * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However,
+ * the X86_FEATURE_HYBRID_CPU is still set. The above code will
+ * mistakenly add extra counters for P-cores. Correct the number of
+ * counters here.
+ */
+ if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
+ pmu->num_counters = x86_pmu.num_counters;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ }
+
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
@@ -6340,6 +6353,8 @@ __init int intel_pmu_init(void)
}
if (x86_pmu.lbr_nr) {
+ intel_pmu_lbr_init();
+
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
/* only support branch_stack snapshot for perfmon >= v2 */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8043213b75a5..669c2be14784 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -8,14 +8,6 @@
#include "../perf_event.h"
-static const enum {
- LBR_EIP_FLAGS = 1,
- LBR_TSX = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
- [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
- [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
/*
* Intel LBR_SELECT bits
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (x86_pmu.lbr_has_info)
wrmsrl(x86_pmu.lbr_info + i, 0);
}
}
@@ -305,11 +297,10 @@ enum {
*/
static inline bool lbr_from_signext_quirk_needed(void)
{
- int lbr_format = x86_pmu.intel_cap.lbr_format;
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+ return !tsx_support && x86_pmu.lbr_has_tsx;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
void intel_pmu_lbr_restore(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
- int i;
- unsigned lbr_idx, mask;
+ bool need_info = x86_pmu.lbr_has_info;
u64 tos = task_ctx->tos;
+ unsigned lbr_idx, mask;
+ int i;
mask = x86_pmu.lbr_nr - 1;
for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, 0);
wrlbr_to(lbr_idx, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (need_info)
wrlbr_info(lbr_idx, 0);
}
@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
void intel_pmu_lbr_save(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
+ bool need_info = x86_pmu.lbr_has_info;
unsigned lbr_idx, mask;
u64 tos;
int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
- int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
- int skip = 0;
u16 cycles = 0;
- int lbr_flags = lbr_desc[lbr_format];
from = rdlbr_from(lbr_idx, NULL);
to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (call_stack && !from)
break;
- if (lbr_format == LBR_FORMAT_INFO && need_info) {
- u64 info;
-
- info = rdlbr_info(lbr_idx, NULL);
- mis = !!(info & LBR_INFO_MISPRED);
- pred = !mis;
- in_tx = !!(info & LBR_INFO_IN_TX);
- abort = !!(info & LBR_INFO_ABORT);
- cycles = (info & LBR_INFO_CYCLES);
- }
-
- if (lbr_format == LBR_FORMAT_TIME) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- cycles = ((to >> 48) & LBR_INFO_CYCLES);
-
- to = (u64)((((s64)to) << 16) >> 16);
- }
-
- if (lbr_flags & LBR_EIP_FLAGS) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- }
- if (lbr_flags & LBR_TSX) {
- in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
- abort = !!(from & LBR_FROM_FLAG_ABORT);
- skip = 3;
+ if (x86_pmu.lbr_has_info) {
+ if (need_info) {
+ u64 info;
+
+ info = rdlbr_info(lbr_idx, NULL);
+ mis = !!(info & LBR_INFO_MISPRED);
+ pred = !mis;
+ cycles = (info & LBR_INFO_CYCLES);
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(info & LBR_INFO_IN_TX);
+ abort = !!(info & LBR_INFO_ABORT);
+ }
+ }
+ } else {
+ int skip = 0;
+
+ if (x86_pmu.lbr_from_flags) {
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
+ skip = 1;
+ }
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
+ }
+ from = (u64)((((s64)from) << skip) >> skip);
+
+ if (x86_pmu.lbr_to_cycles) {
+ cycles = ((to >> 48) & LBR_INFO_CYCLES);
+ to = (u64)((((s64)to) << 16) >> 16);
+ }
}
- from = (u64)((((s64)from) << skip) >> skip);
/*
* Some CPUs report duplicated abort records,
@@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_stack.hw_idx = tos;
}
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
+
static __always_inline int get_lbr_br_type(u64 info)
{
- if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
- return 0;
+ int type = 0;
- return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+ if (static_branch_likely(&x86_lbr_type))
+ type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+
+ return type;
}
static __always_inline bool get_lbr_mispred(u64 info)
{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
+ bool mispred = 0;
- return !!(info & LBR_INFO_MISPRED);
-}
+ if (static_branch_likely(&x86_lbr_mispred))
+ mispred = !!(info & LBR_INFO_MISPRED);
-static __always_inline bool get_lbr_predicted(u64 info)
-{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
-
- return !(info & LBR_INFO_MISPRED);
+ return mispred;
}
static __always_inline u16 get_lbr_cycles(u64 info)
{
+ u16 cycles = info & LBR_INFO_CYCLES;
+
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
- !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
- return 0;
+ (!static_branch_likely(&x86_lbr_cycles) ||
+ !(info & LBR_INFO_CYC_CNT_VALID)))
+ cycles = 0;
- return info & LBR_INFO_CYCLES;
+ return cycles;
}
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
@@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->from = from;
e->to = to;
e->mispred = get_lbr_mispred(info);
- e->predicted = get_lbr_predicted(info);
+ e->predicted = !e->mispred;
e->in_tx = !!(info & LBR_INFO_IN_TX);
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
@@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
- (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ x86_pmu.lbr_has_info)
reg->config |= LBR_NO_INFO;
return 0;
@@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
+void intel_pmu_lbr_init(void)
+{
+ switch (x86_pmu.intel_cap.lbr_format) {
+ case LBR_FORMAT_EIP_FLAGS2:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_EIP_FLAGS:
+ x86_pmu.lbr_from_flags = 1;
+ break;
+
+ case LBR_FORMAT_INFO:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_INFO2:
+ x86_pmu.lbr_has_info = 1;
+ break;
+
+ case LBR_FORMAT_TIME:
+ x86_pmu.lbr_from_flags = 1;
+ x86_pmu.lbr_to_cycles = 1;
+ break;
+ }
+
+ if (x86_pmu.lbr_has_info) {
+ /*
+ * Only used in combination with baseline pebs.
+ */
+ static_branch_enable(&x86_lbr_mispred);
+ static_branch_enable(&x86_lbr_cycles);
+ }
+}
+
/*
* LBR state size is variable based on the max number of registers.
* This calculates the expected state size, which should match
@@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void)
* Check the LBR state with the corresponding software structure.
* Disable LBR XSAVES support if the size doesn't match.
*/
+ if (xfeature_size(XFEATURE_LBR) == 0)
+ return false;
+
if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
return false;
@@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
x86_pmu.lbr_nr = lbr_nr;
+ if (x86_pmu.lbr_mispred)
+ static_branch_enable(&x86_lbr_mispred);
+ if (x86_pmu.lbr_timed_lbr)
+ static_branch_enable(&x86_lbr_cycles);
+ if (x86_pmu.lbr_br_type)
+ static_branch_enable(&x86_lbr_type);
arch_lbr_xsave = is_arch_lbr_xsave_available();
if (arch_lbr_xsave) {
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index f1ba6ab2e97e..e497da9bf427 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
.cpu_init = adl_uncore_cpu_init,
- .mmio_init = tgl_uncore_mmio_init,
+ .mmio_init = adl_uncore_mmio_init,
};
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index b9687980aab6..2adeaf4de4df 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
-void adl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
+void adl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
+void adl_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 3049c646fa20..6ddadb482f68 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -494,8 +494,8 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
writel(0, box->io_addr);
}
-static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 6d735611c281..cfaf558bdb6b 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -139,6 +139,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
struct perf_event *event);
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 0f63706cdadf..f698a55bde81 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */
#include "uncore.h"
+#include "uncore_discovery.h"
/* Uncore IMC PCI IDs */
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
@@ -64,6 +65,20 @@
#define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53
#define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660
#define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641
+#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601
+#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602
+#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609
+#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a
+#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621
+#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623
+#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629
+#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637
+#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b
+#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648
+#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649
+#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650
+#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668
+#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -155,6 +170,7 @@
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
@@ -1334,6 +1350,62 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_3_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_4_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_5_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_6_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_7_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_8_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_9_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_10_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_11_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_12_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_13_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_14_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_15_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ }
};
@@ -1390,7 +1462,8 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void)
#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000
-static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+static void __uncore_imc_init_box(struct intel_uncore_box *box,
+ unsigned int base_offset)
{
struct pci_dev *pdev = tgl_uncore_get_mc_dev();
struct intel_uncore_pmu *pmu = box->pmu;
@@ -1417,11 +1490,17 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
addr |= ((resource_size_t)mch_bar << 32);
#endif
+ addr += base_offset;
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr)
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
}
+static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, 0);
+}
+
static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
.init_box = tgl_uncore_imc_freerunning_init_box,
.exit_box = uncore_mmio_exit_box,
@@ -1469,3 +1548,136 @@ void tgl_uncore_mmio_init(void)
}
/* end of Tiger Lake MMIO uncore support */
+
+/* Alder Lake MMIO uncore support */
+#define ADL_UNCORE_IMC_BASE 0xd900
+#define ADL_UNCORE_IMC_MAP_SIZE 0x200
+#define ADL_UNCORE_IMC_CTR 0xe8
+#define ADL_UNCORE_IMC_CTRL 0xd0
+#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0
+#define ADL_UNCORE_IMC_BOX_CTL 0xc4
+#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800
+#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100
+
+#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0)
+#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1)
+#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2)
+#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \
+ ADL_UNCORE_IMC_CTL_RST_CTRS)
+
+static void adl_uncore_imc_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE);
+
+ /* The global control in MC1 can control both MCs. */
+ if (box->io_addr && (box->pmu->pmu_idx == 1))
+ writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL);
+}
+
+static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(0, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static struct intel_uncore_ops adl_uncore_mmio_ops = {
+ .init_box = adl_uncore_imc_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = adl_uncore_mmio_disable_box,
+ .enable_box = adl_uncore_mmio_enable_box,
+ .disable_event = intel_generic_uncore_mmio_disable_event,
+ .enable_event = intel_generic_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00
+#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ ADL_UNC_CTL_CHMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET)
+
+static struct attribute *adl_uncore_imc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_chmask.attr,
+ &format_attr_edge.attr,
+ NULL,
+};
+
+static const struct attribute_group adl_uncore_imc_format_group = {
+ .name = "format",
+ .attrs = adl_uncore_imc_formats_attr,
+};
+
+static struct intel_uncore_type adl_uncore_imc = {
+ .name = "imc",
+ .num_counters = 5,
+ .num_boxes = 2,
+ .perf_ctr_bits = 64,
+ .perf_ctr = ADL_UNCORE_IMC_CTR,
+ .event_ctl = ADL_UNCORE_IMC_CTRL,
+ .event_mask = ADL_UNC_IMC_EVENT_MASK,
+ .box_ctl = ADL_UNCORE_IMC_BOX_CTL,
+ .mmio_offset = 0,
+ .mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE,
+ .ops = &adl_uncore_mmio_ops,
+ .format_group = &adl_uncore_imc_format_group,
+};
+
+enum perf_adl_uncore_imc_freerunning_types {
+ ADL_MMIO_UNCORE_IMC_DATA_TOTAL,
+ ADL_MMIO_UNCORE_IMC_DATA_READ,
+ ADL_MMIO_UNCORE_IMC_DATA_WRITE,
+ ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX
+};
+
+static struct freerunning_counters adl_uncore_imc_freerunning[] = {
+ [ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 },
+};
+
+static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE);
+}
+
+static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = {
+ .init_box = adl_uncore_imc_freerunning_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct intel_uncore_type adl_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+ .mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE,
+ .freerunning = adl_uncore_imc_freerunning,
+ .ops = &adl_uncore_imc_freerunning_ops,
+ .event_descs = tgl_uncore_imc_events,
+ .format_group = &tgl_uncore_imc_format_group,
+};
+
+static struct intel_uncore_type *adl_mmio_uncores[] = {
+ &adl_uncore_imc,
+ &adl_uncore_imc_free_running,
+ NULL
+};
+
+void adl_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = adl_mmio_uncores;
+}
+
+/* end of Alder Lake MMIO uncore support */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 3660f698fb2a..ed869443efb2 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -5482,7 +5482,7 @@ static struct intel_uncore_type icx_uncore_imc = {
.fixed_ctr_bits = 48,
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
- .event_descs = hswep_uncore_imc_events,
+ .event_descs = snr_uncore_imc_events,
.perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
.event_ctl = SNR_IMC_MMIO_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 9d376e528dfc..150261d929b9 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -215,7 +215,8 @@ enum {
LBR_FORMAT_EIP_FLAGS2 = 0x04,
LBR_FORMAT_INFO = 0x05,
LBR_FORMAT_TIME = 0x06,
- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
+ LBR_FORMAT_INFO2 = 0x07,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2,
};
enum {
@@ -840,6 +841,11 @@ struct x86_pmu {
bool lbr_double_abort; /* duplicated lbr aborts */
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
+ unsigned int lbr_has_info:1;
+ unsigned int lbr_has_tsx:1;
+ unsigned int lbr_from_flags:1;
+ unsigned int lbr_to_cycles:1;
+
/*
* Intel Architectural LBR CPUID Enumeration
*/
@@ -1392,6 +1398,8 @@ void intel_pmu_lbr_init_skl(void);
void intel_pmu_lbr_init_knl(void);
+void intel_pmu_lbr_init(void);
+
void intel_pmu_arch_lbr_init(void);
void intel_pmu_pebs_data_source_nhm(void);
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 85feafacc445..77e3a47af5ad 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
* - perf_msr_probe(PERF_RAPL_MAX)
* - want to use same event codes across both architectures
*/
-static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
- [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
+static struct perf_msr amd_rapl_msrs[] = {
+ [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 },
+ [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
+ [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 },
+ [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 },
+ [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 },
};
-
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 0367efdc5b7a..a288ecd230ab 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -380,8 +380,6 @@ static __always_inline int fls64(__u64 x)
#include <asm-generic/bitops/fls64.h>
#endif
-#include <asm-generic/bitops/find.h>
-
#include <asm-generic/bitops/sched.h>
#include <asm/arch_hweight.h>
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index f658bb4dbb74..631d5040b31e 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -55,6 +55,7 @@ KVM_X86_OP_NULL(tlb_remote_flush)
KVM_X86_OP_NULL(tlb_remote_flush_with_range)
KVM_X86_OP(tlb_flush_gva)
KVM_X86_OP(tlb_flush_guest)
+KVM_X86_OP(vcpu_pre_run)
KVM_X86_OP(run)
KVM_X86_OP_NULL(handle_exit)
KVM_X86_OP_NULL(skip_emulated_instruction)
@@ -98,8 +99,6 @@ KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP_NULL(request_immediate_exit)
KVM_X86_OP(sched_in)
KVM_X86_OP_NULL(update_cpu_dirty_logging)
-KVM_X86_OP_NULL(pre_block)
-KVM_X86_OP_NULL(post_block)
KVM_X86_OP_NULL(vcpu_blocking)
KVM_X86_OP_NULL(vcpu_unblocking)
KVM_X86_OP_NULL(update_pi_irte)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0677b9ea01c9..6e7c545bc7ee 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1381,6 +1381,7 @@ struct kvm_x86_ops {
*/
void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
+ int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
@@ -1454,18 +1455,6 @@ struct kvm_x86_ops {
const struct kvm_pmu_ops *pmu_ops;
const struct kvm_x86_nested_ops *nested_ops;
- /*
- * Architecture specific hooks for vCPU blocking due to
- * HLT instruction.
- * Returns for .pre_block():
- * - 0 means continue to block the vCPU.
- * - 1 means we cannot block the vCPU since some event
- * happens during this period, such as, 'ON' bit in
- * posted-interrupts descriptor is set.
- */
- int (*pre_block)(struct kvm_vcpu *vcpu);
- void (*post_block)(struct kvm_vcpu *vcpu);
-
void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
@@ -1494,7 +1483,8 @@ struct kvm_x86_ops {
int (*get_msr_feature)(struct kvm_msr_entry *entry);
- bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
+ bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len);
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
@@ -1507,6 +1497,7 @@ struct kvm_x86_ops {
};
struct kvm_x86_nested_ops {
+ void (*leave_nested)(struct kvm_vcpu *vcpu);
int (*check_events)(struct kvm_vcpu *vcpu);
bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
void (*triple_fault)(struct kvm_vcpu *vcpu);
@@ -1872,7 +1863,6 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
-void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 2da3316bb559..bf6e96011dfe 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -452,6 +452,9 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+/* attributes for system fd (group 0) */
+#define KVM_X86_XCOMP_GUEST_SUPP 0
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index c132daabe615..3e6f6b448f6a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -760,9 +760,9 @@ void __init lapic_update_legacy_vectors(void)
void __init lapic_assign_system_vectors(void)
{
- unsigned int i, vector = 0;
+ unsigned int i, vector;
- for_each_set_bit_from(vector, system_vectors, NR_VECTORS)
+ for_each_set_bit(vector, system_vectors, NR_VECTORS)
irq_matrix_assign_system(vector_matrix, vector, false);
if (nr_legacy_irqs() > 1)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index a1e2f41796dc..9f4b508886dd 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -423,7 +423,7 @@ static void threshold_restart_bank(void *_tr)
u32 hi, lo;
/* sysfs write might race against an offline operation */
- if (this_cpu_read(threshold_banks))
+ if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
return;
rdmsr(tr->b->address, lo, hi);
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index bb9a46a804bf..baafbb37be67 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_D:
case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index fd2d3ab38ebb..dc7da08bc700 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -515,6 +515,7 @@ static const struct intel_early_ops gen11_early_ops __initconst = {
.stolen_size = gen9_stolen_size,
};
+/* Intel integrated GPUs for which we need to reserve "stolen memory" */
static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_I830_IDS(&i830_early_ops),
INTEL_I845G_IDS(&i845_early_ops),
@@ -592,6 +593,13 @@ static void __init intel_graphics_quirks(int num, int slot, int func)
u16 device;
int i;
+ /*
+ * Reserve "stolen memory" for an integrated GPU. If we've already
+ * found one, there's nothing to do for other (discrete) GPUs.
+ */
+ if (resource_size(&intel_graphics_stolen_res))
+ return;
+
device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) {
@@ -704,7 +712,7 @@ static struct chipset early_qrk[] __initdata = {
{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
- QFLAG_APPLY_ONCE, intel_graphics_quirks },
+ 0, intel_graphics_quirks },
/*
* HPET on the current version of the Baytrail platform has accuracy
* problems: it will halt in deep idle state - so we disable it.
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 882213df3713..71f336425e58 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1435,8 +1435,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
hpet_rtc_timer_reinit();
memset(&curr_time, 0, sizeof(struct rtc_time));
- if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
- mc146818_get_time(&curr_time);
+ if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) {
+ if (unlikely(mc146818_get_time(&curr_time) < 0)) {
+ pr_err_ratelimited("unable to read current time from RTC\n");
+ return IRQ_HANDLED;
+ }
+ }
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7b65275544b2..49325caa7307 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -84,60 +84,6 @@ static bool __init pcpu_need_numa(void)
}
#endif
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
- unsigned long align)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = early_cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE,
- node);
-
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
- cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-/*
- * Helpers for first chunk memory allocation
- */
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
-{
- return pcpu_alloc_bootmem(cpu, size, align);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
#ifdef CONFIG_NUMA
@@ -150,7 +96,12 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
#endif
}
-static void __init pcpup_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
+{
+ return early_cpu_to_node(cpu);
+}
+
+void __init pcpu_populate_pte(unsigned long addr)
{
populate_extra_pte(addr);
}
@@ -205,15 +156,14 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
dyn_size, atom_size,
pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ pcpu_cpu_to_node);
if (rc < 0)
pr_warn("%s allocator failed (%d), falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
- pcpu_fc_alloc, pcpu_fc_free,
- pcpup_populate_pte);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c55e57b30e81..28be02adc669 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -119,6 +119,29 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
return fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu, xfeatures);
}
+/* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */
+static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
+ int nent)
+{
+ struct kvm_cpuid_entry2 *orig;
+ int i;
+
+ if (nent != vcpu->arch.cpuid_nent)
+ return -EINVAL;
+
+ for (i = 0; i < nent; i++) {
+ orig = &vcpu->arch.cpuid_entries[i];
+ if (e2[i].function != orig->function ||
+ e2[i].index != orig->index ||
+ e2[i].flags != orig->flags ||
+ e2[i].eax != orig->eax || e2[i].ebx != orig->ebx ||
+ e2[i].ecx != orig->ecx || e2[i].edx != orig->edx)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
{
u32 function;
@@ -145,14 +168,21 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
}
}
-static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid_entry2 *entries, int nent)
{
u32 base = vcpu->arch.kvm_cpuid_base;
if (!base)
return NULL;
- return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
+ return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, 0);
+}
+
+static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+{
+ return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent);
}
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
@@ -167,11 +197,28 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
vcpu->arch.pv_cpuid.features = best->eax;
}
-void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
+/*
+ * Calculate guest's supported XCR0 taking into account guest CPUID data and
+ * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0).
+ */
+static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
{
struct kvm_cpuid_entry2 *best;
- best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ best = cpuid_entry2_find(entries, nent, 0xd, 0);
+ if (!best)
+ return 0;
+
+ return (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
+}
+
+static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
+ int nent)
+{
+ struct kvm_cpuid_entry2 *best;
+ u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
+
+ best = cpuid_entry2_find(entries, nent, 1, 0);
if (best) {
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE))
@@ -182,32 +229,52 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
}
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ best = cpuid_entry2_find(entries, nent, 7, 0);
if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
cpuid_entry_change(best, X86_FEATURE_OSPKE,
kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
- best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+ best = cpuid_entry2_find(entries, nent, 0xD, 0);
if (best)
best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
- best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
+ best = cpuid_entry2_find(entries, nent, 0xD, 1);
if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- best = kvm_find_kvm_cpuid_features(vcpu);
+ best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
- best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ best = cpuid_entry2_find(entries, nent, 0x1, 0);
if (best)
cpuid_entry_change(best, X86_FEATURE_MWAIT,
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}
+
+ /*
+ * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
+ * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
+ * requested XCR0 value. The enclave's XFRM must be a subset of XCR0
+ * at the time of EENTER, thus adjust the allowed XFRM by the guest's
+ * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
+ * '1' even on CPUs that don't support XSAVE.
+ */
+ best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
+ if (best) {
+ best->ecx &= guest_supported_xcr0 & 0xffffffff;
+ best->edx &= guest_supported_xcr0 >> 32;
+ best->ecx |= XFEATURE_MASK_FPSSE;
+ }
+}
+
+void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
+{
+ __kvm_update_cpuid_runtime(vcpu, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
}
EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
@@ -226,27 +293,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_apic_set_version(vcpu);
}
- best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
- if (!best)
- vcpu->arch.guest_supported_xcr0 = 0;
- else
- vcpu->arch.guest_supported_xcr0 =
- (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
-
- /*
- * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
- * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
- * requested XCR0 value. The enclave's XFRM must be a subset of XCRO
- * at the time of EENTER, thus adjust the allowed XFRM by the guest's
- * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
- * '1' even on CPUs that don't support XSAVE.
- */
- best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1);
- if (best) {
- best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff;
- best->edx &= vcpu->arch.guest_supported_xcr0 >> 32;
- best->ecx |= XFEATURE_MASK_FPSSE;
- }
+ vcpu->arch.guest_supported_xcr0 =
+ cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
kvm_update_pv_runtime(vcpu);
@@ -298,6 +346,28 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
{
int r;
+ __kvm_update_cpuid_runtime(vcpu, e2, nent);
+
+ /*
+ * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
+ * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc. aren't
+ * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
+ * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with
+ * the core vCPU model on the fly. It would've been better to forbid any
+ * KVM_SET_CPUID{,2} calls after KVM_RUN altogether but unfortunately
+ * some VMMs (e.g. QEMU) reuse vCPU fds for CPU hotplug/unplug and do
+ * KVM_SET_CPUID{,2} again. To support this legacy behavior, check
+ * whether the supplied CPUID data is equal to what's already set.
+ */
+ if (vcpu->arch.last_vmentry_cpu != -1) {
+ r = kvm_cpuid_check_equal(vcpu, e2, nent);
+ if (r)
+ return r;
+
+ kvfree(e2);
+ return 0;
+ }
+
r = kvm_check_cpuid(vcpu, e2, nent);
if (r)
return r;
@@ -307,7 +377,6 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
vcpu->arch.cpuid_nent = nent;
kvm_update_kvm_cpuid_base(vcpu);
- kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
return 0;
@@ -795,10 +864,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
perf_get_x86_pmu_capability(&cap);
/*
- * Only support guest architectural pmu on a host
- * with architectural pmu.
+ * The guest architectural pmu is only supported if the architectural
+ * pmu exists on the host and the module parameters allow it.
*/
- if (!cap.version)
+ if (!cap.version || !enable_pmu)
memset(&cap, 0, sizeof(cap));
eax.split.version_id = min(cap.version, 2);
@@ -837,13 +906,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
break;
case 0xd: {
- u64 guest_perm = xstate_get_guest_group_perm();
+ u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm();
+ u64 permitted_xss = supported_xss;
- entry->eax &= supported_xcr0 & guest_perm;
- entry->ebx = xstate_required_size(supported_xcr0, false);
+ entry->eax &= permitted_xcr0;
+ entry->ebx = xstate_required_size(permitted_xcr0, false);
entry->ecx = entry->ebx;
- entry->edx &= (supported_xcr0 & guest_perm) >> 32;
- if (!supported_xcr0)
+ entry->edx &= permitted_xcr0 >> 32;
+ if (!permitted_xcr0)
break;
entry = do_host_cpuid(array, function, 1);
@@ -852,20 +922,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
cpuid_entry_override(entry, CPUID_D_1_EAX);
if (entry->eax & (F(XSAVES)|F(XSAVEC)))
- entry->ebx = xstate_required_size(supported_xcr0 | supported_xss,
+ entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss,
true);
else {
- WARN_ON_ONCE(supported_xss != 0);
+ WARN_ON_ONCE(permitted_xss != 0);
entry->ebx = 0;
}
- entry->ecx &= supported_xss;
- entry->edx &= supported_xss >> 32;
+ entry->ecx &= permitted_xss;
+ entry->edx &= permitted_xss >> 32;
for (i = 2; i < 64; ++i) {
bool s_state;
- if (supported_xcr0 & BIT_ULL(i))
+ if (permitted_xcr0 & BIT_ULL(i))
s_state = false;
- else if (supported_xss & BIT_ULL(i))
+ else if (permitted_xss & BIT_ULL(i))
s_state = true;
else
continue;
@@ -879,13 +949,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
* invalid sub-leafs. Only valid sub-leafs should
* reach this point, and they should have a non-zero
* save state size. Furthermore, check whether the
- * processor agrees with supported_xcr0/supported_xss
+ * processor agrees with permitted_xcr0/permitted_xss
* on whether this is an XCR0- or IA32_XSS-managed area.
*/
if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) {
--array->nent;
continue;
}
+
+ if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
+ entry->ecx &= ~BIT_ULL(2);
entry->edx = 0;
}
break;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c5028e6b0f96..4662469240bc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1950,7 +1950,6 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
restart_apic_timer(vcpu->arch.apic);
}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
{
@@ -1962,7 +1961,6 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
start_sw_timer(apic);
preempt_enable();
}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
{
@@ -2631,7 +2629,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_apic_set_version(vcpu);
apic_update_ppr(apic);
- hrtimer_cancel(&apic->lapic_timer.timer);
+ cancel_apic_timer(apic);
apic->lapic_timer.expired_tscdeadline = 0;
apic_update_lvtt(apic);
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1d275e9d76b5..593093b52395 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5756,6 +5756,7 @@ static bool __kvm_zap_rmaps(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
continue;
flush = slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
+
PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
start, end - 1, true, flush);
}
@@ -5825,15 +5826,27 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
}
/*
- * We can flush all the TLBs out of the mmu lock without TLB
- * corruption since we just change the spte from writable to
- * readonly so that we only need to care the case of changing
- * spte from present to present (changing the spte from present
- * to nonpresent will flush all the TLBs immediately), in other
- * words, the only case we care is mmu_spte_update() where we
- * have checked Host-writable | MMU-writable instead of
- * PT_WRITABLE_MASK, that means it does not depend on PT_WRITABLE_MASK
- * anymore.
+ * Flush TLBs if any SPTEs had to be write-protected to ensure that
+ * guest writes are reflected in the dirty bitmap before the memslot
+ * update completes, i.e. before enabling dirty logging is visible to
+ * userspace.
+ *
+ * Perform the TLB flush outside the mmu_lock to reduce the amount of
+ * time the lock is held. However, this does mean that another CPU can
+ * now grab mmu_lock and encounter a write-protected SPTE while CPUs
+ * still have a writable mapping for the associated GFN in their TLB.
+ *
+ * This is safe but requires KVM to be careful when making decisions
+ * based on the write-protection status of an SPTE. Specifically, KVM
+ * also write-protects SPTEs to monitor changes to guest page tables
+ * during shadow paging, and must guarantee no CPUs can write to those
+ * pages before the lock is dropped. As mentioned in the previous
+ * paragraph, a write-protected SPTE is no guarantee that a CPU cannot
+ * perform writes. So to determine if a TLB flush is truly required, KVM
+ * will clear a separate software-only bit (MMU-writable) and skip the
+ * flush if-and-only-if this bit was already clear.
+ *
+ * See DEFAULT_SPTE_MMU_WRITEABLE for more details.
*/
if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 351b04ad62a1..73cfe62fdad1 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -216,6 +216,7 @@ u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~shadow_host_writable_mask;
+ new_spte &= ~shadow_mmu_writable_mask;
new_spte = mark_spte_for_access_track(new_spte);
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index a4af2a42695c..be6a007a4af3 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -60,10 +60,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
-/* Bits 9 and 10 are ignored by all non-EPT PTEs. */
-#define DEFAULT_SPTE_HOST_WRITEABLE BIT_ULL(9)
-#define DEFAULT_SPTE_MMU_WRITEABLE BIT_ULL(10)
-
/*
* The mask/shift to use for saving the original R/X bits when marking the PTE
* as not-present for access tracking purposes. We do not save the W bit as the
@@ -79,6 +75,35 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
static_assert(!(SPTE_TDP_AD_MASK & SHADOW_ACC_TRACK_SAVED_MASK));
/*
+ * *_SPTE_HOST_WRITEABLE (aka Host-writable) indicates whether the host permits
+ * writes to the guest page mapped by the SPTE. This bit is cleared on SPTEs
+ * that map guest pages in read-only memslots and read-only VMAs.
+ *
+ * Invariants:
+ * - If Host-writable is clear, PT_WRITABLE_MASK must be clear.
+ *
+ *
+ * *_SPTE_MMU_WRITEABLE (aka MMU-writable) indicates whether the shadow MMU
+ * allows writes to the guest page mapped by the SPTE. This bit is cleared when
+ * the guest page mapped by the SPTE contains a page table that is being
+ * monitored for shadow paging. In this case the SPTE can only be made writable
+ * by unsyncing the shadow page under the mmu_lock.
+ *
+ * Invariants:
+ * - If MMU-writable is clear, PT_WRITABLE_MASK must be clear.
+ * - If MMU-writable is set, Host-writable must be set.
+ *
+ * If MMU-writable is set, PT_WRITABLE_MASK is normally set but can be cleared
+ * to track writes for dirty logging. For such SPTEs, KVM will locklessly set
+ * PT_WRITABLE_MASK upon the next write from the guest and record the write in
+ * the dirty log (see fast_page_fault()).
+ */
+
+/* Bits 9 and 10 are ignored by all non-EPT PTEs. */
+#define DEFAULT_SPTE_HOST_WRITEABLE BIT_ULL(9)
+#define DEFAULT_SPTE_MMU_WRITEABLE BIT_ULL(10)
+
+/*
* Low ignored bits are at a premium for EPT, use high ignored bits, taking care
* to not overlap the A/D type mask or the saved access bits of access-tracked
* SPTEs when A/D bits are disabled.
@@ -316,8 +341,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
static inline bool spte_can_locklessly_be_made_writable(u64 spte)
{
- return (spte & shadow_host_writable_mask) &&
- (spte & shadow_mmu_writable_mask);
+ if (spte & shadow_mmu_writable_mask) {
+ WARN_ON_ONCE(!(spte & shadow_host_writable_mask));
+ return true;
+ }
+
+ WARN_ON_ONCE(spte & PT_WRITABLE_MASK);
+ return false;
}
static inline u64 get_mmio_spte_generation(u64 spte)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7b1bc816b7c3..bc9e3553fba2 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1442,12 +1442,12 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
!is_last_spte(iter.old_spte, iter.level))
continue;
- if (!is_writable_pte(iter.old_spte))
- break;
-
new_spte = iter.old_spte &
~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
+ if (new_spte == iter.old_spte)
+ break;
+
tdp_mmu_set_spte(kvm, &iter, new_spte);
spte_set = true;
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 261b39cbef6e..f614f95acc6b 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -13,6 +13,8 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
@@ -109,6 +111,9 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
.config = config,
};
+ if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
+ return;
+
attr.sample_period = get_sample_period(pmc, pmc->counter);
if (in_tx)
@@ -169,12 +174,16 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
return true;
}
+/*
+ * Three-way comparison of two __u64 values, for use with sort() and bsearch().
+ *
+ * Compare explicitly instead of returning the difference: a 64-bit difference
+ * is truncated to int on return, which yields the wrong sign (or a bogus
+ * "equal") when the values differ only in the upper 32 bits or by more than
+ * INT_MAX.
+ */
+static int cmp_u64(const void *a, const void *b)
+{
+	__u64 lhs = *(const __u64 *)a;
+	__u64 rhs = *(const __u64 *)b;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
unsigned config, type = PERF_TYPE_RAW;
struct kvm *kvm = pmc->vcpu->kvm;
struct kvm_pmu_event_filter *filter;
- int i;
bool allow_event = true;
if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
@@ -189,16 +198,13 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
if (filter) {
- for (i = 0; i < filter->nevents; i++)
- if (filter->events[i] ==
- (eventsel & AMD64_RAW_EVENT_MASK_NB))
- break;
- if (filter->action == KVM_PMU_EVENT_ALLOW &&
- i == filter->nevents)
- allow_event = false;
- if (filter->action == KVM_PMU_EVENT_DENY &&
- i < filter->nevents)
- allow_event = false;
+ __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
+
+ if (bsearch(&key, filter->events, filter->nevents,
+ sizeof(__u64), cmp_u64))
+ allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
+ else
+ allow_event = filter->action == KVM_PMU_EVENT_DENY;
}
if (!allow_event)
return;
@@ -573,6 +579,11 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
/* Ensure nevents can't be changed between the user copies. */
*filter = tmp;
+ /*
+ * Sort the in-kernel list so that we can search it with bsearch.
+ */
+ sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);
+
mutex_lock(&kvm->lock);
filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
mutex_is_locked(&kvm->lock));
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 0e5b49294086..90364d02f22a 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -295,13 +295,16 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
struct kvm_vcpu *vcpu;
unsigned long i;
+ /*
+ * Wake any target vCPUs that are blocking, i.e. waiting for a wake
+ * event. There's no need to signal doorbells, as hardware has handled
+ * vCPUs that were in guest at the time of the IPI, and vCPUs that have
+ * since entered the guest will have processed pending IRQs at VMRUN.
+ */
kvm_for_each_vcpu(i, vcpu, kvm) {
- bool m = kvm_apic_match_dest(vcpu, source,
- icrl & APIC_SHORT_MASK,
- GET_APIC_DEST_FIELD(icrh),
- icrl & APIC_DEST_MASK);
-
- if (m && !avic_vcpu_is_running(vcpu))
+ if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
+ GET_APIC_DEST_FIELD(icrh),
+ icrl & APIC_DEST_MASK))
kvm_vcpu_wake_up(vcpu);
}
}
@@ -672,9 +675,22 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
return -1;
kvm_lapic_set_irr(vec, vcpu->arch.apic);
+
+ /*
+ * Pairs with the smp_mb_*() after setting vcpu->mode to IN_GUEST_MODE in
+ * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
+ * the read of vcpu->mode, which guarantees that either VMRUN will see
+ * and process the new vIRR entry, or that the below code will signal
+ * the doorbell if the vCPU is already running in the guest.
+ */
smp_mb__after_atomic();
- if (avic_vcpu_is_running(vcpu)) {
+ /*
+ * Signal the doorbell to tell hardware to inject the IRQ if the vCPU
+ * is in the guest. If the vCPU is not in the guest, hardware will
+ * automatically process AVIC interrupts at VMRUN.
+ */
+ if (vcpu->mode == IN_GUEST_MODE) {
int cpu = READ_ONCE(vcpu->cpu);
/*
@@ -688,8 +704,13 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
if (cpu != get_cpu())
wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
put_cpu();
- } else
+ } else {
+ /*
+ * Wake the vCPU if it was blocking. KVM will then detect the
+ * pending IRQ when checking if the vCPU has a wake event.
+ */
kvm_vcpu_wake_up(vcpu);
+ }
return 0;
}
@@ -957,6 +978,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
+ lockdep_assert_preemption_disabled();
+
/*
* Since the host physical APIC id is 8 bits,
* we can support host APIC ID up to 255.
@@ -964,19 +987,25 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
+ /*
+ * No need to update anything if the vCPU is blocking, i.e. if the vCPU
+ * is being scheduled in after being preempted. The CPU entries in the
+ * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
+ * If the vCPU was migrated, its new CPU value will be stuffed when the
+ * vCPU unblocks.
+ */
+ if (kvm_vcpu_is_blocking(vcpu))
+ return;
+
entry = READ_ONCE(*(svm->avic_physical_id_cache));
WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
-
- entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
- if (svm->avic_is_running)
- entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+ entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
- avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
- svm->avic_is_running);
+ avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}
void avic_vcpu_put(struct kvm_vcpu *vcpu)
@@ -984,42 +1013,56 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
+ lockdep_assert_preemption_disabled();
+
entry = READ_ONCE(*(svm->avic_physical_id_cache));
- if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
- avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+
+ /* Nothing to do if IsRunning == '0' due to vCPU blocking. */
+ if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
+ return;
+
+ avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-/*
- * This function is called during VCPU halt/unhalt.
- */
-static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- struct vcpu_svm *svm = to_svm(vcpu);
- int cpu = get_cpu();
-
- WARN_ON(cpu != vcpu->cpu);
- svm->avic_is_running = is_run;
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
- if (kvm_vcpu_apicv_active(vcpu)) {
- if (is_run)
- avic_vcpu_load(vcpu, cpu);
- else
- avic_vcpu_put(vcpu);
- }
- put_cpu();
+ preempt_disable();
+
+ /*
+ * Unload the AVIC when the vCPU is about to block, _before_
+ * the vCPU actually blocks.
+ *
+ * Any IRQs that arrive before IsRunning=0 will not cause an
+ * incomplete IPI vmexit on the source, therefore vIRR will also
+ * be checked by kvm_vcpu_check_block() before blocking. The
+ * memory barrier implicit in set_current_state orders writing
+ * IsRunning=0 before reading the vIRR. The processor needs a
+ * matching memory barrier on interrupt delivery between writing
+ * IRR and reading IsRunning; the lack of this barrier might be
+ * the cause of erratum #1235.
+ */
+ avic_vcpu_put(vcpu);
+
+ preempt_enable();
}
-void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
+void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- avic_set_running(vcpu, false);
-}
+ int cpu;
-void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
-{
- if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
- kvm_vcpu_update_apicv(vcpu);
- avic_set_running(vcpu, true);
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ cpu = get_cpu();
+ WARN_ON(cpu != vcpu->cpu);
+
+ avic_vcpu_load(vcpu, cpu);
+
+ put_cpu();
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index cf206855ebf0..1218b5a342fc 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -983,9 +983,9 @@ void svm_free_nested(struct vcpu_svm *svm)
/*
* Forcibly leave nested mode in order to be able to reset the VCPU later on.
*/
-void svm_leave_nested(struct vcpu_svm *svm)
+void svm_leave_nested(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vcpu_svm *svm = to_svm(vcpu);
if (is_guest_mode(vcpu)) {
svm->nested.nested_run_pending = 0;
@@ -1411,7 +1411,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
- svm_leave_nested(svm);
+ svm_leave_nested(vcpu);
svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
return 0;
}
@@ -1478,7 +1478,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
*/
if (is_guest_mode(vcpu))
- svm_leave_nested(svm);
+ svm_leave_nested(vcpu);
else
svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
@@ -1532,6 +1532,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
}
struct kvm_x86_nested_ops svm_nested_ops = {
+ .leave_nested = svm_leave_nested,
.check_events = svm_check_nested_events,
.triple_fault = nested_svm_triple_fault,
.get_nested_state_pages = svm_get_nested_state_pages,
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 12d8b301065a..5aa45f13b16d 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -101,7 +101,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
{
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
- if (!pmu)
+ if (!enable_pmu)
return NULL;
switch (msr) {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6a22798eaaee..17b53457d866 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2100,8 +2100,13 @@ void __init sev_hardware_setup(void)
if (!sev_enabled || !npt_enabled)
goto out;
- /* Does the CPU support SEV? */
- if (!boot_cpu_has(X86_FEATURE_SEV))
+ /*
+ * SEV must obviously be supported in hardware. Sanity check that the
+ * CPU supports decode assists, which is mandatory for SEV guests to
+ * support instruction emulation.
+ */
+ if (!boot_cpu_has(X86_FEATURE_SEV) ||
+ WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)))
goto out;
/* Retrieve SEV CPUID information */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 46bcc706f257..6d97629655e3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -192,10 +192,6 @@ module_param(vgif, int, 0444);
static int lbrv = true;
module_param(lbrv, int, 0444);
-/* enable/disable PMU virtualization */
-bool pmu = true;
-module_param(pmu, bool, 0444);
-
static int tsc_scaling = true;
module_param(tsc_scaling, int, 0444);
@@ -294,7 +290,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
if (!(efer & EFER_SVME)) {
- svm_leave_nested(svm);
+ svm_leave_nested(vcpu);
svm_set_gif(svm, true);
/* #GP intercept is still needed for vmware backdoor */
if (!enable_vmware_backdoor)
@@ -316,7 +312,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
return ret;
}
- if (svm_gp_erratum_intercept)
+ /*
+ * Never intercept #GP for SEV guests, KVM can't
+ * decrypt guest memory to work around the erratum.
+ */
+ if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
set_exception_intercept(svm, GP_VECTOR);
}
}
@@ -873,47 +873,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
}
}
-/*
- * The default MMIO mask is a single bit (excluding the present bit),
- * which could conflict with the memory encryption bit. Check for
- * memory encryption support and override the default MMIO mask if
- * memory encryption is enabled.
- */
-static __init void svm_adjust_mmio_mask(void)
-{
- unsigned int enc_bit, mask_bit;
- u64 msr, mask;
-
- /* If there is no memory encryption support, use existing mask */
- if (cpuid_eax(0x80000000) < 0x8000001f)
- return;
-
- /* If memory encryption is not enabled, use existing mask */
- rdmsrl(MSR_AMD64_SYSCFG, msr);
- if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
- return;
-
- enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
- mask_bit = boot_cpu_data.x86_phys_bits;
-
- /* Increment the mask bit if it is the same as the encryption bit */
- if (enc_bit == mask_bit)
- mask_bit++;
-
- /*
- * If the mask bit location is below 52, then some bits above the
- * physical addressing limit will always be reserved, so use the
- * rsvd_bits() function to generate the mask. This mask, along with
- * the present bit, will be used to generate a page fault with
- * PFER.RSV = 1.
- *
- * If the mask bit location is 52 (or above), then clear the mask.
- */
- mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
-
- kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
-}
-
static void svm_hardware_teardown(void)
{
int cpu;
@@ -928,198 +887,6 @@ static void svm_hardware_teardown(void)
iopm_base = 0;
}
-static __init void svm_set_cpu_caps(void)
-{
- kvm_set_cpu_caps();
-
- supported_xss = 0;
-
- /* CPUID 0x80000001 and 0x8000000A (SVM features) */
- if (nested) {
- kvm_cpu_cap_set(X86_FEATURE_SVM);
-
- if (nrips)
- kvm_cpu_cap_set(X86_FEATURE_NRIPS);
-
- if (npt_enabled)
- kvm_cpu_cap_set(X86_FEATURE_NPT);
-
- if (tsc_scaling)
- kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
-
- /* Nested VM can receive #VMEXIT instead of triggering #GP */
- kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
- }
-
- /* CPUID 0x80000008 */
- if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
- boot_cpu_has(X86_FEATURE_AMD_SSBD))
- kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
-
- /* AMD PMU PERFCTR_CORE CPUID */
- if (pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
- kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
-
- /* CPUID 0x8000001F (SME/SEV features) */
- sev_set_cpu_caps();
-}
-
-static __init int svm_hardware_setup(void)
-{
- int cpu;
- struct page *iopm_pages;
- void *iopm_va;
- int r;
- unsigned int order = get_order(IOPM_SIZE);
-
- /*
- * NX is required for shadow paging and for NPT if the NX huge pages
- * mitigation is enabled.
- */
- if (!boot_cpu_has(X86_FEATURE_NX)) {
- pr_err_ratelimited("NX (Execute Disable) not supported\n");
- return -EOPNOTSUPP;
- }
- kvm_enable_efer_bits(EFER_NX);
-
- iopm_pages = alloc_pages(GFP_KERNEL, order);
-
- if (!iopm_pages)
- return -ENOMEM;
-
- iopm_va = page_address(iopm_pages);
- memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
- iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
-
- init_msrpm_offsets();
-
- supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
-
- if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
- kvm_enable_efer_bits(EFER_FFXSR);
-
- if (tsc_scaling) {
- if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
- tsc_scaling = false;
- } else {
- pr_info("TSC scaling supported\n");
- kvm_has_tsc_control = true;
- kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
- kvm_tsc_scaling_ratio_frac_bits = 32;
- }
- }
-
- tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
-
- /* Check for pause filtering support */
- if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
- pause_filter_count = 0;
- pause_filter_thresh = 0;
- } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
- pause_filter_thresh = 0;
- }
-
- if (nested) {
- printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
- kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
- }
-
- /*
- * KVM's MMU doesn't support using 2-level paging for itself, and thus
- * NPT isn't supported if the host is using 2-level paging since host
- * CR4 is unchanged on VMRUN.
- */
- if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
- npt_enabled = false;
-
- if (!boot_cpu_has(X86_FEATURE_NPT))
- npt_enabled = false;
-
- /* Force VM NPT level equal to the host's paging level */
- kvm_configure_mmu(npt_enabled, get_npt_level(),
- get_npt_level(), PG_LEVEL_1G);
- pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
-
- /* Note, SEV setup consumes npt_enabled. */
- sev_hardware_setup();
-
- svm_hv_hardware_setup();
-
- svm_adjust_mmio_mask();
-
- for_each_possible_cpu(cpu) {
- r = svm_cpu_init(cpu);
- if (r)
- goto err;
- }
-
- if (nrips) {
- if (!boot_cpu_has(X86_FEATURE_NRIPS))
- nrips = false;
- }
-
- enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
-
- if (enable_apicv) {
- pr_info("AVIC enabled\n");
-
- amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
- }
-
- if (vls) {
- if (!npt_enabled ||
- !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
- !IS_ENABLED(CONFIG_X86_64)) {
- vls = false;
- } else {
- pr_info("Virtual VMLOAD VMSAVE supported\n");
- }
- }
-
- if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
- svm_gp_erratum_intercept = false;
-
- if (vgif) {
- if (!boot_cpu_has(X86_FEATURE_VGIF))
- vgif = false;
- else
- pr_info("Virtual GIF supported\n");
- }
-
- if (lbrv) {
- if (!boot_cpu_has(X86_FEATURE_LBRV))
- lbrv = false;
- else
- pr_info("LBR virtualization supported\n");
- }
-
- if (!pmu)
- pr_info("PMU virtualization is disabled\n");
-
- svm_set_cpu_caps();
-
- /*
- * It seems that on AMD processors PTE's accessed bit is
- * being set by the CPU hardware before the NPF vmexit.
- * This is not expected behaviour and our tests fail because
- * of it.
- * A workaround here is to disable support for
- * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
- * In this case userspace can know if there is support using
- * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
- * it
- * If future AMD CPU models change the behaviour described above,
- * this variable can be changed accordingly
- */
- allow_smaller_maxphyaddr = !npt_enabled;
-
- return 0;
-
-err:
- svm_hardware_teardown();
- return r;
-}
-
static void init_seg(struct vmcb_seg *seg)
{
seg->selector = 0;
@@ -1247,9 +1014,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
* We intercept those #GP and allow access to them anyway
- * as VMware does.
+ * as VMware does. Don't intercept #GP for SEV guests as KVM can't
+ * decrypt guest memory to decode the faulting instruction.
*/
- if (enable_vmware_backdoor)
+ if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
set_exception_intercept(svm, GP_VECTOR);
svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1444,12 +1212,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
if (err)
goto error_free_vmsa_page;
- /* We initialize this flag to true to make sure that the is_running
- * bit would be set the first time the vcpu is loaded.
- */
- if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
- svm->avic_is_running = true;
-
svm->msrpm = svm_vcpu_alloc_msrpm();
if (!svm->msrpm) {
err = -ENOMEM;
@@ -2334,10 +2096,6 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (error_code)
goto reinject;
- /* All SVM instructions expect page aligned RAX */
- if (svm->vmcb->save.rax & ~PAGE_MASK)
- goto reinject;
-
/* Decode the instruction for usage later */
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
goto reinject;
@@ -2355,8 +2113,13 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (!is_guest_mode(vcpu))
return kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
- } else
+ } else {
+ /* All SVM instructions expect page aligned RAX */
+ if (svm->vmcb->save.rax & ~PAGE_MASK)
+ goto reinject;
+
return emulate_svm_instr(vcpu, opcode);
+ }
reinject:
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
@@ -3833,6 +3596,11 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
svm_complete_interrupts(vcpu);
}
+static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
@@ -4490,79 +4258,140 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
}
}
-static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
{
bool smep, smap, is_user;
unsigned long cr4;
+ u64 error_code;
+
+ /* Emulation is always possible when KVM has access to all guest state. */
+ if (!sev_guest(vcpu->kvm))
+ return true;
+
+ /* #UD and #GP should never be intercepted for SEV guests. */
+ WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD |
+ EMULTYPE_TRAP_UD_FORCED |
+ EMULTYPE_VMWARE_GP));
/*
- * When the guest is an SEV-ES guest, emulation is not possible.
+ * Emulation is impossible for SEV-ES guests as KVM doesn't have access
+ * to guest register state.
*/
if (sev_es_guest(vcpu->kvm))
return false;
/*
+ * Emulation is possible if the instruction is already decoded, e.g.
+ * when completing I/O after returning from userspace.
+ */
+ if (emul_type & EMULTYPE_NO_DECODE)
+ return true;
+
+ /*
+ * Emulation is possible for SEV guests if and only if a prefilled
+ * buffer containing the bytes of the intercepted instruction is
+ * available. SEV guest memory is encrypted with a guest specific key
+ * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
+ * decode garbage.
+ *
+ * Inject #UD if KVM reached this point without an instruction buffer.
+ * In practice, this path should never be hit by a well-behaved guest,
+ * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
+ * is still theoretically reachable, e.g. via unaccelerated fault-like
+ * AVIC access, and needs to be handled by KVM to avoid putting the
+ * guest into an infinite loop. Injecting #UD is somewhat arbitrary,
+ * but it's the least awful option given lack of insight into the guest.
+ */
+ if (unlikely(!insn)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return false;
+ }
+
+ /*
+ * Emulate for SEV guests if the insn buffer is not empty. The buffer
+ * will be empty if the DecodeAssist microcode cannot fetch bytes for
+ * the faulting instruction because the code fetch itself faulted, e.g.
+ * the guest attempted to fetch from emulated MMIO or a guest page
+ * table used to translate CS:RIP resides in emulated MMIO.
+ */
+ if (likely(insn_len))
+ return true;
+
+ /*
* Detect and workaround Errata 1096 Fam_17h_00_0Fh.
*
* Errata:
- * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is
- * possible that CPU microcode implementing DecodeAssist will fail
- * to read bytes of instruction which caused #NPF. In this case,
- * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly
- * return 0 instead of the correct guest instruction bytes.
+ * When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is
+ * possible that CPU microcode implementing DecodeAssist will fail to
+ * read guest memory at CS:RIP and vmcb.GuestIntrBytes will incorrectly
+ * be '0'. This happens because microcode reads CS:RIP using a _data_
+ * load uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode
+ * gives up and does not fill the instruction bytes buffer.
*
- * This happens because CPU microcode reading instruction bytes
- * uses a special opcode which attempts to read data using CPL=0
- * privileges. The microcode reads CS:RIP and if it hits a SMAP
- * fault, it gives up and returns no instruction bytes.
+ * As above, KVM reaches this point iff the VM is an SEV guest, the CPU
+ * supports DecodeAssist, a #NPF was raised, KVM's page fault handler
+ * triggered emulation (e.g. for MMIO), and the CPU returned 0 in the
+ * GuestIntrBytes field of the VMCB.
*
- * Detection:
- * We reach here in case CPU supports DecodeAssist, raised #NPF and
- * returned 0 in GuestIntrBytes field of the VMCB.
- * First, errata can only be triggered in case vCPU CR4.SMAP=1.
- * Second, if vCPU CR4.SMEP=1, errata could only be triggered
- * in case vCPU CPL==3 (Because otherwise guest would have triggered
- * a SMEP fault instead of #NPF).
- * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL.
- * As most guests enable SMAP if they have also enabled SMEP, use above
- * logic in order to attempt minimize false-positive of detecting errata
- * while still preserving all cases semantic correctness.
+ * This does _not_ mean that the erratum has been encountered, as the
+ * DecodeAssist will also fail if the load for CS:RIP hits a legitimate
+ * #PF, e.g. if the guest attempted to execute from emulated MMIO and
+ * encountered a reserved/not-present #PF.
*
- * Workaround:
- * To determine what instruction the guest was executing, the hypervisor
- * will have to decode the instruction at the instruction pointer.
+ * To hit the erratum, the following conditions must be true:
+ * 1. CR4.SMAP=1 (obviously).
+ * 2. CR4.SMEP=0 || CPL=3. If SMEP=1 and CPL<3, the erratum cannot
+ * have been hit as the guest would have encountered a SMEP
+ * violation #PF, not a #NPF.
+ * 3. The #NPF is not due to a code fetch, in which case failure to
+ * retrieve the instruction bytes is legitimate (see above).
*
- * In non SEV guest, hypervisor will be able to read the guest
- * memory to decode the instruction pointer when insn_len is zero
- * so we return true to indicate that decoding is possible.
- *
- * But in the SEV guest, the guest memory is encrypted with the
- * guest specific key and hypervisor will not be able to decode the
- * instruction pointer so we will not able to workaround it. Lets
- * print the error and request to kill the guest.
+ * In addition, don't apply the erratum workaround if the #NPF occurred
+ * while translating guest page tables (see below).
*/
- if (likely(!insn || insn_len))
- return true;
-
- /*
- * If RIP is invalid, go ahead with emulation which will cause an
- * internal error exit.
- */
- if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
- return true;
+ error_code = to_svm(vcpu)->vmcb->control.exit_info_1;
+ if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
+ goto resume_guest;
cr4 = kvm_read_cr4(vcpu);
smep = cr4 & X86_CR4_SMEP;
smap = cr4 & X86_CR4_SMAP;
is_user = svm_get_cpl(vcpu) == 3;
if (smap && (!smep || is_user)) {
- if (!sev_guest(vcpu->kvm))
- return true;
-
pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
+ /*
+ * If the fault occurred in userspace, arbitrarily inject #GP
+ * to avoid killing the guest and to hopefully avoid confusing
+ * the guest kernel too much, e.g. injecting #PF would not be
+ * coherent with respect to the guest's page tables. Request
+ * triple fault if the fault occurred in the kernel as there's
+ * no fault that KVM can inject without confusing the guest.
+ * In practice, the triple fault is moot as no sane SEV kernel
+ * will execute from user memory while also running with SMAP=1.
+ */
+ if (is_user)
+ kvm_inject_gp(vcpu, 0);
+ else
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
+resume_guest:
+ /*
+ * If the erratum was not hit, simply resume the guest and let it fault
+ * again. While awful, e.g. the vCPU may get stuck in an infinite loop
+ * if the fault is at CPL=0, it's the lesser of all evils. Exiting to
+ * userspace will kill the guest, and letting the emulator read garbage
+ * will yield random behavior and potentially corrupt the guest.
+ *
+ * Simply resuming the guest is technically not a violation of the SEV
+ * architecture. AMD's APM states that all code fetches and page table
+ * accesses for SEV guest are encrypted, regardless of the C-Bit. The
+ * APM also states that encrypted accesses to MMIO are "ignored", but
+ * doesn't explicitly define "ignored", i.e. doing nothing and letting
+ * the guest spin is technically "ignoring" the access.
+ */
return false;
}
@@ -4629,8 +4458,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.prepare_guest_switch = svm_prepare_guest_switch,
.vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put,
- .vcpu_blocking = svm_vcpu_blocking,
- .vcpu_unblocking = svm_vcpu_unblocking,
+ .vcpu_blocking = avic_vcpu_blocking,
+ .vcpu_unblocking = avic_vcpu_unblocking,
.update_exception_bitmap = svm_update_exception_bitmap,
.get_msr_feature = svm_get_msr_feature,
@@ -4662,6 +4491,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.tlb_flush_gva = svm_flush_tlb_gva,
.tlb_flush_guest = svm_flush_tlb,
+ .vcpu_pre_run = svm_vcpu_pre_run,
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
@@ -4742,6 +4572,243 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
};
+/*
+ * The default MMIO mask is a single bit (excluding the present bit),
+ * which could conflict with the memory encryption bit. Check for
+ * memory encryption support and override the default MMIO mask if
+ * memory encryption is enabled.
+ */
+static __init void svm_adjust_mmio_mask(void)
+{
+ unsigned int enc_bit, mask_bit;
+ u64 msr, mask;
+
+ /* If there is no memory encryption support, use existing mask */
+ if (cpuid_eax(0x80000000) < 0x8000001f)
+ return;
+
+ /* If memory encryption is not enabled, use existing mask */
+ rdmsrl(MSR_AMD64_SYSCFG, msr);
+ if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
+ return;
+
+ enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
+ mask_bit = boot_cpu_data.x86_phys_bits;
+
+ /* Increment the mask bit if it is the same as the encryption bit */
+ if (enc_bit == mask_bit)
+ mask_bit++;
+
+ /*
+ * If the mask bit location is below 52, then some bits above the
+ * physical addressing limit will always be reserved, so use the
+ * rsvd_bits() function to generate the mask. This mask, along with
+ * the present bit, will be used to generate a page fault with
+ * PFERR.RSVD = 1.
+ *
+ * If the mask bit location is 52 (or above), then clear the mask.
+ */
+ mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
+
+ kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+}
+
+static __init void svm_set_cpu_caps(void)
+{
+ kvm_set_cpu_caps();
+
+ supported_xss = 0;
+
+ /* CPUID 0x80000001 and 0x8000000A (SVM features) */
+ if (nested) {
+ kvm_cpu_cap_set(X86_FEATURE_SVM);
+
+ if (nrips)
+ kvm_cpu_cap_set(X86_FEATURE_NRIPS);
+
+ if (npt_enabled)
+ kvm_cpu_cap_set(X86_FEATURE_NPT);
+
+ if (tsc_scaling)
+ kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
+
+ /* Nested VM can receive #VMEXIT instead of triggering #GP */
+ kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
+ }
+
+ /* CPUID 0x80000008 */
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
+ boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+
+ /* AMD PMU PERFCTR_CORE CPUID */
+ if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
+
+ /* CPUID 0x8000001F (SME/SEV features) */
+ sev_set_cpu_caps();
+}
+
+static __init int svm_hardware_setup(void)
+{
+ int cpu;
+ struct page *iopm_pages;
+ void *iopm_va;
+ int r;
+ unsigned int order = get_order(IOPM_SIZE);
+
+ /*
+ * NX is required for shadow paging and for NPT if the NX huge pages
+ * mitigation is enabled.
+ */
+ if (!boot_cpu_has(X86_FEATURE_NX)) {
+ pr_err_ratelimited("NX (Execute Disable) not supported\n");
+ return -EOPNOTSUPP;
+ }
+ kvm_enable_efer_bits(EFER_NX);
+
+ iopm_pages = alloc_pages(GFP_KERNEL, order);
+
+ if (!iopm_pages)
+ return -ENOMEM;
+
+ iopm_va = page_address(iopm_pages);
+ memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
+ iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
+
+ init_msrpm_offsets();
+
+ supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+ if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+ kvm_enable_efer_bits(EFER_FFXSR);
+
+ if (tsc_scaling) {
+ if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ tsc_scaling = false;
+ } else {
+ pr_info("TSC scaling supported\n");
+ kvm_has_tsc_control = true;
+ kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
+ kvm_tsc_scaling_ratio_frac_bits = 32;
+ }
+ }
+
+ tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
+
+ /* Check for pause filtering support */
+ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
+ pause_filter_count = 0;
+ pause_filter_thresh = 0;
+ } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
+ pause_filter_thresh = 0;
+ }
+
+ if (nested) {
+ printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
+ kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
+ }
+
+ /*
+ * KVM's MMU doesn't support using 2-level paging for itself, and thus
+ * NPT isn't supported if the host is using 2-level paging since host
+ * CR4 is unchanged on VMRUN.
+ */
+ if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
+ npt_enabled = false;
+
+ if (!boot_cpu_has(X86_FEATURE_NPT))
+ npt_enabled = false;
+
+ /* Force VM NPT level equal to the host's paging level */
+ kvm_configure_mmu(npt_enabled, get_npt_level(),
+ get_npt_level(), PG_LEVEL_1G);
+ pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
+
+ /* Note, SEV setup consumes npt_enabled. */
+ sev_hardware_setup();
+
+ svm_hv_hardware_setup();
+
+ svm_adjust_mmio_mask();
+
+ for_each_possible_cpu(cpu) {
+ r = svm_cpu_init(cpu);
+ if (r)
+ goto err;
+ }
+
+ if (nrips) {
+ if (!boot_cpu_has(X86_FEATURE_NRIPS))
+ nrips = false;
+ }
+
+ enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
+
+ if (enable_apicv) {
+ pr_info("AVIC enabled\n");
+
+ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+ } else {
+ svm_x86_ops.vcpu_blocking = NULL;
+ svm_x86_ops.vcpu_unblocking = NULL;
+ }
+
+ if (vls) {
+ if (!npt_enabled ||
+ !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
+ !IS_ENABLED(CONFIG_X86_64)) {
+ vls = false;
+ } else {
+ pr_info("Virtual VMLOAD VMSAVE supported\n");
+ }
+ }
+
+ if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
+ svm_gp_erratum_intercept = false;
+
+ if (vgif) {
+ if (!boot_cpu_has(X86_FEATURE_VGIF))
+ vgif = false;
+ else
+ pr_info("Virtual GIF supported\n");
+ }
+
+ if (lbrv) {
+ if (!boot_cpu_has(X86_FEATURE_LBRV))
+ lbrv = false;
+ else
+ pr_info("LBR virtualization supported\n");
+ }
+
+ if (!enable_pmu)
+ pr_info("PMU virtualization is disabled\n");
+
+ svm_set_cpu_caps();
+
+ /*
+ * It seems that on AMD processors PTE's accessed bit is
+ * being set by the CPU hardware before the NPF vmexit.
+ * This is not expected behaviour and our tests fail because
+ * of it.
+ * A workaround here is to disable support for
+ * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
+ * In this case userspace can know if there is support using
+ * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
+ * it.
+ * If future AMD CPU models change the behaviour described above,
+ * this variable can be changed accordingly.
+ */
+ allow_smaller_maxphyaddr = !npt_enabled;
+
+ return 0;
+
+err:
+ svm_hardware_teardown();
+ return r;
+}
+
+
static struct kvm_x86_init_ops svm_init_ops __initdata = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9f153c59f2c8..73525353e424 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -32,7 +32,6 @@
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern bool intercept_smi;
-extern bool pmu;
/*
* Clean bits in VMCB.
@@ -226,7 +225,6 @@ struct vcpu_svm {
u32 dfr_reg;
struct page *avic_backing_page;
u64 *avic_physical_id_cache;
- bool avic_is_running;
/*
* Per-vcpu list of struct amd_svm_iommu_ir:
@@ -306,11 +304,6 @@ static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
& ~VMCB_ALWAYS_DIRTY_MASK;
}
-static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit)
-{
- return (vmcb->control.clean & (1 << bit));
-}
-
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
{
vmcb->control.clean &= ~(1 << bit);
@@ -527,7 +520,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
-void svm_leave_nested(struct vcpu_svm *svm);
+void svm_leave_nested(struct kvm_vcpu *vcpu);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
@@ -574,17 +567,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- u64 *entry = svm->avic_physical_id_cache;
-
- if (!entry)
- return false;
-
- return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
-}
-
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
@@ -605,8 +587,8 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec);
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu);
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
-void svm_vcpu_blocking(struct kvm_vcpu *vcpu);
-void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
+void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
+void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
/* sev.c */
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index c53b8bf8d013..489ca56212c6 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -46,6 +46,9 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
if (npt_enabled &&
ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
hve->hv_enlightenments_control.enlightened_npt_tlb = 1;
+
+ if (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)
+ hve->hv_enlightenments_control.msr_bitmap = 1;
}
static inline void svm_hv_hardware_setup(void)
@@ -83,14 +86,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
struct hv_enlightenments *hve =
(struct hv_enlightenments *)vmcb->control.reserved_sw;
- /*
- * vmcb can be NULL if called during early vcpu init.
- * And its okay not to mark vmcb dirty during vcpu init
- * as we mark it dirty unconditionally towards end of vcpu
- * init phase.
- */
- if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
- hve->hv_enlightenments_control.msr_bitmap)
+ if (hve->hv_enlightenments_control.msr_bitmap)
vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
}
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index c8029b7845b6..3f430e218375 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -5,6 +5,7 @@
#include <asm/vmx.h>
#include "lapic.h"
+#include "x86.h"
extern bool __read_mostly enable_vpid;
extern bool __read_mostly flexpriority_enabled;
@@ -53,7 +54,6 @@ struct nested_vmx_msrs {
struct vmcs_config {
int size;
- int order;
u32 basic_cap;
u32 revision_id;
u32 pin_based_exec_ctrl;
@@ -389,6 +389,9 @@ static inline u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = 0;
+ if (!enable_pmu)
+ return perf_cap;
+
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap);
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index ba6f99f584ac..87e3dc10edf4 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -12,8 +12,6 @@
DEFINE_STATIC_KEY_FALSE(enable_evmcs);
-#if IS_ENABLED(CONFIG_HYPERV)
-
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
{EVMCS1_OFFSET(name), clean_field}
@@ -296,6 +294,7 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
};
const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
+#if IS_ENABLED(CONFIG_HYPERV)
__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
{
vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
@@ -362,6 +361,7 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
case MSR_IA32_VMX_PROCBASED_CTLS2:
ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
break;
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
break;
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 16731d2cf231..8d70f9aea94b 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -59,12 +59,12 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
SECONDARY_EXEC_SHADOW_VMCS | \
SECONDARY_EXEC_TSC_SCALING | \
SECONDARY_EXEC_PAUSE_LOOP_EXITING)
-#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \
+ (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
-#if IS_ENABLED(CONFIG_HYPERV)
-
struct evmcs_field {
u16 offset;
u16 clean_field;
@@ -73,26 +73,56 @@ struct evmcs_field {
extern const struct evmcs_field vmcs_field_to_evmcs_1[];
extern const unsigned int nr_evmcs_1_fields;
-static __always_inline int get_evmcs_offset(unsigned long field,
- u16 *clean_field)
+static __always_inline int evmcs_field_offset(unsigned long field,
+ u16 *clean_field)
{
unsigned int index = ROL16(field, 6);
const struct evmcs_field *evmcs_field;
- if (unlikely(index >= nr_evmcs_1_fields)) {
- WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
- field);
+ if (unlikely(index >= nr_evmcs_1_fields))
return -ENOENT;
- }
evmcs_field = &vmcs_field_to_evmcs_1[index];
+ /*
+ * Use offset=0 to detect holes in eVMCS. This offset belongs to
+ * 'revision_id' but this field has no encoding and is supposed to
+ * be accessed directly.
+ */
+ if (unlikely(!evmcs_field->offset))
+ return -ENOENT;
+
if (clean_field)
*clean_field = evmcs_field->clean_field;
return evmcs_field->offset;
}
+static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs,
+ unsigned long field, u16 offset)
+{
+ /*
+ * vmcs12_read_any() doesn't care whether the supplied structure
+ * is 'struct vmcs12' or 'struct hv_enlightened_vmcs' as it takes
+ * the exact offset of the required field, use it for convenience
+ * here.
+ */
+ return vmcs12_read_any((void *)evmcs, field, offset);
+}
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+static __always_inline int get_evmcs_offset(unsigned long field,
+ u16 *clean_field)
+{
+ int offset = evmcs_field_offset(field, clean_field);
+
+ WARN_ONCE(offset < 0, "KVM: accessing unsupported EVMCS field %lx\n",
+ field);
+
+ return offset;
+}
+
static __always_inline void evmcs_write64(unsigned long field, u64 value)
{
u16 clean_field;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f235f77cbc03..ba34e94049c7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -7,6 +7,7 @@
#include <asm/mmu_context.h>
#include "cpuid.h"
+#include "evmcs.h"
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
@@ -4851,18 +4852,20 @@ static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
/*
- * We should allocate a shadow vmcs for vmcs01 only when L1
- * executes VMXON and free it when L1 executes VMXOFF.
- * As it is invalid to execute VMXON twice, we shouldn't reach
- * here when vmcs01 already have an allocated shadow vmcs.
+ * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it
+ * when L1 executes VMXOFF or the vCPU is forced out of nested
+ * operation. VMXON faults if the CPU is already post-VMXON, so it
+ * should be impossible to already have an allocated shadow VMCS. KVM
+ * doesn't support virtualization of VMCS shadowing, so vmcs01 should
+ * always be the loaded VMCS.
*/
- WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
+ if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs))
+ return loaded_vmcs->shadow_vmcs;
+
+ loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
+ if (loaded_vmcs->shadow_vmcs)
+ vmcs_clear(loaded_vmcs->shadow_vmcs);
- if (!loaded_vmcs->shadow_vmcs) {
- loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
- if (loaded_vmcs->shadow_vmcs)
- vmcs_clear(loaded_vmcs->shadow_vmcs);
- }
return loaded_vmcs->shadow_vmcs;
}
@@ -5099,27 +5102,49 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- /*
- * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
- * any VMREAD sets the ALU flags for VMfailInvalid.
- */
- if (vmx->nested.current_vmptr == INVALID_GPA ||
- (is_guest_mode(vcpu) &&
- get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
- return nested_vmx_failInvalid(vcpu);
-
/* Decode instruction info and find the field to read */
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
- offset = vmcs_field_to_offset(field);
- if (offset < 0)
- return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+ if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+ /*
+ * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
+ * any VMREAD sets the ALU flags for VMfailInvalid.
+ */
+ if (vmx->nested.current_vmptr == INVALID_GPA ||
+ (is_guest_mode(vcpu) &&
+ get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
+ return nested_vmx_failInvalid(vcpu);
- if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
- copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+ offset = get_vmcs12_field_offset(field);
+ if (offset < 0)
+ return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
+ if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
+ copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
- /* Read the field, zero-extended to a u64 value */
- value = vmcs12_read_any(vmcs12, field, offset);
+ /* Read the field, zero-extended to a u64 value */
+ value = vmcs12_read_any(vmcs12, field, offset);
+ } else {
+ /*
+ * Hyper-V TLFS (as of 6.0b) explicitly states that, while an
+ * enlightened VMCS is active, VMREAD/VMWRITE instructions are
+ * unsupported. Unfortunately, certain versions of Windows 11
+ * don't comply with this requirement which is not enforced in
+ * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a
+ * workaround, as misbehaving guests will panic on VM-Fail.
+ * Note, enlightened VMCS is incompatible with shadow VMCS so
+ * all VMREADs from L2 should go to L1.
+ */
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
+ return nested_vmx_failInvalid(vcpu);
+
+ offset = evmcs_field_offset(field, NULL);
+ if (offset < 0)
+ return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
+ /* Read the field, zero-extended to a u64 value */
+ value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset);
+ }
/*
* Now copy part of this value to register or memory, as requested.
@@ -5214,7 +5239,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
- offset = vmcs_field_to_offset(field);
+ offset = get_vmcs12_field_offset(field);
if (offset < 0)
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
@@ -6462,7 +6487,7 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void)
max_idx = 0;
for (i = 0; i < nr_vmcs12_fields; i++) {
/* The vmcs12 table is very, very sparsely populated. */
- if (!vmcs_field_to_offset_table[i])
+ if (!vmcs12_field_offsets[i])
continue;
idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
@@ -6771,6 +6796,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
}
struct kvm_x86_nested_ops vmx_nested_ops = {
+ .leave_nested = vmx_leave_nested,
.check_events = vmx_check_nested_events,
.hv_timer_pending = nested_vmx_preemption_timer_pending,
.triple_fault = nested_vmx_triple_fault,
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 5e0ac57d6d1b..466d18fc0c5d 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -21,7 +21,6 @@
#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
- /* Index must match CPUID 0x0A.EBX bit vector */
[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
@@ -29,6 +28,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = {
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+ /* The above index must match CPUID 0x0A.EBX bit vector */
[7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};
@@ -75,11 +75,17 @@ static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
int i;
- for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
- if (intel_arch_events[i].eventsel == event_select &&
- intel_arch_events[i].unit_mask == unit_mask &&
- (pmc_is_fixed(pmc) || pmu->available_event_types & (1 << i)))
- break;
+ for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
+ if (intel_arch_events[i].eventsel != event_select ||
+ intel_arch_events[i].unit_mask != unit_mask)
+ continue;
+
+ /* disable event that is reported as not present by CPUID */
+ if ((i < 7) && !(pmu->available_event_types & (1 << i)))
+ return PERF_COUNT_HW_MAX + 1;
+
+ break;
+ }
if (i == ARRAY_SIZE(intel_arch_events))
return PERF_COUNT_HW_MAX;
@@ -481,7 +487,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->reserved_bits = 0xffffffff00200000ull;
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
- if (!entry)
+ if (!entry || !enable_pmu)
return;
eax.full = entry->eax;
edx.full = entry->edx;
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 88c53c521094..aa1fe9085d77 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -19,7 +19,7 @@
* wake the target vCPUs. vCPUs are removed from the list and the notification
* vector is reset when the vCPU is scheduled in.
*/
-static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
/*
* Protect the per-CPU list with a per-CPU spinlock to handle task migration.
* When a blocking vCPU is awakened _and_ migrated to a different pCPU, the
@@ -27,7 +27,7 @@ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
* CPU. IRQs must be disabled when taking this lock, otherwise deadlock will
* occur if a wakeup IRQ arrives and attempts to acquire the lock.
*/
-static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
@@ -51,7 +51,9 @@ static int pi_try_set_control(struct pi_desc *pi_desc, u64 old, u64 new)
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
{
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
struct pi_desc old, new;
+ unsigned long flags;
unsigned int dest;
/*
@@ -62,23 +64,34 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
if (!enable_apicv || !lapic_in_kernel(vcpu))
return;
- /* Nothing to do if PI.SN and PI.NDST both have the desired value. */
- if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+ /*
+ * If the vCPU wasn't on the wakeup list and wasn't migrated, then the
+ * full update can be skipped as neither the vector nor the destination
+ * needs to be changed.
+ */
+ if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR && vcpu->cpu == cpu) {
+ /*
+ * Clear SN if it was set due to being preempted. Again, do
+ * this even if there is no assigned device for simplicity.
+ */
+ if (pi_test_and_clear_sn(pi_desc))
+ goto after_clear_sn;
return;
+ }
+
+ local_irq_save(flags);
/*
- * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
- * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
- * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
- * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
- * correctly.
+ * If the vCPU was waiting for wakeup, remove the vCPU from the wakeup
+ * list of the _previous_ pCPU, which will not be the same as the
+ * current pCPU if the task was migrated.
*/
- if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
- pi_clear_sn(pi_desc);
- goto after_clear_sn;
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
+ raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ list_del(&vmx->pi_wakeup_list);
+ raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
}
- /* The full case. Set the new destination and clear SN. */
dest = cpu_physical_id(cpu);
if (!x2apic_mode)
dest = (dest << 8) & 0xFF00;
@@ -86,10 +99,22 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
do {
old.control = new.control = READ_ONCE(pi_desc->control);
+ /*
+ * Clear SN (as above) and refresh the destination APIC ID to
+ * handle task migration (@cpu != vcpu->cpu).
+ */
new.ndst = dest;
new.sn = 0;
+
+ /*
+ * Restore the notification vector; in the blocking case, the
+ * descriptor was modified on "put" to use the wakeup vector.
+ */
+ new.nv = POSTED_INTR_VECTOR;
} while (pi_try_set_control(pi_desc, old.control, new.control));
+ local_irq_restore(flags);
+
after_clear_sn:
/*
@@ -111,83 +136,25 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
irq_remapping_cap(IRQ_POSTING_CAP);
}
-void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
- if (!vmx_can_use_vtd_pi(vcpu->kvm))
- return;
-
- /* Set SN when the vCPU is preempted */
- if (vcpu->preempted)
- pi_set_sn(pi_desc);
-}
-
-static void __pi_post_block(struct kvm_vcpu *vcpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
-
- /*
- * Remove the vCPU from the wakeup list of the _previous_ pCPU, which
- * will not be the same as the current pCPU if the task was migrated.
- */
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
-
- dest = cpu_physical_id(vcpu->cpu);
- if (!x2apic_mode)
- dest = (dest << 8) & 0xFF00;
-
- WARN(pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR,
- "Wakeup handler not enabled while the vCPU was blocking");
-
- do {
- old.control = new.control = READ_ONCE(pi_desc->control);
-
- new.ndst = dest;
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- } while (pi_try_set_control(pi_desc, old.control, new.control));
-
- vcpu->pre_pcpu = -1;
-}
-
/*
- * This routine does the following things for vCPU which is going
- * to be blocked if VT-d PI is enabled.
- * - Store the vCPU to the wakeup list, so when interrupts happen
- * we can find the right vCPU to wake up.
- * - Change the Posted-interrupt descriptor as below:
- * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
- * - If 'ON' is set during this process, which means at least one
- * interrupt is posted for this vCPU, we cannot block it, in
- * this case, return 1, otherwise, return 0.
- *
+ * Put the vCPU on this pCPU's list of vCPUs that need to be awakened and set
+ * WAKEUP as the notification vector in the PI descriptor.
*/
-int pi_pre_block(struct kvm_vcpu *vcpu)
+static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
{
- struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct pi_desc old, new;
unsigned long flags;
- if (!vmx_can_use_vtd_pi(vcpu->kvm) ||
- vmx_interrupt_blocked(vcpu))
- return 0;
-
local_irq_save(flags);
- vcpu->pre_pcpu = vcpu->cpu;
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu));
- list_add_tail(&vcpu->blocked_vcpu_list,
- &per_cpu(blocked_vcpu_on_cpu, vcpu->cpu));
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu));
+ raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ list_add_tail(&vmx->pi_wakeup_list,
+ &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
+ raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
- WARN(pi_desc->sn == 1,
- "Posted Interrupt Suppress Notification set before blocking");
+ WARN(pi_desc->sn, "PI descriptor SN field set before blocking");
do {
old.control = new.control = READ_ONCE(pi_desc->control);
@@ -196,24 +163,37 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
new.nv = POSTED_INTR_WAKEUP_VECTOR;
} while (pi_try_set_control(pi_desc, old.control, new.control));
- /* We should not block the vCPU if an interrupt is posted for it. */
- if (pi_test_on(pi_desc))
- __pi_post_block(vcpu);
+ /*
+ * Send a wakeup IPI to this CPU if an interrupt may have been posted
+ * before the notification vector was updated, in which case the IRQ
+ * will arrive on the non-wakeup vector. An IPI is needed as calling
+ * try_to_wake_up() from ->sched_out() isn't allowed (IRQs are not
+ * enabled until it is safe to call try_to_wake_up() on the task being
+ * scheduled out).
+ */
+ if (pi_test_on(&new))
+ apic->send_IPI_self(POSTED_INTR_WAKEUP_VECTOR);
local_irq_restore(flags);
- return (vcpu->pre_pcpu == -1);
}
-void pi_post_block(struct kvm_vcpu *vcpu)
+void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
{
- unsigned long flags;
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- if (vcpu->pre_pcpu == -1)
+ if (!vmx_can_use_vtd_pi(vcpu->kvm))
return;
- local_irq_save(flags);
- __pi_post_block(vcpu);
- local_irq_restore(flags);
+ if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
+ pi_enable_wakeup_handler(vcpu);
+
+ /*
+ * Set SN when the vCPU is preempted. Note, the vCPU can be seen as
+ * both blocking and preempted, e.g. if it's preempted between setting
+ * its wait state and manually scheduling out.
+ */
+ if (vcpu->preempted)
+ pi_set_sn(pi_desc);
}
/*
@@ -221,24 +201,23 @@ void pi_post_block(struct kvm_vcpu *vcpu)
*/
void pi_wakeup_handler(void)
{
- struct kvm_vcpu *vcpu;
int cpu = smp_processor_id();
+ struct vcpu_vmx *vmx;
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
- list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
- blocked_vcpu_list) {
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
+ list_for_each_entry(vmx, &per_cpu(wakeup_vcpus_on_cpu, cpu),
+ pi_wakeup_list) {
- if (pi_test_on(pi_desc))
- kvm_vcpu_kick(vcpu);
+ if (pi_test_on(&vmx->pi_desc))
+ kvm_vcpu_wake_up(&vmx->vcpu);
}
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
}
void __init pi_init_cpu(int cpu)
{
- INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
- spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ INIT_LIST_HEAD(&per_cpu(wakeup_vcpus_on_cpu, cpu));
+ raw_spin_lock_init(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
}
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
@@ -254,7 +233,7 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
* Bail out of the block loop if the VM has an assigned
* device, but the blocking vCPU didn't reconfigure the
* PI.NV to the wakeup vector, i.e. the assigned device
- * came along after the initial check in pi_pre_block().
+ * came along after the initial check in vmx_vcpu_pi_put().
*/
void vmx_pi_start_assignment(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 36ae035f14aa..eb14e76b84ef 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -40,6 +40,12 @@ static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
{
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
@@ -88,8 +94,6 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
-int pi_pre_block(struct kvm_vcpu *vcpu);
-void pi_post_block(struct kvm_vcpu *vcpu);
void pi_wakeup_handler(void);
void __init pi_init_cpu(int cpu);
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index cab6ba7a5005..2251b60920f8 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -8,7 +8,7 @@
FIELD(number, name), \
[ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
-const unsigned short vmcs_field_to_offset_table[] = {
+const unsigned short vmcs12_field_offsets[] = {
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
FIELD(POSTED_INTR_NV, posted_intr_nv),
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
@@ -151,4 +151,4 @@ const unsigned short vmcs_field_to_offset_table[] = {
FIELD(HOST_RSP, host_rsp),
FIELD(HOST_RIP, host_rip),
};
-const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs_field_to_offset_table);
+const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 2a45f026ee11..746129ddd5ae 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -361,10 +361,10 @@ static inline void vmx_check_vmcs12_offsets(void)
CHECK_OFFSET(guest_pml_index, 996);
}
-extern const unsigned short vmcs_field_to_offset_table[];
+extern const unsigned short vmcs12_field_offsets[];
extern const unsigned int nr_vmcs12_fields;
-static inline short vmcs_field_to_offset(unsigned long field)
+static inline short get_vmcs12_field_offset(unsigned long field)
{
unsigned short offset;
unsigned int index;
@@ -377,7 +377,7 @@ static inline short vmcs_field_to_offset(unsigned long field)
return -ENOENT;
index = array_index_nospec(index, nr_vmcs12_fields);
- offset = vmcs_field_to_offset_table[index];
+ offset = vmcs12_field_offsets[index];
if (offset == 0)
return -ENOENT;
return offset;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1b2e9d8c5cc9..aca3ae2a02f3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1487,11 +1487,12 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
-static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
+static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
{
/*
* Emulation of instructions in SGX enclaves is impossible as RIP does
- * not point tthe failing instruction, and even if it did, the code
+ * not point at the failing instruction, and even if it did, the code
* stream is inaccessible. Inject #UD instead of exiting to userspace
* so that guest userspace can't DoS the guest simply by triggering
* emulation (enclaves are CPL3 only).
@@ -2603,7 +2604,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
return -EIO;
vmcs_conf->size = vmx_msr_high & 0x1fff;
- vmcs_conf->order = get_order(vmcs_conf->size);
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
vmcs_conf->revision_id = vmx_msr_low;
@@ -2628,7 +2628,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
struct page *pages;
struct vmcs *vmcs;
- pages = __alloc_pages_node(node, flags, vmcs_config.order);
+ pages = __alloc_pages_node(node, flags, 0);
if (!pages)
return NULL;
vmcs = page_address(pages);
@@ -2647,7 +2647,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
void free_vmcs(struct vmcs *vmcs)
{
- free_pages((unsigned long)vmcs, vmcs_config.order);
+ free_page((unsigned long)vmcs);
}
/*
@@ -3931,12 +3931,10 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
pt_update_intercept_for_msr(vcpu);
}
-static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
- bool nested)
+static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
+ int pi_vec)
{
#ifdef CONFIG_SMP
- int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
-
if (vcpu->mode == IN_GUEST_MODE) {
/*
* The vector of interrupt to be delivered to vcpu had
@@ -3964,10 +3962,15 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
*/
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
- return true;
+ return;
}
#endif
- return false;
+ /*
+ * The vCPU isn't in the guest; wake the vCPU in case it is blocking,
+ * otherwise do nothing as KVM will grab the highest priority pending
+ * IRQ via ->sync_pir_to_irr() in vcpu_enter_guest().
+ */
+ kvm_vcpu_wake_up(vcpu);
}
static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
@@ -3997,8 +4000,7 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
smp_mb__after_atomic();
/* the PIR and ON have been set by L1. */
- if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
- kvm_vcpu_kick(vcpu);
+ kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_NESTED_VECTOR);
return 0;
}
return -1;
@@ -4035,9 +4037,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
* guaranteed to see PID.ON=1 and sync the PIR to IRR if triggering a
* posted interrupt "fails" because vcpu->mode != IN_GUEST_MODE.
*/
- if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
- kvm_vcpu_kick(vcpu);
-
+ kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_VECTOR);
return 0;
}
@@ -4094,10 +4094,14 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
/*
- * If 32-bit syscall is enabled, vmx_vcpu_load_vcms rewrites
- * HOST_IA32_SYSENTER_ESP.
+ * SYSENTER is used for 32-bit system calls on either 32-bit or
+ * 64-bit kernels. It is always zero if neither is allowed, otherwise
+ * vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may
+ * have already done so!).
*/
- vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
+ if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32))
+ vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
+
rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
@@ -4901,8 +4905,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
dr6 = vmx_get_exit_qual(vcpu);
if (!(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ /*
+ * If the #DB was due to ICEBP, a.k.a. INT1, skip the
+ * instruction. ICEBP generates a trap-like #DB, but
+ * despite its interception control being tied to #DB,
+ * is an instruction intercept, i.e. the VM-Exit occurs
+ * on the ICEBP itself. Note, skipping ICEBP also
+ * clears STI and MOVSS blocking.
+ *
+ * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
+ * if single-step is enabled in RFLAGS and STI or MOVSS
+ * blocking is active, as the CPU doesn't set the bit
+ * on VM-Exit due to #DB interception. VM-Entry has a
+ * consistency check that a single-step #DB is pending
+ * in this scenario as the previous instruction cannot
+ * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
+ * don't modify RFLAGS), therefore the one instruction
+ * delay when activating single-step breakpoints must
+ * have already expired. Note, the CPU sets/clears BS
+ * as appropriate for all other VM-Exit types.
+ */
if (is_icebp(intr_info))
WARN_ON(!skip_emulated_instruction(vcpu));
+ else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
return 1;
@@ -5397,7 +5426,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
gpa_t gpa;
- if (!vmx_can_emulate_instruction(vcpu, NULL, 0))
+ if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0))
return 1;
/*
@@ -5426,6 +5455,14 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
return 1;
}
+static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ return vmx->emulation_required && !vmx->rmode.vm86_active &&
+ vcpu->arch.exception.pending;
+}
+
static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5445,8 +5482,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (!kvm_emulate_instruction(vcpu, 0))
return 0;
- if (vmx->emulation_required && !vmx->rmode.vm86_active &&
- vcpu->arch.exception.pending) {
+ if (vmx_emulation_required_with_pending_exception(vcpu)) {
kvm_prepare_emulation_failure_exit(vcpu);
return 0;
}
@@ -5468,6 +5504,16 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
return 1;
}
+static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+ if (vmx_emulation_required_with_pending_exception(vcpu)) {
+ kvm_prepare_emulation_failure_exit(vcpu);
+ return 0;
+ }
+
+ return 1;
+}
+
static void grow_ple_window(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6928,6 +6974,8 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0);
vmx = to_vmx(vcpu);
+ INIT_LIST_HEAD(&vmx->pi_wakeup_list);
+
err = -ENOMEM;
vmx->vpid = allocate_vpid();
@@ -7549,25 +7597,6 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
}
-static int vmx_pre_block(struct kvm_vcpu *vcpu)
-{
- if (pi_pre_block(vcpu))
- return 1;
-
- if (kvm_lapic_hv_timer_in_use(vcpu))
- kvm_lapic_switch_to_sw_timer(vcpu);
-
- return 0;
-}
-
-static void vmx_post_block(struct kvm_vcpu *vcpu)
-{
- if (kvm_x86_ops.set_hv_timer)
- kvm_lapic_switch_to_hv_timer(vcpu);
-
- pi_post_block(vcpu);
-}
-
static void vmx_setup_mce(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.mcg_cap & MCG_LMCE_P)
@@ -7710,6 +7739,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.tlb_flush_gva = vmx_flush_tlb_gva,
.tlb_flush_guest = vmx_flush_tlb_guest,
+ .vcpu_pre_run = vmx_vcpu_pre_run,
.run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
.skip_emulated_instruction = vmx_skip_emulated_instruction,
@@ -7768,9 +7798,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.cpu_dirty_log_size = PML_ENTITY_NUM,
.update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
- .pre_block = vmx_pre_block,
- .post_block = vmx_post_block,
-
.pmu_ops = &intel_pmu_ops,
.nested_ops = &vmx_nested_ops,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index f8fc7441baea..7f2c82e7f38f 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -317,6 +317,9 @@ struct vcpu_vmx {
/* Posted interrupt descriptor */
struct pi_desc pi_desc;
+ /* Used if this vCPU is waiting for PI notification wakeup. */
+ struct list_head pi_wakeup_list;
+
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 76b4803dd3bd..74b53a16f38a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -187,6 +187,11 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
int __read_mostly pi_inject_timer = -1;
module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+/* Enable/disable PMU virtualization */
+bool __read_mostly enable_pmu = true;
+EXPORT_SYMBOL_GPL(enable_pmu);
+module_param(enable_pmu, bool, 0444);
+
/*
* Restoring the host value for MSRs that are only consumed when running in
* usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@ -3530,6 +3535,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~supported_xss)
return 1;
vcpu->arch.ia32_xss = data;
+ kvm_update_cpuid_runtime(vcpu);
break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
@@ -4224,6 +4230,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SREGS2:
case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
case KVM_CAP_VCPU_ATTRIBUTES:
+ case KVM_CAP_SYS_ATTRIBUTES:
r = 1;
break;
case KVM_CAP_EXIT_HYPERCALL:
@@ -4326,7 +4333,49 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
}
return r;
+}
+
+static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr)
+{
+ void __user *uaddr = (void __user*)(unsigned long)attr->addr;
+
+ if ((u64)(unsigned long)uaddr != attr->addr)
+ return ERR_PTR(-EFAULT);
+ return uaddr;
+}
+
+static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
+{
+ u64 __user *uaddr = kvm_get_attr_addr(attr);
+
+ if (attr->group)
+ return -ENXIO;
+
+ if (IS_ERR(uaddr))
+ return PTR_ERR(uaddr);
+
+ switch (attr->attr) {
+ case KVM_X86_XCOMP_GUEST_SUPP:
+ if (put_user(supported_xcr0, uaddr))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENXIO;
+ break;
+ }
+}
+static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr)
+{
+ if (attr->group)
+ return -ENXIO;
+
+ switch (attr->attr) {
+ case KVM_X86_XCOMP_GUEST_SUPP:
+ return 0;
+ default:
+ return -ENXIO;
+ }
}
long kvm_arch_dev_ioctl(struct file *filp,
@@ -4417,6 +4466,22 @@ long kvm_arch_dev_ioctl(struct file *filp,
case KVM_GET_SUPPORTED_HV_CPUID:
r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
break;
+ case KVM_GET_DEVICE_ATTR: {
+ struct kvm_device_attr attr;
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_x86_dev_get_attr(&attr);
+ break;
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ struct kvm_device_attr attr;
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_x86_dev_has_attr(&attr);
+ break;
+ }
default:
r = -EINVAL;
break;
@@ -4855,8 +4920,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
- if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
+ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
+ kvm_x86_ops.nested_ops->leave_nested(vcpu);
kvm_smm_changed(vcpu, events->smi.smm);
+ }
vcpu->arch.smi_pending = events->smi.pending;
@@ -5017,11 +5084,11 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu,
static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
+ u64 __user *uaddr = kvm_get_attr_addr(attr);
int r;
- if ((u64)(unsigned long)uaddr != attr->addr)
- return -EFAULT;
+ if (IS_ERR(uaddr))
+ return PTR_ERR(uaddr);
switch (attr->attr) {
case KVM_VCPU_TSC_OFFSET:
@@ -5040,12 +5107,12 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
+ u64 __user *uaddr = kvm_get_attr_addr(attr);
struct kvm *kvm = vcpu->kvm;
int r;
- if ((u64)(unsigned long)uaddr != attr->addr)
- return -EFAULT;
+ if (IS_ERR(uaddr))
+ return PTR_ERR(uaddr);
switch (attr->attr) {
case KVM_VCPU_TSC_OFFSET: {
@@ -5230,17 +5297,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
- /*
- * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
- * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
- * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
- * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with
- * the core vCPU model on the fly, so fail.
- */
- r = -EINVAL;
- if (vcpu->arch.last_vmentry_cpu != -1)
- goto out;
-
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@@ -5251,14 +5307,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
- /*
- * KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in
- * KVM_SET_CPUID case above.
- */
- r = -EINVAL;
- if (vcpu->arch.last_vmentry_cpu != -1)
- goto out;
-
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@@ -6824,6 +6872,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
}
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
+static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
+{
+ return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type,
+ insn, insn_len);
+}
+
int handle_ud(struct kvm_vcpu *vcpu)
{
static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
@@ -6831,7 +6886,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e;
- if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0)))
+ if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0)))
return 1;
if (force_emulation_prefix &&
@@ -8207,7 +8262,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
bool writeback = true;
bool write_fault_to_spt;
- if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len)))
+ if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len)))
return 1;
vcpu->arch.l1tf_flush_l1d = true;
@@ -9720,7 +9775,7 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
}
-void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
return;
@@ -9945,10 +10000,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
smp_mb__after_srcu_read_unlock();
/*
- * This handles the case where a posted interrupt was
- * notified with kvm_vcpu_kick. Assigned devices can
- * use the POSTED_INTR_VECTOR even if APICv is disabled,
- * so do it even if APICv is disabled on this vCPU.
+ * Process pending posted interrupts to handle the case where the
+ * notification IRQ arrived in the host, or was never sent (because the
+ * target vCPU wasn't running). Do this regardless of the vCPU's APICv
+ * status, as KVM doesn't update assigned devices when APICv is inhibited,
+ * i.e. they can post interrupts even if APICv is temporarily disabled.
*/
if (kvm_lapic_enabled(vcpu))
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
@@ -10113,8 +10169,20 @@ out:
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
- if (!kvm_arch_vcpu_runnable(vcpu) &&
- (!kvm_x86_ops.pre_block || static_call(kvm_x86_pre_block)(vcpu) == 0)) {
+ bool hv_timer;
+
+ if (!kvm_arch_vcpu_runnable(vcpu)) {
+ /*
+ * Switch to the software timer before halt-polling/blocking as
+ * the guest's timer may be a break event for the vCPU, and the
+ * hypervisor timer runs only when the CPU is in guest mode.
+ * Switch before halt-polling so that KVM recognizes an expired
+ * timer before blocking.
+ */
+ hv_timer = kvm_lapic_hv_timer_in_use(vcpu);
+ if (hv_timer)
+ kvm_lapic_switch_to_sw_timer(vcpu);
+
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
kvm_vcpu_halt(vcpu);
@@ -10122,8 +10190,8 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- if (kvm_x86_ops.post_block)
- static_call(kvm_x86_post_block)(vcpu);
+ if (hv_timer)
+ kvm_lapic_switch_to_hv_timer(vcpu);
if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
return 1;
@@ -10316,6 +10384,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
r = -EINTR;
goto out;
}
+ /*
+ * It should be impossible for the hypervisor timer to be in
+ * use before KVM has ever run the vCPU.
+ */
+ WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
kvm_vcpu_block(vcpu);
if (kvm_apic_accept_events(vcpu) < 0) {
r = 0;
@@ -10360,10 +10433,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
} else
WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
- if (kvm_run->immediate_exit)
+ if (kvm_run->immediate_exit) {
r = -EINTR;
- else
- r = vcpu_run(vcpu);
+ goto out;
+ }
+
+ r = static_call(kvm_x86_vcpu_pre_run)(vcpu);
+ if (r <= 0)
+ goto out;
+
+ r = vcpu_run(vcpu);
out:
kvm_put_guest_fpu(vcpu);
@@ -11199,7 +11278,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.msr_misc_features_enables = 0;
- vcpu->arch.xcr0 = XFEATURE_MASK_FP;
+ __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
+ __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
}
/* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */
@@ -11216,8 +11296,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
- vcpu->arch.ia32_xss = 0;
-
static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index bec8ed090abc..635b75f9e145 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -336,6 +336,7 @@ extern u64 host_xcr0;
extern u64 supported_xcr0;
extern u64 host_xss;
extern u64 supported_xss;
+extern bool enable_pmu;
static inline bool kvm_mpx_supported(void)
{
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 0e3f7d6e9fd7..bad57535fad0 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -316,10 +316,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
"\tnotq %0\n"
"\t" LOCK_PREFIX "andq %0, %2\n"
"2:\n"
- "\t.section .fixup,\"ax\"\n"
- "3:\tjmp\t2b\n"
- "\t.previous\n"
- _ASM_EXTABLE_UA(1b, 3b)
+ _ASM_EXTABLE_UA(1b, 2b)
: "=r" (evtchn_pending_sel),
"+m" (vi->evtchn_pending_sel),
"+m" (v->arch.xen.evtchn_pending_sel)
@@ -335,10 +332,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
"\tnotl %0\n"
"\t" LOCK_PREFIX "andl %0, %2\n"
"2:\n"
- "\t.section .fixup,\"ax\"\n"
- "3:\tjmp\t2b\n"
- "\t.previous\n"
- _ASM_EXTABLE_UA(1b, 3b)
+ _ASM_EXTABLE_UA(1b, 2b)
: "=r" (evtchn_pending_sel32),
"+m" (vi->evtchn_pending_sel),
"+m" (v->arch.xen.evtchn_pending_sel)
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2edd86649468..615a76d70019 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -353,8 +353,8 @@ static void pci_fixup_video(struct pci_dev *pdev)
}
}
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
static const struct dmi_system_id msi_k8t_dmi_table[] = {
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 95d26a69088b..40d6a06e41c8 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -8,7 +8,6 @@ endmenu
config UML_X86
def_bool y
- select GENERIC_FIND_FIRST_BIT
config 64BIT
bool "64-bit kernel" if "$(SUBARCH)" = "x86"
diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h
index 3f71d364ba90..cd225896c40f 100644
--- a/arch/xtensa/include/asm/bitops.h
+++ b/arch/xtensa/include/asm/bitops.h
@@ -205,7 +205,6 @@ BIT_OPS(change, "xor", )
#undef BIT_OP
#undef TEST_AND_BIT_OP
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 07b642c1916a..8eb6ad1a3a1d 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -208,7 +208,7 @@ static int simdisk_detach(struct simdisk *dev)
static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
- struct simdisk *dev = PDE_DATA(file_inode(file));
+ struct simdisk *dev = pde_data(file_inode(file));
const char *s = dev->filename;
if (s) {
ssize_t n = simple_read_from_buffer(buf, size, ppos,
@@ -225,7 +225,7 @@ static ssize_t proc_write_simdisk(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
char *tmp = memdup_user_nul(buf, count);
- struct simdisk *dev = PDE_DATA(file_inode(file));
+ struct simdisk *dev = pde_data(file_inode(file));
int err;
if (IS_ERR(tmp))
diff --git a/block/bdev.c b/block/bdev.c
index 8bf93a19041b..102837a37051 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -24,7 +24,6 @@
#include <linux/pseudo_fs.h>
#include <linux/uio.h>
#include <linux/namei.h>
-#include <linux/cleancache.h>
#include <linux/part_stat.h>
#include <linux/uaccess.h>
#include "../fs/internal.h"
@@ -88,10 +87,6 @@ void invalidate_bdev(struct block_device *bdev)
lru_add_drain_all(); /* make sure all lru add caches are flushed */
invalidate_mapping_pages(mapping, 0, -1);
}
- /* 99% of the time, we don't need to flush the cleancache on the bdev.
- * But, for the strange corners, lets be cautious
- */
- cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);
diff --git a/block/bio.c b/block/bio.c
index 0d400ba2dbd1..4312a8085396 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -569,7 +569,8 @@ static void bio_truncate(struct bio *bio, unsigned new_size)
offset = new_size - done;
else
offset = 0;
- zero_user(bv.bv_page, offset, bv.bv_len - offset);
+ zero_user(bv.bv_page, bv.bv_offset + offset,
+ bv.bv_len - offset);
truncated = true;
}
done += bv.bv_len;
diff --git a/block/blk-core.c b/block/blk-core.c
index 97f8bc8d3a79..d93e3bb9a769 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1061,20 +1061,32 @@ again:
}
static unsigned long __part_start_io_acct(struct block_device *part,
- unsigned int sectors, unsigned int op)
+ unsigned int sectors, unsigned int op,
+ unsigned long start_time)
{
const int sgrp = op_stat_group(op);
- unsigned long now = READ_ONCE(jiffies);
part_stat_lock();
- update_io_ticks(part, now, false);
+ update_io_ticks(part, start_time, false);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, sectors[sgrp], sectors);
part_stat_local_inc(part, in_flight[op_is_write(op)]);
part_stat_unlock();
- return now;
+ return start_time;
+}
+
+/**
+ * bio_start_io_acct_time - start I/O accounting for bio based drivers
+ * @bio: bio to start account for
+ * @start_time: start time that should be passed back to bio_end_io_acct().
+ */
+void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
+{
+ __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), start_time);
}
+EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
/**
* bio_start_io_acct - start I/O accounting for bio based drivers
@@ -1084,14 +1096,15 @@ static unsigned long __part_start_io_acct(struct block_device *part,
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio));
+ return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
unsigned int op)
{
- return __part_start_io_acct(disk->part0, sectors, op);
+ return __part_start_io_acct(disk->part0, sectors, op, jiffies);
}
EXPORT_SYMBOL(disk_start_io_acct);
diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c
index b925f3db3ab7..18c68d8b9138 100644
--- a/block/blk-ia-ranges.c
+++ b/block/blk-ia-ranges.c
@@ -144,7 +144,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk,
&q->kobj, "%s", "independent_access_ranges");
if (ret) {
q->ia_ranges = NULL;
- kfree(iars);
+ kobject_put(&iars->kobj);
return ret;
}
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e55a6834c9a6..845f74e8dd7b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -17,6 +17,21 @@
#include "blk-mq-tag.h"
/*
+ * Recalculate wakeup batch when tag is shared by hctx.
+ */
+static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
+ unsigned int users)
+{
+ if (!users)
+ return;
+
+ sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
+ users);
+ sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
+ users);
+}
+
+/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
* to get tag when first time, the other shared-tag users could reserve
@@ -24,18 +39,26 @@
*/
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
+ unsigned int users;
+
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
- if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
- !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+ test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
+ return true;
+ }
} else {
- if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
- !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+ test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
+ return true;
+ }
}
+ users = atomic_inc_return(&hctx->tags->active_queues);
+
+ blk_mq_update_wake_batch(hctx->tags, users);
+
return true;
}
@@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_tags *tags = hctx->tags;
+ unsigned int users;
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
@@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
return;
}
- atomic_dec(&tags->active_queues);
+ users = atomic_dec_return(&tags->active_queues);
+
+ blk_mq_update_wake_batch(tags, users);
blk_mq_tag_wakeup_all(tags, false);
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a6d4780580fc..1adfe4824ef5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2922,6 +2922,8 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
*/
blk_mq_run_dispatch_ops(rq->q,
ret = blk_mq_request_issue_directly(rq, true));
+ if (ret)
+ blk_account_io_done(rq, ktime_get_ns());
return ret;
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
@@ -2976,6 +2978,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
bio = bio_clone_fast(bio_src, gfp_mask, bs);
if (!bio)
goto free_and_out;
+ bio->bi_bdev = rq->q->disk->part0;
if (bio_ctr && bio_ctr(bio, bio_src, data))
goto free_and_out;
@@ -3284,7 +3287,7 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu,
struct blk_mq_hw_ctx *hctx)
{
- if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu)
+ if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu)
return false;
if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids)
return false;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e20eadfcf5c8..9f32882ceb2f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -811,6 +811,9 @@ static void blk_release_queue(struct kobject *kobj)
bioset_exit(&q->bio_split);
+ if (blk_queue_has_srcu(q))
+ cleanup_srcu_struct(q->srcu);
+
ida_simple_remove(&blk_queue_ida, q->id);
call_rcu(&q->rcu_head, blk_free_queue_rcu);
}
@@ -887,7 +890,6 @@ int blk_register_queue(struct gendisk *disk)
kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
mutex_unlock(&q->sysfs_lock);
- ret = 0;
unlock:
mutex_unlock(&q->sysfs_dir_lock);
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 85d919bf60c7..3ed5eaf3446a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -865,7 +865,7 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
-SHOW_INT(deadline_async_depth_show, dd->front_merges);
+SHOW_INT(deadline_async_depth_show, dd->async_depth);
SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
#undef SHOW_INT
#undef SHOW_JIFFIES
@@ -895,7 +895,7 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA
STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
-STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);
+STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
#undef STORE_FUNCTION
#undef STORE_INT
diff --git a/certs/Makefile b/certs/Makefile
index f7041c29a2e0..3ea7fe60823f 100644
--- a/certs/Makefile
+++ b/certs/Makefile
@@ -68,14 +68,14 @@ $(obj)/x509.genkey:
endif # CONFIG_MODULE_SIG_KEY
# If CONFIG_MODULE_SIG_KEY isn't a PKCS#11 URI, depend on it
-ifneq ($(filter-out pkcs11:%, %(CONFIG_MODULE_SIG_KEY)),)
+ifneq ($(filter-out pkcs11:%, $(CONFIG_MODULE_SIG_KEY)),)
X509_DEP := $(CONFIG_MODULE_SIG_KEY)
endif
$(obj)/system_certificates.o: $(obj)/signing_key.x509
$(obj)/signing_key.x509: $(X509_DEP) $(obj)/extract-cert FORCE
- $(call if_changed,extract_certs,$(if $(X509_DEP),$<,$(CONFIG_MODULE_SIG_KEY)))
+ $(call if_changed,extract_certs,$(if $(CONFIG_MODULE_SIG_KEY),$(if $(X509_DEP),$<,$(CONFIG_MODULE_SIG_KEY)),""))
endif # CONFIG_MODULE_SIG
targets += signing_key.x509
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index a9d2de403c0c..866560cbb082 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -915,30 +915,31 @@ int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
{
- int ret_val = 0;
void __iomem *vaddr = NULL;
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
struct cpc_reg *reg = &reg_res->cpc_entry.reg;
if (reg_res->type == ACPI_TYPE_INTEGER) {
*val = reg_res->cpc_entry.int_value;
- return ret_val;
+ return 0;
}
*val = 0;
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
u32 width = 8 << (reg->access_width - 1);
+ u32 val_u32;
acpi_status status;
status = acpi_os_read_port((acpi_io_address)reg->address,
- (u32 *)val, width);
+ &val_u32, width);
if (ACPI_FAILURE(status)) {
pr_debug("Error: Failed to read SystemIO port %llx\n",
reg->address);
return -EFAULT;
}
+ *val = val_u32;
return 0;
} else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
@@ -966,10 +967,10 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
default:
pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n",
reg->bit_width, pcc_ss_id);
- ret_val = -EFAULT;
+ return -EFAULT;
}
- return ret_val;
+ return 0;
}
static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c
index e7ab0fc90db9..c0da24c9f8c3 100644
--- a/drivers/acpi/dptf/dptf_pch_fivr.c
+++ b/drivers/acpi/dptf/dptf_pch_fivr.c
@@ -151,6 +151,7 @@ static int pch_fivr_remove(struct platform_device *pdev)
static const struct acpi_device_id pch_fivr_device_ids[] = {
{"INTC1045", 0},
{"INTC1049", 0},
+ {"INTC10A3", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, pch_fivr_device_ids);
diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c
index a24d5d7aa117..dc1f52a5b3f4 100644
--- a/drivers/acpi/dptf/dptf_power.c
+++ b/drivers/acpi/dptf/dptf_power.c
@@ -231,6 +231,8 @@ static const struct acpi_device_id int3407_device_ids[] = {
{"INTC1050", 0},
{"INTC1060", 0},
{"INTC1061", 0},
+ {"INTC10A4", 0},
+ {"INTC10A5", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, int3407_device_ids);
diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c
index da5d5f0be2f2..42a556346548 100644
--- a/drivers/acpi/dptf/int340x_thermal.c
+++ b/drivers/acpi/dptf/int340x_thermal.c
@@ -37,6 +37,12 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = {
{"INTC1050"},
{"INTC1060"},
{"INTC1061"},
+ {"INTC10A0"},
+ {"INTC10A1"},
+ {"INTC10A2"},
+ {"INTC10A3"},
+ {"INTC10A4"},
+ {"INTC10A5"},
{""},
};
diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h
index dc9a6efa514b..dd9bb8ca2244 100644
--- a/drivers/acpi/fan.h
+++ b/drivers/acpi/fan.h
@@ -10,4 +10,5 @@
{"INT3404", }, /* Fan */ \
{"INTC1044", }, /* Fan for Tiger Lake generation */ \
{"INTC1048", }, /* Fan for Alder Lake generation */ \
+ {"INTC10A2", }, /* Fan for Raptor Lake generation */ \
{"PNP0C0B", } /* Generic ACPI fan */
diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c
index 0cca7991f186..4322f2da6d10 100644
--- a/drivers/acpi/proc.c
+++ b/drivers/acpi/proc.c
@@ -127,7 +127,7 @@ static int
acpi_system_wakeup_device_open_fs(struct inode *inode, struct file *file)
{
return single_open(file, acpi_system_wakeup_device_seq_show,
- PDE_DATA(inode));
+ pde_data(inode));
}
static const struct proc_ops acpi_system_wakeup_device_proc_ops = {
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index 0912846bc1b0..05c2ab375756 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -595,7 +595,7 @@ static unsigned int octeon_cf_dma_finished(struct ata_port *ap,
union cvmx_mio_boot_dma_intx dma_int;
u8 status;
- trace_ata_bmdma_stop(qc, &qc->tf, qc->tag);
+ trace_ata_bmdma_stop(ap, &qc->tf, qc->tag);
if (ap->hsm_task_state != HSM_ST_LAST)
return 0;
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index 028329428b75..87c7c90676ca 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -128,6 +128,8 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res,
ap = host->ports[0];
ap->ops = devm_kzalloc(dev, sizeof(*ap->ops), GFP_KERNEL);
+ if (!ap->ops)
+ return -ENOMEM;
ap->ops->inherits = &ata_sff_port_ops;
ap->ops->cable_detect = ata_cable_unknown;
ap->ops->set_mode = pata_platform_set_mode;
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index bc8e8d9f176b..3e726ee91fdc 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -178,7 +178,6 @@ static void ia_hack_tcq(IADEV *dev) {
static u16 get_desc (IADEV *dev, struct ia_vcc *iavcc) {
u_short desc_num, i;
- struct sk_buff *skb;
struct ia_vcc *iavcc_r = NULL;
unsigned long delta;
static unsigned long timer = 0;
@@ -202,8 +201,7 @@ static u16 get_desc (IADEV *dev, struct ia_vcc *iavcc) {
else
dev->ffL.tcq_rd -= 2;
*(u_short *)(dev->seg_ram + dev->ffL.tcq_rd) = i+1;
- if (!(skb = dev->desc_tbl[i].txskb) ||
- !(iavcc_r = dev->desc_tbl[i].iavcc))
+ if (!dev->desc_tbl[i].txskb || !(iavcc_r = dev->desc_tbl[i].iavcc))
printk("Fatal err, desc table vcc or skb is NULL\n");
else
iavcc_r->vc_desc_cnt--;
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index bc1876915457..eaa31e567d1e 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -14,7 +14,6 @@
#include <linux/of.h>
#include <asm/sections.h>
-#include <asm/pgalloc.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@@ -155,66 +154,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
}
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
-{
- int nid = early_cpu_to_node(cpu);
-
- return memblock_alloc_try_nid(size, align,
- __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
-static void __init pcpu_populate_pte(unsigned long addr)
-{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-}
-#endif
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
@@ -229,7 +168,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ early_cpu_to_node);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
@@ -239,10 +178,7 @@ void __init setup_per_cpu_areas(void)
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
- rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- pcpu_fc_alloc,
- pcpu_fc_free,
- pcpu_populate_pte);
+ rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
#endif
if (rc < 0)
panic("Failed to initialize percpu areas (err=%d).", rc);
diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index d7d63c1aa993..4afb0e9312c0 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -199,11 +199,16 @@ static struct class firmware_class = {
int register_sysfs_loader(void)
{
- return class_register(&firmware_class);
+ int ret = class_register(&firmware_class);
+
+ if (ret != 0)
+ return ret;
+ return register_firmware_config_sysctl();
}
void unregister_sysfs_loader(void)
{
+ unregister_firmware_config_sysctl();
class_unregister(&firmware_class);
}
diff --git a/drivers/base/firmware_loader/fallback.h b/drivers/base/firmware_loader/fallback.h
index 3af7205b302f..9f3055d3b4ca 100644
--- a/drivers/base/firmware_loader/fallback.h
+++ b/drivers/base/firmware_loader/fallback.h
@@ -42,6 +42,17 @@ void fw_fallback_set_default_timeout(void);
int register_sysfs_loader(void);
void unregister_sysfs_loader(void);
+#ifdef CONFIG_SYSCTL
+extern int register_firmware_config_sysctl(void);
+extern void unregister_firmware_config_sysctl(void);
+#else
+static inline int register_firmware_config_sysctl(void)
+{
+ return 0;
+}
+static inline void unregister_firmware_config_sysctl(void) { }
+#endif /* CONFIG_SYSCTL */
+
#else /* CONFIG_FW_LOADER_USER_HELPER */
static inline int firmware_fallback_sysfs(struct firmware *fw, const char *name,
struct device *device,
diff --git a/drivers/base/firmware_loader/fallback_table.c b/drivers/base/firmware_loader/fallback_table.c
index 46a731dede6f..e5ac098d0742 100644
--- a/drivers/base/firmware_loader/fallback_table.c
+++ b/drivers/base/firmware_loader/fallback_table.c
@@ -4,6 +4,7 @@
#include <linux/kconfig.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/export.h>
#include <linux/security.h>
#include <linux/highmem.h>
#include <linux/umh.h>
@@ -24,7 +25,7 @@ struct firmware_fallback_config fw_fallback_config = {
EXPORT_SYMBOL_NS_GPL(fw_fallback_config, FIRMWARE_LOADER_PRIVATE);
#ifdef CONFIG_SYSCTL
-struct ctl_table firmware_config_table[] = {
+static struct ctl_table firmware_config_table[] = {
{
.procname = "force_sysfs_fallback",
.data = &fw_fallback_config.force_sysfs_fallback,
@@ -45,4 +46,24 @@ struct ctl_table firmware_config_table[] = {
},
{ }
};
-#endif
+
+static struct ctl_table_header *firmware_config_sysct_table_header;
+int register_firmware_config_sysctl(void)
+{
+ firmware_config_sysct_table_header =
+ register_sysctl("kernel/firmware_config",
+ firmware_config_table);
+ if (!firmware_config_sysct_table_header)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(register_firmware_config_sysctl, FIRMWARE_LOADER_PRIVATE);
+
+void unregister_firmware_config_sysctl(void)
+{
+ unregister_sysctl_table(firmware_config_sysct_table_header);
+ firmware_config_sysct_table_header = NULL;
+}
+EXPORT_SYMBOL_NS_GPL(unregister_firmware_config_sysctl, FIRMWARE_LOADER_PRIVATE);
+
+#endif /* CONFIG_SYSCTL */
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 94665037f4a3..72b7a92337b1 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -120,7 +120,11 @@ static unsigned int read_magic_time(void)
struct rtc_time time;
unsigned int val;
- mc146818_get_time(&time);
+ if (mc146818_get_time(&time) < 0) {
+ pr_err("Unable to read current time from RTC\n");
+ return 0;
+ }
+
pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time);
val = time.tm_year; /* 100 years */
if (val > 100)
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 588889bea7c3..6af111f568e4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -122,7 +122,7 @@ newtag(struct aoedev *d)
register ulong n;
n = jiffies & 0xffff;
- return n |= (++d->lasttag & 0x7fff) << 16;
+ return n | (++d->lasttag & 0x7fff) << 16;
}
static u32
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 8fe2e4289dae..6e3f2f0d2352 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -362,7 +362,6 @@ __setup("ramdisk_size=", ramdisk_size);
* (should share code eventually).
*/
static LIST_HEAD(brd_devices);
-static DEFINE_MUTEX(brd_devices_mutex);
static struct dentry *brd_debugfs_dir;
static int brd_alloc(int i)
@@ -372,21 +371,14 @@ static int brd_alloc(int i)
char buf[DISK_NAME_LEN];
int err = -ENOMEM;
- mutex_lock(&brd_devices_mutex);
- list_for_each_entry(brd, &brd_devices, brd_list) {
- if (brd->brd_number == i) {
- mutex_unlock(&brd_devices_mutex);
+ list_for_each_entry(brd, &brd_devices, brd_list)
+ if (brd->brd_number == i)
return -EEXIST;
- }
- }
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
- if (!brd) {
- mutex_unlock(&brd_devices_mutex);
+ if (!brd)
return -ENOMEM;
- }
brd->brd_number = i;
list_add_tail(&brd->brd_list, &brd_devices);
- mutex_unlock(&brd_devices_mutex);
spin_lock_init(&brd->brd_lock);
INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
@@ -429,9 +421,7 @@ static int brd_alloc(int i)
out_cleanup_disk:
blk_cleanup_disk(disk);
out_free_dev:
- mutex_lock(&brd_devices_mutex);
list_del(&brd->brd_list);
- mutex_unlock(&brd_devices_mutex);
kfree(brd);
return err;
}
@@ -441,15 +431,19 @@ static void brd_probe(dev_t dev)
brd_alloc(MINOR(dev) / max_part);
}
-static void brd_del_one(struct brd_device *brd)
+static void brd_cleanup(void)
{
- del_gendisk(brd->brd_disk);
- blk_cleanup_disk(brd->brd_disk);
- brd_free_pages(brd);
- mutex_lock(&brd_devices_mutex);
- list_del(&brd->brd_list);
- mutex_unlock(&brd_devices_mutex);
- kfree(brd);
+ struct brd_device *brd, *next;
+
+ debugfs_remove_recursive(brd_debugfs_dir);
+
+ list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
+ del_gendisk(brd->brd_disk);
+ blk_cleanup_disk(brd->brd_disk);
+ brd_free_pages(brd);
+ list_del(&brd->brd_list);
+ kfree(brd);
+ }
}
static inline void brd_check_and_reset_par(void)
@@ -473,9 +467,18 @@ static inline void brd_check_and_reset_par(void)
static int __init brd_init(void)
{
- struct brd_device *brd, *next;
int err, i;
+ brd_check_and_reset_par();
+
+ brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
+
+ for (i = 0; i < rd_nr; i++) {
+ err = brd_alloc(i);
+ if (err)
+ goto out_free;
+ }
+
/*
* brd module now has a feature to instantiate underlying device
* structure on-demand, provided that there is an access dev node.
@@ -491,28 +494,16 @@ static int __init brd_init(void)
* dynamically.
*/
- if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe))
- return -EIO;
-
- brd_check_and_reset_par();
-
- brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
-
- for (i = 0; i < rd_nr; i++) {
- err = brd_alloc(i);
- if (err)
- goto out_free;
+ if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
+ err = -EIO;
+ goto out_free;
}
pr_info("brd: module loaded\n");
return 0;
out_free:
- unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
- debugfs_remove_recursive(brd_debugfs_dir);
-
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
- brd_del_one(brd);
+ brd_cleanup();
pr_info("brd: module NOT loaded !!!\n");
return err;
@@ -520,13 +511,9 @@ out_free:
static void __exit brd_exit(void)
{
- struct brd_device *brd, *next;
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
- debugfs_remove_recursive(brd_debugfs_dir);
-
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
- brd_del_one(brd);
+ brd_cleanup();
pr_info("brd: module unloaded\n");
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b1b05c45c07c..01cbbfc4e9e2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -820,7 +820,7 @@ static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
{
- struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL;
+ struct rb_node **node, *parent = NULL;
struct loop_worker *cur_worker, *worker = NULL;
struct work_struct *work;
struct list_head *cmd_list;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8f140da1efe3..4203cdab8abf 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -6189,7 +6189,7 @@ static inline size_t next_token(const char **buf)
* These are the characters that produce nonzero for
* isspace() in the "C" and "POSIX" locales.
*/
- const char *spaces = " \f\n\r\t\v";
+ static const char spaces[] = " \f\n\r\t\v";
*buf += strspn(*buf, spaces); /* Find start of token */
@@ -6495,7 +6495,8 @@ static int rbd_add_parse_args(const char *buf,
pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
pctx.opts->trim = RBD_TRIM_DEFAULT;
- ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
+ ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL,
+ ',');
if (ret)
goto out_err;
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 8f8443ee6fe4..c08971de369f 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -196,7 +196,7 @@ rnbd_get_cpu_qlist(struct rnbd_clt_session *sess, int cpu)
return per_cpu_ptr(sess->cpu_queues, bit);
} else if (cpu != 0) {
/* Search from 0 to cpu */
- bit = find_next_bit(sess->cpu_queues_bm, cpu, 0);
+ bit = find_first_bit(sess->cpu_queues_bm, cpu);
if (bit < cpu)
return per_cpu_ptr(sess->cpu_queues, bit);
}
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 9877e413fce3..1b57d4666e43 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3691,27 +3691,6 @@ static struct ctl_table cdrom_table[] = {
},
{ }
};
-
-static struct ctl_table cdrom_cdrom_table[] = {
- {
- .procname = "cdrom",
- .maxlen = 0,
- .mode = 0555,
- .child = cdrom_table,
- },
- { }
-};
-
-/* Make sure that /proc/sys/dev is there */
-static struct ctl_table cdrom_root_table[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = cdrom_cdrom_table,
- },
- { }
-};
static struct ctl_table_header *cdrom_sysctl_header;
static void cdrom_sysctl_register(void)
@@ -3721,7 +3700,7 @@ static void cdrom_sysctl_register(void)
if (!atomic_add_unless(&initialized, 1, 1))
return;
- cdrom_sysctl_header = register_sysctl_table(cdrom_root_table);
+ cdrom_sysctl_header = register_sysctl("dev/cdrom", cdrom_table);
/* set the defaults */
cdrom_sysctl_settings.autoclose = autoclose;
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 4e5431f01450..563dfae3b8da 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -746,26 +746,6 @@ static struct ctl_table hpet_table[] = {
{}
};
-static struct ctl_table hpet_root[] = {
- {
- .procname = "hpet",
- .maxlen = 0,
- .mode = 0555,
- .child = hpet_table,
- },
- {}
-};
-
-static struct ctl_table dev_root[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = hpet_root,
- },
- {}
-};
-
static struct ctl_table_header *sysctl_header;
/*
@@ -1061,7 +1041,7 @@ static int __init hpet_init(void)
if (result < 0)
return -ENODEV;
- sysctl_header = register_sysctl_table(dev_root);
+ sysctl_header = register_sysctl("dev/hpet", hpet_table);
result = acpi_bus_register_driver(&hpet_acpi_driver);
if (result < 0) {
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b411182df6f6..68613f0b6887 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1992,8 +1992,7 @@ static int proc_do_entropy(struct ctl_table *table, int write, void *buffer,
}
static int sysctl_poolsize = POOL_BITS;
-extern struct ctl_table random_table[];
-struct ctl_table random_table[] = {
+static struct ctl_table random_table[] = {
{
.procname = "poolsize",
.data = &sysctl_poolsize,
@@ -2055,6 +2054,17 @@ struct ctl_table random_table[] = {
#endif
{ }
};
+
+/*
+ * rand_initialize() is called before sysctl_init(),
+ * so we cannot call register_sysctl_init() in rand_initialize()
+ */
+static int __init random_sysctls_init(void)
+{
+ register_sysctl_init("kernel/random", random_table);
+ return 0;
+}
+device_initcall(random_sysctls_init);
#endif /* CONFIG_SYSCTL */
struct batched_entropy {
diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c
index 57ae183982d8..f7b41366666e 100644
--- a/drivers/clk/clk-si5341.c
+++ b/drivers/clk/clk-si5341.c
@@ -1740,7 +1740,7 @@ static int si5341_probe(struct i2c_client *client,
clk_prepare(data->clk[i].hw.clk);
}
- err = of_clk_add_hw_provider(client->dev.of_node, of_clk_si5341_get,
+ err = devm_of_clk_add_hw_provider(&client->dev, of_clk_si5341_get,
data);
if (err) {
dev_err(&client->dev, "unable to add clk provider\n");
diff --git a/drivers/clk/mediatek/clk-mt7986-apmixed.c b/drivers/clk/mediatek/clk-mt7986-apmixed.c
index 76c8ebdeae96..98ec3887585f 100644
--- a/drivers/clk/mediatek/clk-mt7986-apmixed.c
+++ b/drivers/clk/mediatek/clk-mt7986-apmixed.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-1.0
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2021 MediaTek Inc.
* Author: Sam Shih <sam.shih@mediatek.com>
diff --git a/drivers/clk/mediatek/clk-mt7986-infracfg.c b/drivers/clk/mediatek/clk-mt7986-infracfg.c
index 3be168c34fc0..f209c559fbc3 100644
--- a/drivers/clk/mediatek/clk-mt7986-infracfg.c
+++ b/drivers/clk/mediatek/clk-mt7986-infracfg.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-1.0
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2021 MediaTek Inc.
* Author: Sam Shih <sam.shih@mediatek.com>
diff --git a/drivers/clk/mediatek/clk-mt7986-topckgen.c b/drivers/clk/mediatek/clk-mt7986-topckgen.c
index 8550e2be7773..8f6f79b6e31e 100644
--- a/drivers/clk/mediatek/clk-mt7986-topckgen.c
+++ b/drivers/clk/mediatek/clk-mt7986-topckgen.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-1.0
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2021 MediaTek Inc.
* Author: Sam Shih <sam.shih@mediatek.com>
diff --git a/drivers/clk/visconti/pll.c b/drivers/clk/visconti/pll.c
index a2398bc6c6e4..a484cb945d67 100644
--- a/drivers/clk/visconti/pll.c
+++ b/drivers/clk/visconti/pll.c
@@ -246,7 +246,6 @@ static struct clk_hw *visconti_register_pll(struct visconti_pll_provider *ctx,
{
struct clk_init_data init;
struct visconti_pll *pll;
- struct clk *pll_clk;
struct clk_hw *pll_hw_clk;
size_t len;
int ret;
@@ -277,7 +276,7 @@ static struct clk_hw *visconti_register_pll(struct visconti_pll_provider *ctx,
pll_hw_clk = &pll->hw;
ret = clk_hw_register(NULL, &pll->hw);
if (ret) {
- pr_err("failed to register pll clock %s : %ld\n", name, PTR_ERR(pll_clk));
+ pr_err("failed to register pll clock %s : %d\n", name, ret);
kfree(pll);
pll_hw_clk = ERR_PTR(ret);
}
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 646ad385e490..ccac1c453080 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -358,7 +358,7 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
* other namespaces.
*/
if ((current_user_ns() != &init_user_ns) ||
- (task_active_pid_ns(current) != &init_pid_ns))
+ !task_is_in_init_pid_ns(current))
return;
/* Can only change if privileged. */
diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c
index 7e0957eea094..869894b74741 100644
--- a/drivers/counter/counter-core.c
+++ b/drivers/counter/counter-core.c
@@ -90,10 +90,8 @@ struct counter_device *counter_alloc(size_t sizeof_priv)
int err;
ch = kzalloc(sizeof(*ch) + sizeof_priv, GFP_KERNEL);
- if (!ch) {
- err = -ENOMEM;
- goto err_alloc_ch;
- }
+ if (!ch)
+ return NULL;
counter = &ch->counter;
dev = &counter->dev;
@@ -123,9 +121,8 @@ err_chrdev_add:
err_ida_alloc:
kfree(ch);
-err_alloc_ch:
- return ERR_PTR(err);
+ return NULL;
}
EXPORT_SYMBOL_GPL(counter_alloc);
@@ -208,12 +205,12 @@ struct counter_device *devm_counter_alloc(struct device *dev, size_t sizeof_priv
int err;
counter = counter_alloc(sizeof_priv);
- if (IS_ERR(counter))
- return counter;
+ if (!counter)
+ return NULL;
err = devm_add_action_or_reset(dev, devm_counter_put, counter);
if (err < 0)
- return ERR_PTR(err);
+ return NULL;
return counter;
}
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index ae79c3300129..7de3f5b6e8d0 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -722,6 +722,13 @@ void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr,
systab_hdr->revision >> 16,
systab_hdr->revision & 0xffff,
vendor);
+
+ if (IS_ENABLED(CONFIG_X86_64) &&
+ systab_hdr->revision > EFI_1_10_SYSTEM_TABLE_REVISION &&
+ !strcmp(vendor, "Apple")) {
+ pr_info("Apple Mac detected, using EFI v1.10 runtime services only\n");
+ efi.runtime_version = EFI_1_10_SYSTEM_TABLE_REVISION;
+ }
}
static __initdata char memory_type_name[][13] = {
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 2363fee9211c..9cc556013d08 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -119,9 +119,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
if (image->image_base != _text)
efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
- if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN))
- efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n",
- EFI_KIMG_ALIGN >> 10);
+ if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN))
+ efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n",
+ SEGMENT_ALIGN >> 10);
kernel_size = _edata - _text;
kernel_memsize = kernel_size + (_end - _edata);
diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index e87e7f1b1a33..da93864d7abc 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -40,6 +40,8 @@
#ifdef CONFIG_ARM64
# define EFI_RT_VIRTUAL_LIMIT DEFAULT_MAP_WINDOW_64
+#elif defined(CONFIG_RISCV)
+# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_MIN
#else
# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE
#endif
diff --git a/drivers/gpio/gpio-idt3243x.c b/drivers/gpio/gpio-idt3243x.c
index 50003ad2e589..52b8b72ded77 100644
--- a/drivers/gpio/gpio-idt3243x.c
+++ b/drivers/gpio/gpio-idt3243x.c
@@ -132,7 +132,7 @@ static int idt_gpio_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct gpio_irq_chip *girq;
struct idt_gpio_ctrl *ctrl;
- unsigned int parent_irq;
+ int parent_irq;
int ngpios;
int ret;
@@ -164,8 +164,8 @@ static int idt_gpio_probe(struct platform_device *pdev)
return PTR_ERR(ctrl->pic);
parent_irq = platform_get_irq(pdev, 0);
- if (!parent_irq)
- return -EINVAL;
+ if (parent_irq < 0)
+ return parent_irq;
girq = &ctrl->gc.irq;
girq->chip = &idt_gpio_irqchip;
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 70d6ae20b1da..a964e25ea620 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -47,7 +47,7 @@ struct mpc8xxx_gpio_chip {
unsigned offset, int value);
struct irq_domain *irq;
- unsigned int irqn;
+ int irqn;
};
/*
@@ -388,8 +388,8 @@ static int mpc8xxx_probe(struct platform_device *pdev)
}
mpc8xxx_gc->irqn = platform_get_irq(pdev, 0);
- if (!mpc8xxx_gc->irqn)
- return 0;
+ if (mpc8xxx_gc->irqn < 0)
+ return mpc8xxx_gc->irqn;
mpc8xxx_gc->irq = irq_domain_create_linear(fwnode,
MPC8XXX_GPIO_PINS,
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 838bbfed11d3..04b137eca8da 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -816,7 +816,7 @@ gpio_sim_make_bank_swnode(struct gpio_sim_bank *bank,
properties[prop_idx++] = PROPERTY_ENTRY_U32("ngpios", bank->num_lines);
- if (bank->label)
+ if (bank->label && (strlen(bank->label) > 0))
properties[prop_idx++] = PROPERTY_ENTRY_STRING("gpio-sim,label",
bank->label);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index cf7fad88c138..ed077de426d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2354,7 +2354,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
if (amdgpu_sriov_vf(adev))
- amdgpu_virt_exchange_data(adev);
+ amdgpu_virt_init_data_exchange(adev);
r = amdgpu_ib_pool_init(adev);
if (r) {
@@ -4450,33 +4450,24 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
if (amdgpu_gpu_recovery == -1) {
switch (adev->asic_type) {
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_TOPAZ:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_VEGA20:
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_VANGOGH:
- case CHIP_ALDEBARAN:
- break;
- default:
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_CYAN_SKILLFISH:
goto disabled;
+ default:
+ break;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index be45650250fa..81bfee978b74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -243,6 +243,30 @@ static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary)
return (le32_to_cpu(bhdr->binary_signature) == BINARY_SIGNATURE);
}
+static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
+{
+ /*
+ * So far, apply this quirk only on those Navy Flounder boards which
+ * have a bad harvest table of VCN config.
+ */
+ if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) &&
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2))) {
+ switch (adev->pdev->revision) {
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC5:
+ case 0xC7:
+ case 0xCF:
+ case 0xDF:
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
@@ -548,11 +572,9 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
break;
}
}
- /* some IP discovery tables on Navy Flounder don't have this set correctly */
- if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) &&
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2)) &&
- (adev->pdev->revision != 0xFF))
- adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+
+ amdgpu_discovery_harvest_config_quirk(adev);
+
if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c610e2794c18..4c83f1db8a24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1525,6 +1525,87 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
0x99A0,
0x99A2,
0x99A4,
+ /* radeon secondary ids */
+ 0x3171,
+ 0x3e70,
+ 0x4164,
+ 0x4165,
+ 0x4166,
+ 0x4168,
+ 0x4170,
+ 0x4171,
+ 0x4172,
+ 0x4173,
+ 0x496e,
+ 0x4a69,
+ 0x4a6a,
+ 0x4a6b,
+ 0x4a70,
+ 0x4a74,
+ 0x4b69,
+ 0x4b6b,
+ 0x4b6c,
+ 0x4c6e,
+ 0x4e64,
+ 0x4e65,
+ 0x4e66,
+ 0x4e67,
+ 0x4e68,
+ 0x4e69,
+ 0x4e6a,
+ 0x4e71,
+ 0x4f73,
+ 0x5569,
+ 0x556b,
+ 0x556d,
+ 0x556f,
+ 0x5571,
+ 0x5854,
+ 0x5874,
+ 0x5940,
+ 0x5941,
+ 0x5b72,
+ 0x5b73,
+ 0x5b74,
+ 0x5b75,
+ 0x5d44,
+ 0x5d45,
+ 0x5d6d,
+ 0x5d6f,
+ 0x5d72,
+ 0x5d77,
+ 0x5e6b,
+ 0x5e6d,
+ 0x7120,
+ 0x7124,
+ 0x7129,
+ 0x712e,
+ 0x712f,
+ 0x7162,
+ 0x7163,
+ 0x7166,
+ 0x7167,
+ 0x7172,
+ 0x7173,
+ 0x71a0,
+ 0x71a1,
+ 0x71a3,
+ 0x71a7,
+ 0x71bb,
+ 0x71e0,
+ 0x71e1,
+ 0x71e2,
+ 0x71e6,
+ 0x71e7,
+ 0x71f2,
+ 0x7269,
+ 0x726b,
+ 0x726e,
+ 0x72a0,
+ 0x72a8,
+ 0x72b1,
+ 0x72b3,
+ 0x793f,
};
static const struct pci_device_id pciidlist[] = {
@@ -1930,11 +2011,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
- if (flags == 0) {
- DRM_INFO("Unsupported asic. Remove me when IP discovery init is in place.\n");
- return -ENODEV;
- }
-
if (amdgpu_virtual_display ||
amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
supports_atomic = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 894444ab0032..07bc0f504713 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -625,20 +625,20 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
- if (adev->bios != NULL) {
- adev->virt.vf2pf_update_interval_ms = 2000;
+ if (adev->mman.fw_vram_usage_va != NULL) {
+ /* go through this logic in ip_init and reset to init workqueue*/
+ amdgpu_virt_exchange_data(adev);
+ INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+ schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+ } else if (adev->bios != NULL) {
+ /* go through this logic in early init stage to get necessary flags, e.g. rlcg_acc related */
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
amdgpu_virt_read_pf2vf_data(adev);
}
-
- if (adev->virt.vf2pf_update_interval_ms != 0) {
- INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
- schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
- }
}
@@ -674,12 +674,6 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
if (adev->virt.ras_init_done)
amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
}
- } else if (adev->bios != NULL) {
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
-
- amdgpu_virt_read_pf2vf_data(adev);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 54f28c075f21..f10ce740a29c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1428,6 +1428,10 @@ static int cik_asic_reset(struct amdgpu_device *adev)
{
int r;
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
dev_info(adev->dev, "BACO reset\n");
r = amdgpu_dpm_baco_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index fe9a7cc8d9eb..6645ebbd2696 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -956,6 +956,10 @@ static int vi_asic_reset(struct amdgpu_device *adev)
{
int r;
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
dev_info(adev->dev, "BACO reset\n");
r = amdgpu_dpm_baco_reset(adev);
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index ff5bb152ef49..e6ef36de0825 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -2033,10 +2033,10 @@ static void calculate_bandwidth(
kfree(surface_type);
free_tiling_mode:
kfree(tiling_mode);
-free_yclk:
- kfree(yclk);
free_sclk:
kfree(sclk);
+free_yclk:
+ kfree(yclk);
}
/*******************************************************************************
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index ec19678a0702..e447c74be713 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -503,7 +503,6 @@ static void dcn_bw_calc_rq_dlg_ttu(
//input[in_idx].dout.output_standard;
/*todo: soc->sr_enter_plus_exit_time??*/
- dlg_sys_param->t_srx_delay_us = dc->dcn_ip->dcfclk_cstate_latency / v->dcf_clk_deep_sleep;
dml1_rq_dlg_get_rq_params(dml, rq_param, &input->pipe.src);
dml1_extract_rq_regs(dml, rq_regs, rq_param);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index 9f35f2e8f971..cac80ba69072 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -38,7 +38,6 @@
#include "clk/clk_11_0_0_offset.h"
#include "clk/clk_11_0_0_sh_mask.h"
-#include "irq/dcn20/irq_service_dcn20.h"
#undef FN
#define FN(reg_name, field_name) \
@@ -223,8 +222,6 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
bool force_reset = false;
bool p_state_change_support;
int total_plane_count;
- int irq_src;
- uint32_t hpd_state;
if (dc->work_arounds.skip_clock_update)
return;
@@ -242,13 +239,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
if (dc->res_pool->pp_smu)
pp_smu = &dc->res_pool->pp_smu->nv_funcs;
- for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD6; irq_src++) {
- hpd_state = dc_get_hpd_state_dcn20(dc->res_pool->irqs, irq_src);
- if (hpd_state)
- break;
- }
-
- if (display_count == 0 && !hpd_state)
+ if (display_count == 0)
enter_display_off = true;
if (enter_display_off == safe_to_lower) {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index fbda42313bfe..f4dee0e48a67 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -42,7 +42,6 @@
#include "clk/clk_10_0_2_sh_mask.h"
#include "renoir_ip_offset.h"
-#include "irq/dcn21/irq_service_dcn21.h"
/* Constants */
@@ -129,11 +128,9 @@ static void rn_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
struct dc *dc = clk_mgr_base->ctx->dc;
int display_count;
- int irq_src;
bool update_dppclk = false;
bool update_dispclk = false;
bool dpp_clock_lowered = false;
- uint32_t hpd_state;
struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
@@ -150,14 +147,8 @@ static void rn_update_clocks(struct clk_mgr *clk_mgr_base,
display_count = rn_get_active_display_cnt_wa(dc, context);
- for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD5; irq_src++) {
- hpd_state = dc_get_hpd_state_dcn21(dc->res_pool->irqs, irq_src);
- if (hpd_state)
- break;
- }
-
/* if we can go lower, go lower */
- if (display_count == 0 && !hpd_state) {
+ if (display_count == 0) {
rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER);
/* update power state */
clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 01c8849b9db2..6f5528d34093 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1404,20 +1404,34 @@ static void program_timing_sync(
status->timing_sync_info.master = false;
}
- /* remove any other unblanked pipes as they have already been synced */
- for (j = j + 1; j < group_size; j++) {
- bool is_blanked;
- if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked)
- is_blanked =
- pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp);
- else
- is_blanked =
- pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg);
- if (!is_blanked) {
- group_size--;
- pipe_set[j] = pipe_set[group_size];
- j--;
+ /* remove any other pipes that have already been synced */
+ if (dc->config.use_pipe_ctx_sync_logic) {
+ /* check pipe's syncd to decide which pipe to be removed */
+ for (j = 1; j < group_size; j++) {
+ if (pipe_set[j]->pipe_idx_syncd == pipe_set[0]->pipe_idx_syncd) {
+ group_size--;
+ pipe_set[j] = pipe_set[group_size];
+ j--;
+ } else
+ /* link slave pipe's syncd with master pipe */
+ pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd;
+ }
+ } else {
+ for (j = j + 1; j < group_size; j++) {
+ bool is_blanked;
+
+ if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked)
+ is_blanked =
+ pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp);
+ else
+ is_blanked =
+ pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg);
+ if (!is_blanked) {
+ group_size--;
+ pipe_set[j] = pipe_set[group_size];
+ j--;
+ }
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 05e216524370..4c3ab2575e4b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -202,7 +202,7 @@ void dp_wait_for_training_aux_rd_interval(
uint32_t wait_in_micro_secs)
{
#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (wait_in_micro_secs > 16000)
+ if (wait_in_micro_secs > 1000)
msleep(wait_in_micro_secs/1000);
else
udelay(wait_in_micro_secs);
@@ -6935,7 +6935,7 @@ bool dpcd_write_128b_132b_sst_payload_allocation_table(
}
}
retries++;
- udelay(5000);
+ msleep(5);
}
if (!result && retries == max_retries) {
@@ -6987,7 +6987,7 @@ bool dpcd_poll_for_allocation_change_trigger(struct dc_link *link)
break;
}
- udelay(5000);
+ msleep(5);
}
if (result == ACT_FAILED) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index d4ff6cc6b8d9..b3912ff9dc91 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -3217,6 +3217,60 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt(
}
#endif
+void reset_syncd_pipes_from_disabled_pipes(struct dc *dc,
+ struct dc_state *context)
+{
+ int i, j;
+ struct pipe_ctx *pipe_ctx_old, *pipe_ctx, *pipe_ctx_syncd;
+
+ /* If pipe backend is reset, need to reset pipe syncd status */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe_ctx_old = &dc->current_state->res_ctx.pipe_ctx[i];
+ pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx_old->stream)
+ continue;
+
+ if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe)
+ continue;
+
+ if (!pipe_ctx->stream ||
+ pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) {
+
+ /* Reset all the syncd pipes from the disabled pipe */
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ pipe_ctx_syncd = &context->res_ctx.pipe_ctx[j];
+ if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_syncd) == pipe_ctx_old->pipe_idx) ||
+ !IS_PIPE_SYNCD_VALID(pipe_ctx_syncd))
+ SET_PIPE_SYNCD_TO_PIPE(pipe_ctx_syncd, j);
+ }
+ }
+ }
+}
+
+void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc,
+ struct dc_state *context,
+ uint8_t disabled_master_pipe_idx)
+{
+ int i;
+ struct pipe_ctx *pipe_ctx, *pipe_ctx_check;
+
+ pipe_ctx = &context->res_ctx.pipe_ctx[disabled_master_pipe_idx];
+ if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx) != disabled_master_pipe_idx) ||
+ !IS_PIPE_SYNCD_VALID(pipe_ctx))
+ SET_PIPE_SYNCD_TO_PIPE(pipe_ctx, disabled_master_pipe_idx);
+
+ /* for the disabled pipe, check if any slave pipe exists and assert */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe_ctx_check = &context->res_ctx.pipe_ctx[i];
+
+ if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_check) == disabled_master_pipe_idx) &&
+ IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != disabled_master_pipe_idx))
+ DC_ERR("DC: Failure: pipe_idx[%d] syncd with disabled master pipe_idx[%d]\n",
+ i, disabled_master_pipe_idx);
+ }
+}
+
uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter)
{
/* TODO - get transmitter to phy idx mapping from DMUB */
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index da2c78ce14d6..288e7b01f561 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -344,6 +344,7 @@ struct dc_config {
uint8_t vblank_alignment_max_frame_time_diff;
bool is_asymmetric_memory;
bool is_single_rank_dimm;
+ bool use_pipe_ctx_sync_logic;
};
enum visual_confirm {
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 78192ecba102..f3ff141b706a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1566,6 +1566,10 @@ static enum dc_status apply_single_controller_ctx_to_hw(
&pipe_ctx->stream->audio_info);
}
+ /* make sure no pipes are syncd to the pipe being enabled */
+ if (!pipe_ctx->stream->apply_seamless_boot_optimization && dc->config.use_pipe_ctx_sync_logic)
+ check_syncd_pipes_for_disabled_master_pipe(dc, context, pipe_ctx->pipe_idx);
+
#if defined(CONFIG_DRM_AMD_DC_DCN)
/* DCN3.1 FPGA Workaround
* Need to enable HPO DP Stream Encoder before setting OTG master enable.
@@ -1604,7 +1608,7 @@ static enum dc_status apply_single_controller_ctx_to_hw(
pipe_ctx->stream_res.stream_enc,
pipe_ctx->stream_res.tg->inst);
- if (dc_is_dp_signal(pipe_ctx->stream->signal) &&
+ if (dc_is_embedded_signal(pipe_ctx->stream->signal) &&
pipe_ctx->stream_res.stream_enc->funcs->reset_fifo)
pipe_ctx->stream_res.stream_enc->funcs->reset_fifo(
pipe_ctx->stream_res.stream_enc);
@@ -2297,6 +2301,10 @@ enum dc_status dce110_apply_ctx_to_hw(
enum dc_status status;
int i;
+ /* reset syncd pipes from disabled pipes */
+ if (dc->config.use_pipe_ctx_sync_logic)
+ reset_syncd_pipes_from_disabled_pipes(dc, context);
+
/* Reset old context */
/* look up the targets that have been removed since last commit */
hws->funcs.reset_hw_ctx_wrap(dc, context);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 602ec9a08549..8ca26383b568 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -1878,7 +1878,6 @@ noinline bool dcn30_internal_validate_bw(
dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
- DC_FP_START();
if (!pipe_cnt) {
out = true;
goto validate_out;
@@ -2104,7 +2103,6 @@ validate_fail:
out = false;
validate_out:
- DC_FP_END();
return out;
}
@@ -2306,7 +2304,9 @@ bool dcn30_validate_bandwidth(struct dc *dc,
BW_VAL_TRACE_COUNT();
+ DC_FP_START();
out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
+ DC_FP_END();
if (pipe_cnt == 0)
goto validate_out;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index c1c6e602b06c..5d9637b07429 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_clock_gate = true,
.disable_pplib_clock_request = true,
.disable_pplib_wm_range = true,
- .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+ .pipe_split_policy = MPC_SPLIT_AVOID,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
@@ -1380,6 +1380,17 @@ static void set_wm_ranges(
pp_smu->nv_funcs.set_wm_ranges(&pp_smu->nv_funcs.pp_smu, &ranges);
}
+static void dcn301_calculate_wm_and_dlg(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ DC_FP_START();
+ dcn301_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
+ DC_FP_END();
+}
+
static struct resource_funcs dcn301_res_pool_funcs = {
.destroy = dcn301_destroy_resource_pool,
.link_enc_create = dcn301_link_encoder_create,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 42ed47e8133d..8d64187478e4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -2260,6 +2260,9 @@ static bool dcn31_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ /* Use pipe context based otg sync logic */
+ dc->config.use_pipe_ctx_sync_logic = true;
+
/* read VBIOS LTTPR caps */
{
if (ctx->dc_bios->funcs->get_lttpr_caps) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
index 246071c72f6b..548cdef8a8ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
@@ -1576,8 +1576,6 @@ void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
index 015e7f2c0b16..0fc9f3e3ffae 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
@@ -1577,8 +1577,6 @@ void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
index 8bc27de4c104..618f4b682ab1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
@@ -1688,8 +1688,6 @@ void dml21_rq_dlg_get_dlg_reg(
mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
index aef854270054..747167083dea 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
@@ -1858,8 +1858,6 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 94c32832a0e7..0a7a33864973 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -327,7 +327,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
dcn3_01_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
}
-void dcn301_calculate_wm_and_dlg(struct dc *dc,
+void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
index fc7065d17842..774b0fdfc80b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
@@ -34,7 +34,7 @@ void dcn301_fpu_set_wm_ranges(int i,
void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
-void dcn301_calculate_wm_and_dlg(struct dc *dc,
+void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index d46a2733024c..8f9f1d607f7c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -546,7 +546,6 @@ struct _vcs_dpi_display_dlg_sys_params_st {
double t_sr_wm_us;
double t_extra_us;
double mem_trip_us;
- double t_srx_delay_us;
double deepsleep_dcfclk_mhz;
double total_flip_bw;
unsigned int total_flip_bytes;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
index 71ea503cb32f..412e75eb4704 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
@@ -142,9 +142,6 @@ void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _v
dml_print("DML_RQ_DLG_CALC: t_sr_wm_us = %3.2f\n", dlg_sys_param->t_sr_wm_us);
dml_print("DML_RQ_DLG_CALC: t_extra_us = %3.2f\n", dlg_sys_param->t_extra_us);
dml_print(
- "DML_RQ_DLG_CALC: t_srx_delay_us = %3.2f\n",
- dlg_sys_param->t_srx_delay_us);
- dml_print(
"DML_RQ_DLG_CALC: deepsleep_dcfclk_mhz = %3.2f\n",
dlg_sys_param->deepsleep_dcfclk_mhz);
dml_print(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
index 59dc2c5b58dd..3df559c591f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
@@ -1331,10 +1331,6 @@ void dml1_rq_dlg_get_dlg_params(
if (dual_plane)
DTRACE("DLG: %s: swath_height_c = %d", __func__, swath_height_c);
- DTRACE(
- "DLG: %s: t_srx_delay_us = %3.2f",
- __func__,
- (double) dlg_sys_param->t_srx_delay_us);
DTRACE("DLG: %s: line_time_in_us = %3.2f", __func__, (double) line_time_in_us);
DTRACE("DLG: %s: vupdate_offset = %d", __func__, vupdate_offset);
DTRACE("DLG: %s: vupdate_width = %d", __func__, vupdate_width);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 890280026e69..943240e2809e 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -382,6 +382,7 @@ struct pipe_ctx {
struct pll_settings pll_settings;
uint8_t pipe_idx;
+ uint8_t pipe_idx_syncd;
struct pipe_ctx *top_pipe;
struct pipe_ctx *bottom_pipe;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 4249bf306e09..dbfe6690ded8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -34,6 +34,10 @@
#define MEMORY_TYPE_HBM 2
+#define IS_PIPE_SYNCD_VALID(pipe) ((((pipe)->pipe_idx_syncd) & 0x80)?1:0)
+#define GET_PIPE_SYNCD_FROM_PIPE(pipe) ((pipe)->pipe_idx_syncd & 0x7F)
+#define SET_PIPE_SYNCD_TO_PIPE(pipe, pipe_syncd) ((pipe)->pipe_idx_syncd = (0x80 | pipe_syncd))
+
enum dce_version resource_parse_asic_id(
struct hw_asic_id asic_id);
@@ -208,6 +212,13 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt(
const struct dc_link *link);
#endif
+void reset_syncd_pipes_from_disabled_pipes(struct dc *dc,
+ struct dc_state *context);
+
+void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc,
+ struct dc_state *context,
+ uint8_t disabled_master_pipe_idx);
+
uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter);
#endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
index 9ccafe007b23..c4b067d01895 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
@@ -132,31 +132,6 @@ enum dc_irq_source to_dal_irq_source_dcn20(
}
}
-uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum dc_irq_source source)
-{
- const struct irq_source_info *info;
- uint32_t addr;
- uint32_t value;
- uint32_t current_status;
-
- info = find_irq_source_info(irq_service, source);
- if (!info)
- return 0;
-
- addr = info->status_reg;
- if (!addr)
- return 0;
-
- value = dm_read_reg(irq_service->ctx, addr);
- current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE);
-
- return current_status;
-}
-
static bool hpd_ack(
struct irq_service *irq_service,
const struct irq_source_info *info)
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
index 4d69ab24ca25..aee4b37999f1 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
@@ -31,6 +31,4 @@
struct irq_service *dal_irq_service_dcn20_create(
struct irq_service_init_data *init_data);
-uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum dc_irq_source source);
-
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
index 235294534c43..0f15bcada4e9 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
@@ -134,31 +134,6 @@ static enum dc_irq_source to_dal_irq_source_dcn21(struct irq_service *irq_servic
return DC_IRQ_SOURCE_INVALID;
}
-uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum dc_irq_source source)
-{
- const struct irq_source_info *info;
- uint32_t addr;
- uint32_t value;
- uint32_t current_status;
-
- info = find_irq_source_info(irq_service, source);
- if (!info)
- return 0;
-
- addr = info->status_reg;
- if (!addr)
- return 0;
-
- value = dm_read_reg(irq_service->ctx, addr);
- current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE);
-
- return current_status;
-}
-
static bool hpd_ack(
struct irq_service *irq_service,
const struct irq_source_info *info)
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
index 616470e32380..da2bd0e93d7a 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
@@ -31,6 +31,4 @@
struct irq_service *dal_irq_service_dcn21_create(
struct irq_service_init_data *init_data);
-uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum dc_irq_source source);
-
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index 4db1133e4466..a2a4fbeb83f8 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -79,7 +79,7 @@ void dal_irq_service_destroy(struct irq_service **irq_service)
*irq_service = NULL;
}
-const struct irq_source_info *find_irq_source_info(
+static const struct irq_source_info *find_irq_source_info(
struct irq_service *irq_service,
enum dc_irq_source source)
{
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
index e60b82480093..dbfcb096eedd 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
@@ -69,10 +69,6 @@ struct irq_service {
const struct irq_service_funcs *funcs;
};
-const struct irq_source_info *find_irq_source_info(
- struct irq_service *irq_service,
- enum dc_irq_source source);
-
void dal_irq_service_construct(
struct irq_service *irq_service,
struct irq_service_init_data *init_data);
diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h
index d9eb353a4bf0..dbe1cc620f6e 100644
--- a/drivers/gpu/drm/ast/ast_tables.h
+++ b/drivers/gpu/drm/ast/ast_tables.h
@@ -282,8 +282,6 @@ static const struct ast_vbios_enhtable res_1360x768[] = {
};
static const struct ast_vbios_enhtable res_1600x900[] = {
- {1800, 1600, 24, 80, 1000, 900, 1, 3, VCLK108, /* 60Hz */
- (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 3, 0x3A },
{1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75, /* 60Hz CVT RB */
(SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
AST2500PreCatchCRT), 60, 1, 0x3A },
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 21174efd91be..88cd992df356 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1327,8 +1327,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state)
drm_dbg_atomic(dev, "checking %p\n", state);
- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
- requested_crtc |= drm_crtc_mask(crtc);
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+ if (new_crtc_state->enable)
+ requested_crtc |= drm_crtc_mask(crtc);
+ }
for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
ret = drm_atomic_plane_check(old_plane_state, new_plane_state);
@@ -1377,8 +1379,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state)
}
}
- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
- affected_crtc |= drm_crtc_mask(crtc);
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+ if (new_crtc_state->enable)
+ affected_crtc |= drm_crtc_mask(crtc);
+ }
/*
* For commits that allow modesets drivers can add other CRTCs to the
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index f3d79eda94bb..8b3822142fed 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -5511,6 +5511,7 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
mutex_init(&mgr->probe_lock);
#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
mutex_init(&mgr->topology_ref_history_lock);
+ stack_depot_init();
#endif
INIT_LIST_HEAD(&mgr->tx_msg_downq);
INIT_LIST_HEAD(&mgr->destroy_port_list);
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 7d1c578388d3..8257f9d4f619 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -980,6 +980,10 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size)
add_hole(&mm->head_node);
mm->scan_active = 0;
+
+#ifdef CONFIG_DRM_DEBUG_MM
+ stack_depot_init();
+#endif
}
EXPORT_SYMBOL(drm_mm_init);
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index c97323365675..918065982db4 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -107,6 +107,11 @@ static void __drm_stack_depot_print(depot_stack_handle_t stack_depot)
kfree(buf);
}
+
+static void __drm_stack_depot_init(void)
+{
+ stack_depot_init();
+}
#else /* CONFIG_DRM_DEBUG_MODESET_LOCK */
static depot_stack_handle_t __drm_stack_depot_save(void)
{
@@ -115,6 +120,9 @@ static depot_stack_handle_t __drm_stack_depot_save(void)
static void __drm_stack_depot_print(depot_stack_handle_t stack_depot)
{
}
+static void __drm_stack_depot_init(void)
+{
+}
#endif /* CONFIG_DRM_DEBUG_MODESET_LOCK */
/**
@@ -359,6 +367,7 @@ void drm_modeset_lock_init(struct drm_modeset_lock *lock)
{
ww_mutex_init(&lock->mutex, &crtc_ww_class);
INIT_LIST_HEAD(&lock->head);
+ __drm_stack_depot_init();
}
EXPORT_SYMBOL(drm_modeset_lock_init);
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 042bb80383c9..b910978d3e48 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -115,6 +115,12 @@ static const struct drm_dmi_panel_orientation_data lcd1280x1920_rightside_up = {
.orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
};
+static const struct drm_dmi_panel_orientation_data lcd1600x2560_leftside_up = {
+ .width = 1600,
+ .height = 2560,
+ .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP,
+};
+
static const struct dmi_system_id orientation_data[] = {
{ /* Acer One 10 (S1003) */
.matches = {
@@ -275,6 +281,12 @@ static const struct dmi_system_id orientation_data[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Default string"),
},
.driver_data = (void *)&onegx1_pro,
+ }, { /* OneXPlayer */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK TECHNOLOGY CO., LTD."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"),
+ },
+ .driver_data = (void *)&lcd1600x2560_leftside_up,
}, { /* Samsung GalaxyBook 10.6 */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
diff --git a/drivers/gpu/drm/drm_privacy_screen_x86.c b/drivers/gpu/drm/drm_privacy_screen_x86.c
index a2cafb294ca6..e7aa74ad0b24 100644
--- a/drivers/gpu/drm/drm_privacy_screen_x86.c
+++ b/drivers/gpu/drm/drm_privacy_screen_x86.c
@@ -33,6 +33,9 @@ static bool __init detect_thinkpad_privacy_screen(void)
unsigned long long output;
acpi_status status;
+ if (acpi_disabled)
+ return false;
+
/* Get embedded-controller handle */
status = acpi_get_devices("PNP0C09", acpi_set_handle, NULL, &ec_handle);
if (ACPI_FAILURE(status) || !ec_handle)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index b03c20c14ca1..a17313282e8b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -469,8 +469,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
return -EINVAL;
}
- if (args->stream_size > SZ_64K || args->nr_relocs > SZ_64K ||
- args->nr_bos > SZ_64K || args->nr_pmrs > 128) {
+ if (args->stream_size > SZ_128K || args->nr_relocs > SZ_128K ||
+ args->nr_bos > SZ_128K || args->nr_pmrs > 128) {
DRM_ERROR("submit arguments out of size limits\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index ba5fd012a40a..37018bc55810 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1047,7 +1047,7 @@ pm_put:
void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
{
- unsigned int i = 0;
+ unsigned int i;
dev_err(gpu->dev, "recover hung GPU!\n");
@@ -1060,7 +1060,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
/* complete all events, the GPU won't do it after the reset */
spin_lock(&gpu->event_spinlock);
- for_each_set_bit_from(i, gpu->event_bitmap, ETNA_NR_EVENTS)
+ for_each_set_bit(i, gpu->event_bitmap, ETNA_NR_EVENTS)
complete(&gpu->event_free);
bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS);
spin_unlock(&gpu->event_spinlock);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 9c9d574f0b8c..cab505277595 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -1298,6 +1298,28 @@ static void tgl_dkl_phy_set_signal_levels(struct intel_encoder *encoder,
intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port),
DKL_TX_DP20BITMODE, 0);
+
+ if (IS_ALDERLAKE_P(dev_priv)) {
+ u32 val;
+
+ if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
+ if (ln == 0) {
+ val = DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(0);
+ val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(2);
+ } else {
+ val = DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(3);
+ val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(3);
+ }
+ } else {
+ val = DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(0);
+ val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(0);
+ }
+
+ intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port),
+ DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK |
+ DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK,
+ val);
+ }
}
}
diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
index 1e689d573512..e2dfb93a82bd 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
@@ -477,14 +477,14 @@ static const struct intel_ddi_buf_trans icl_combo_phy_trans_hdmi = {
static const union intel_ddi_buf_trans_entry _ehl_combo_phy_trans_dp[] = {
/* NT mV Trans mV db */
{ .icl = { 0xA, 0x33, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */
- { .icl = { 0xA, 0x47, 0x36, 0x00, 0x09 } }, /* 350 500 3.1 */
- { .icl = { 0xC, 0x64, 0x34, 0x00, 0x0B } }, /* 350 700 6.0 */
- { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 350 900 8.2 */
+ { .icl = { 0xA, 0x47, 0x38, 0x00, 0x07 } }, /* 350 500 3.1 */
+ { .icl = { 0xC, 0x64, 0x33, 0x00, 0x0C } }, /* 350 700 6.0 */
+ { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 350 900 8.2 */
{ .icl = { 0xA, 0x46, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */
- { .icl = { 0xC, 0x64, 0x38, 0x00, 0x07 } }, /* 500 700 2.9 */
+ { .icl = { 0xC, 0x64, 0x37, 0x00, 0x08 } }, /* 500 700 2.9 */
{ .icl = { 0x6, 0x7F, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */
{ .icl = { 0xC, 0x61, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */
- { .icl = { 0x6, 0x7F, 0x38, 0x00, 0x07 } }, /* 600 900 3.5 */
+ { .icl = { 0x6, 0x7F, 0x37, 0x00, 0x08 } }, /* 600 900 3.5 */
{ .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */
};
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 4b4829eb16c2..0dd107dcecc2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -311,6 +311,7 @@ struct drm_i915_gem_object {
#define I915_BO_READONLY BIT(6)
#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(8)
+#define I915_BO_WAS_BOUND_BIT 9
/**
* @mem_flags - Mutable placement-related flags
*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 9f429ed6e78a..a50f884973bc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -10,6 +10,8 @@
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
+#include "gt/intel_gt.h"
+
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
unsigned int sg_page_sizes)
@@ -221,6 +223,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
__i915_gem_object_reset_page_iter(obj);
obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+ if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+ intel_gt_invalidate_tlbs(to_gt(i915));
+ }
+
return pages;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index f98f0fb21efb..35d0fcd3a86c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -29,6 +29,8 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
spin_lock_init(&gt->irq_lock);
+ mutex_init(&gt->tlb_invalidate_lock);
+
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
@@ -912,3 +914,109 @@ void intel_gt_info_print(const struct intel_gt_info *info,
intel_sseu_dump(&info->sseu, p);
}
+
+struct reg_and_bit {
+ i915_reg_t reg;
+ u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
+ const i915_reg_t *regs, const unsigned int num)
+{
+ const unsigned int class = engine->class;
+ struct reg_and_bit rb = { };
+
+ if (drm_WARN_ON_ONCE(&engine->i915->drm,
+ class >= num || !regs[class].reg))
+ return rb;
+
+ rb.reg = regs[class];
+ if (gen8 && class == VIDEO_DECODE_CLASS)
+ rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+ else
+ rb.bit = engine->instance;
+
+ rb.bit = BIT(rb.bit);
+
+ return rb;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+ static const i915_reg_t gen8_regs[] = {
+ [RENDER_CLASS] = GEN8_RTCR,
+ [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
+ [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
+ [COPY_ENGINE_CLASS] = GEN8_BTCR,
+ };
+ static const i915_reg_t gen12_regs[] = {
+ [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
+ [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
+ [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
+ };
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ const i915_reg_t *regs;
+ unsigned int num = 0;
+
+ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ return;
+
+ if (GRAPHICS_VER(i915) == 12) {
+ regs = gen12_regs;
+ num = ARRAY_SIZE(gen12_regs);
+ } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+ regs = gen8_regs;
+ num = ARRAY_SIZE(gen8_regs);
+ } else if (GRAPHICS_VER(i915) < 8) {
+ return;
+ }
+
+ if (drm_WARN_ONCE(&i915->drm, !num,
+ "Platform does not implement TLB invalidation!"))
+ return;
+
+ GEM_TRACE("\n");
+
+ assert_rpm_wakelock_held(&i915->runtime_pm);
+
+ mutex_lock(&gt->tlb_invalidate_lock);
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ for_each_engine(engine, gt, id) {
+ /*
+ * HW architecture suggest typical invalidation time at 40us,
+ * with pessimistic cases up to 100us and a recommendation to
+ * cap at 1ms. We go a bit higher just in case.
+ */
+ const unsigned int timeout_us = 100;
+ const unsigned int timeout_ms = 4;
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ if (__intel_wait_for_register_fw(uncore,
+ rb.reg, rb.bit, 0,
+ timeout_us, timeout_ms,
+ NULL))
+ drm_err_ratelimited(&gt->i915->drm,
+ "%s TLB invalidation did not complete in %ums!\n",
+ engine->name, timeout_ms);
+ }
+
+ /*
+ * Use delayed put since a) we mostly expect a flurry of TLB
+ * invalidations so it is good to avoid paying the forcewake cost and
+ * b) it works around a bug in Icelake which cannot cope with too rapid
+ * transitions.
+ */
+ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+ mutex_unlock(&gt->tlb_invalidate_lock);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 3ace129eb2af..a913fb6ffec3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -91,4 +91,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
void intel_gt_watchdog_work(struct work_struct *work);
+void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 14216cc471b1..f20687796490 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -73,6 +73,8 @@ struct intel_gt {
struct intel_uc uc;
+ struct mutex tlb_invalidate_lock;
+
struct i915_wa_list wa_list;
struct intel_gt_timelines {
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 170bba913c30..e27f3b7cf094 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -4273,26 +4273,6 @@ static struct ctl_table oa_table[] = {
{}
};
-static struct ctl_table i915_root[] = {
- {
- .procname = "i915",
- .maxlen = 0,
- .mode = 0555,
- .child = oa_table,
- },
- {}
-};
-
-static struct ctl_table dev_root[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = i915_root,
- },
- {}
-};
-
static void oa_init_supported_formats(struct i915_perf *perf)
{
struct drm_i915_private *i915 = perf->i915;
@@ -4488,7 +4468,7 @@ static int destroy_config(int id, void *p, void *data)
int i915_perf_sysctl_register(void)
{
- sysctl_header = register_sysctl_table(dev_root);
+ sysctl_header = register_sysctl("dev/i915", oa_table);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 4c28dadf8d69..c32420cb8ed5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2721,6 +2721,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28)
#define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24)
+#define GEN8_RTCR _MMIO(0x4260)
+#define GEN8_M1TCR _MMIO(0x4264)
+#define GEN8_M2TCR _MMIO(0x4268)
+#define GEN8_BTCR _MMIO(0x426c)
+#define GEN8_VTCR _MMIO(0x4270)
+
#if 0
#define PRB0_TAIL _MMIO(0x2030)
#define PRB0_HEAD _MMIO(0x2034)
@@ -2819,6 +2825,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define FAULT_VA_HIGH_BITS (0xf << 0)
#define FAULT_GTT_SEL (1 << 4)
+#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
+#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
+#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
+#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
+
#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
#define FPGA_DBG _MMIO(0x42300)
@@ -11166,8 +11177,12 @@ enum skl_power_gate {
_DKL_PHY2_BASE) + \
_DKL_TX_DPCNTL1)
-#define _DKL_TX_DPCNTL2 0x2C8
-#define DKL_TX_DP20BITMODE (1 << 2)
+#define _DKL_TX_DPCNTL2 0x2C8
+#define DKL_TX_DP20BITMODE REG_BIT(2)
+#define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK REG_GENMASK(4, 3)
+#define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(val) REG_FIELD_PREP(DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK, (val))
+#define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK REG_GENMASK(6, 5)
+#define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(val) REG_FIELD_PREP(DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK, (val))
#define DKL_TX_DPCNTL2(tc_port) _MMIO(_PORT(tc_port, \
_DKL_PHY1_BASE, \
_DKL_PHY2_BASE) + \
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 29a858c53bdd..c0d6d5526abe 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -457,6 +457,9 @@ int i915_vma_bind(struct i915_vma *vma,
vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
}
+ if (vma->obj)
+ set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
atomic_or(bind_flags, &vma->flags);
return 0;
}
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 22dab36afcb6..53f1ccb78849 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -68,6 +68,9 @@ static noinline depot_stack_handle_t __save_depot_stack(void)
static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
{
spin_lock_init(&rpm->debug.lock);
+
+ if (rpm->available)
+ stack_depot_init();
}
static noinline depot_stack_handle_t
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index fc25ebf1a593..778da3179b3c 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -724,7 +724,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
}
static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
- enum forcewake_domains fw_domains)
+ enum forcewake_domains fw_domains,
+ bool delayed)
{
struct intel_uncore_forcewake_domain *domain;
unsigned int tmp;
@@ -739,7 +740,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
continue;
}
- fw_domains_put(uncore, domain->mask);
+ if (delayed &&
+ !(domain->uncore->fw_domains_timer & domain->mask))
+ fw_domain_arm_timer(domain);
+ else
+ fw_domains_put(uncore, domain->mask);
}
}
@@ -760,7 +765,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
return;
spin_lock_irqsave(&uncore->lock, irqflags);
- __intel_uncore_forcewake_put(uncore, fw_domains);
+ __intel_uncore_forcewake_put(uncore, fw_domains, false);
+ spin_unlock_irqrestore(&uncore->lock, irqflags);
+}
+
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+ enum forcewake_domains fw_domains)
+{
+ unsigned long irqflags;
+
+ if (!uncore->fw_get_funcs)
+ return;
+
+ spin_lock_irqsave(&uncore->lock, irqflags);
+ __intel_uncore_forcewake_put(uncore, fw_domains, true);
spin_unlock_irqrestore(&uncore->lock, irqflags);
}
@@ -802,7 +820,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore,
if (!uncore->fw_get_funcs)
return;
- __intel_uncore_forcewake_put(uncore, fw_domains);
+ __intel_uncore_forcewake_put(uncore, fw_domains, false);
}
void assert_forcewakes_inactive(struct intel_uncore *uncore)
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 210fe2a71612..2a15b2b2e2fc 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -246,6 +246,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
enum forcewake_domains domains);
void intel_uncore_forcewake_put(struct intel_uncore *uncore,
enum forcewake_domains domains);
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+ enum forcewake_domains domains);
void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
enum forcewake_domains fw_domains);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 51b83776951b..17cfad6424db 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1560,6 +1560,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
for (i = 0; i < gpu->nr_rings; i++)
a6xx_gpu->shadow[i] = 0;
+ gpu->suspend_count++;
+
return 0;
}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 93005839b5da..fb261930ad1c 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -608,9 +608,27 @@ static int adreno_resume(struct device *dev)
return gpu->funcs->pm_resume(gpu);
}
+static int active_submits(struct msm_gpu *gpu)
+{
+ int active_submits;
+ mutex_lock(&gpu->active_lock);
+ active_submits = gpu->active_submits;
+ mutex_unlock(&gpu->active_lock);
+ return active_submits;
+}
+
static int adreno_suspend(struct device *dev)
{
struct msm_gpu *gpu = dev_to_gpu(dev);
+ int remaining;
+
+ remaining = wait_event_timeout(gpu->retire_event,
+ active_submits(gpu) == 0,
+ msecs_to_jiffies(1000));
+ if (remaining == 0) {
+ dev_err(dev, "Timeout waiting for GPU to suspend\n");
+ return -EBUSY;
+ }
return gpu->funcs->pm_suspend(gpu);
}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
index a98e964c3b6f..355894a3b48c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
@@ -26,9 +26,16 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx,
struct dpu_hw_pcc_cfg *cfg)
{
- u32 base = ctx->cap->sblk->pcc.base;
+ u32 base;
- if (!ctx || !base) {
+ if (!ctx) {
+ DRM_ERROR("invalid ctx %pK\n", ctx);
+ return;
+ }
+
+ base = ctx->cap->sblk->pcc.base;
+
+ if (!base) {
DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base);
return;
}
diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c
index 052548883d27..0fe02529b5e7 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.c
+++ b/drivers/gpu/drm/msm/dsi/dsi.c
@@ -40,7 +40,12 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi)
of_node_put(phy_node);
- if (!phy_pdev || !msm_dsi->phy) {
+ if (!phy_pdev) {
+ DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__);
+ return -EPROBE_DEFER;
+ }
+ if (!msm_dsi->phy) {
+ put_device(&phy_pdev->dev);
DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__);
return -EPROBE_DEFER;
}
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index c2ed177717c7..2027b38617ab 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -808,12 +808,14 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy,
struct msm_dsi_phy_clk_request *clk_req,
struct msm_dsi_phy_shared_timings *shared_timings)
{
- struct device *dev = &phy->pdev->dev;
+ struct device *dev;
int ret;
if (!phy || !phy->cfg->ops.enable)
return -EINVAL;
+ dev = &phy->pdev->dev;
+
ret = dsi_phy_enable_resource(phy);
if (ret) {
DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n",
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 3acdeae25caf..719720709e9e 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -97,10 +97,15 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi)
of_node_put(phy_node);
- if (!phy_pdev || !hdmi->phy) {
+ if (!phy_pdev) {
DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n");
return -EPROBE_DEFER;
}
+ if (!hdmi->phy) {
+ DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n");
+ put_device(&phy_pdev->dev);
+ return -EPROBE_DEFER;
+ }
hdmi->phy_dev = get_device(&phy_pdev->dev);
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index ad35a5d94053..555666e3f960 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -461,7 +461,7 @@ static int msm_init_vram(struct drm_device *dev)
of_node_put(node);
if (ret)
return ret;
- size = r.end - r.start;
+ size = r.end - r.start + 1;
DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start);
/* if we have no IOMMU, then we need to use carveout allocator.
@@ -510,7 +510,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
struct msm_drm_private *priv = dev_get_drvdata(dev);
struct drm_device *ddev;
struct msm_kms *kms;
- struct msm_mdss *mdss;
int ret, i;
ddev = drm_dev_alloc(drv, dev);
@@ -521,8 +520,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
ddev->dev_private = priv;
priv->dev = ddev;
- mdss = priv->mdss;
-
priv->wq = alloc_ordered_workqueue("msm", 0);
priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 0f78c2615272..2c1049c0ea14 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -703,6 +703,8 @@ static void retire_submits(struct msm_gpu *gpu)
}
}
}
+
+ wake_up_all(&gpu->retire_event);
}
static void retire_worker(struct kthread_work *work)
@@ -848,6 +850,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
INIT_LIST_HEAD(&gpu->active_list);
mutex_init(&gpu->active_lock);
mutex_init(&gpu->lock);
+ init_waitqueue_head(&gpu->retire_event);
kthread_init_work(&gpu->retire_work, retire_worker);
kthread_init_work(&gpu->recover_work, recover_worker);
kthread_init_work(&gpu->fault_work, fault_worker);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 445c6bfd4b6b..92aa1e9196c6 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -230,6 +230,9 @@ struct msm_gpu {
/* work for handling GPU recovery: */
struct kthread_work recover_work;
+ /** retire_event: notified when submits are retired: */
+ wait_queue_head_t retire_event;
+
/* work for handling active-list retiring: */
struct kthread_work retire_work;
diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
index 62405e980925..9bf319be11f6 100644
--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
@@ -133,6 +133,18 @@ void msm_devfreq_init(struct msm_gpu *gpu)
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
}
+static void cancel_idle_work(struct msm_gpu_devfreq *df)
+{
+ hrtimer_cancel(&df->idle_work.timer);
+ kthread_cancel_work_sync(&df->idle_work.work);
+}
+
+static void cancel_boost_work(struct msm_gpu_devfreq *df)
+{
+ hrtimer_cancel(&df->boost_work.timer);
+ kthread_cancel_work_sync(&df->boost_work.work);
+}
+
void msm_devfreq_cleanup(struct msm_gpu *gpu)
{
struct msm_gpu_devfreq *df = &gpu->devfreq;
@@ -152,7 +164,12 @@ void msm_devfreq_resume(struct msm_gpu *gpu)
void msm_devfreq_suspend(struct msm_gpu *gpu)
{
- devfreq_suspend_device(gpu->devfreq.devfreq);
+ struct msm_gpu_devfreq *df = &gpu->devfreq;
+
+ devfreq_suspend_device(df->devfreq);
+
+ cancel_idle_work(df);
+ cancel_boost_work(df);
}
static void msm_devfreq_boost_work(struct kthread_work *work)
@@ -196,7 +213,7 @@ void msm_devfreq_active(struct msm_gpu *gpu)
/*
* Cancel any pending transition to idle frequency:
*/
- hrtimer_cancel(&df->idle_work.timer);
+ cancel_idle_work(df);
idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time));
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index e2488559cc9f..11ad210919c8 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -666,18 +666,18 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
if (unlikely(!fpriv)) {
r = -ENOMEM;
- goto out_suspend;
+ goto err_suspend;
}
if (rdev->accel_working) {
vm = &fpriv->vm;
r = radeon_vm_init(rdev, vm);
if (r)
- goto out_fpriv;
+ goto err_fpriv;
r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
if (r)
- goto out_vm_fini;
+ goto err_vm_fini;
/* map the ib pool buffer read only into
* virtual address space */
@@ -685,7 +685,7 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
rdev->ring_tmp_bo.bo);
if (!vm->ib_bo_va) {
r = -ENOMEM;
- goto out_vm_fini;
+ goto err_vm_fini;
}
r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va,
@@ -693,19 +693,21 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
RADEON_VM_PAGE_READABLE |
RADEON_VM_PAGE_SNOOPED);
if (r)
- goto out_vm_fini;
+ goto err_vm_fini;
}
file_priv->driver_priv = fpriv;
}
- if (!r)
- goto out_suspend;
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+ return 0;
-out_vm_fini:
+err_vm_fini:
radeon_vm_fini(rdev, vm);
-out_fpriv:
+err_fpriv:
kfree(fpriv);
-out_suspend:
+
+err_suspend:
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return r;
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index a229da58962a..9300d3354c51 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -1262,7 +1262,6 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
struct mipi_dsi_device *device)
{
struct vc4_dsi *dsi = host_to_dsi(host);
- int ret;
dsi->lanes = device->lanes;
dsi->channel = device->channel;
@@ -1297,18 +1296,15 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
return 0;
}
- ret = component_add(&dsi->pdev->dev, &vc4_dsi_ops);
- if (ret) {
- mipi_dsi_host_unregister(&dsi->dsi_host);
- return ret;
- }
-
- return 0;
+ return component_add(&dsi->pdev->dev, &vc4_dsi_ops);
}
static int vc4_dsi_host_detach(struct mipi_dsi_host *host,
struct mipi_dsi_device *device)
{
+ struct vc4_dsi *dsi = host_to_dsi(host);
+
+ component_del(&dsi->pdev->dev, &vc4_dsi_ops);
return 0;
}
@@ -1686,9 +1682,7 @@ static int vc4_dsi_dev_remove(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct vc4_dsi *dsi = dev_get_drvdata(dev);
- component_del(&pdev->dev, &vc4_dsi_ops);
mipi_dsi_host_unregister(&dsi->dsi_host);
-
return 0;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index d6b66636a19b..ea3ecdda561d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1140,15 +1140,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
struct vmw_private *dev_priv,
struct vmw_fence_obj **p_fence,
uint32_t *p_handle);
-extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
struct vmw_fpriv *vmw_fp,
int ret,
struct drm_vmw_fence_rep __user
*user_fence_rep,
struct vmw_fence_obj *fence,
uint32_t fence_handle,
- int32_t out_fence_fd,
- struct sync_file *sync_file);
+ int32_t out_fence_fd);
bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd);
/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 44ca23b0ea4e..dd2ff441068e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -3879,17 +3879,17 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
* Also if copying fails, user-space will be unable to signal the fence object
* so we wait for it immediately, and then unreference the user-space reference.
*/
-void
+int
vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
struct vmw_fpriv *vmw_fp, int ret,
struct drm_vmw_fence_rep __user *user_fence_rep,
struct vmw_fence_obj *fence, uint32_t fence_handle,
- int32_t out_fence_fd, struct sync_file *sync_file)
+ int32_t out_fence_fd)
{
struct drm_vmw_fence_rep fence_rep;
if (user_fence_rep == NULL)
- return;
+ return 0;
memset(&fence_rep, 0, sizeof(fence_rep));
@@ -3917,19 +3917,13 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
* handle.
*/
if (unlikely(ret != 0) && (fence_rep.error == 0)) {
- if (sync_file)
- fput(sync_file->file);
-
- if (fence_rep.fd != -1) {
- put_unused_fd(fence_rep.fd);
- fence_rep.fd = -1;
- }
-
ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle);
VMW_DEBUG_USER("Fence copy error. Syncing.\n");
(void) vmw_fence_obj_wait(fence, false, false,
VMW_FENCE_WAIT_TIMEOUT);
}
+
+ return ret ? -EFAULT : 0;
}
/**
@@ -4266,16 +4260,23 @@ int vmw_execbuf_process(struct drm_file *file_priv,
(void) vmw_fence_obj_wait(fence, false, false,
VMW_FENCE_WAIT_TIMEOUT);
+ }
+ }
+
+ ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
+ user_fence_rep, fence, handle, out_fence_fd);
+
+ if (sync_file) {
+ if (ret) {
+ /* usercopy of fence failed, put the file object */
+ fput(sync_file->file);
+ put_unused_fd(out_fence_fd);
} else {
/* Link the fence with the FD created earlier */
fd_install(out_fence_fd, sync_file->file);
}
}
- vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
- user_fence_rep, fence, handle, out_fence_fd,
- sync_file);
-
/* Don't unreference when handing fence out */
if (unlikely(out_fence != NULL)) {
*out_fence = fence;
@@ -4293,7 +4294,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
*/
vmw_validation_unref_lists(&val_ctx);
- return 0;
+ return ret;
out_unlock_binding:
mutex_unlock(&dev_priv->binding_mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index c60d395f9e2e..5001b87aebe8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -1128,7 +1128,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
}
vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence,
- handle, -1, NULL);
+ handle, -1);
vmw_fence_obj_unreference(&fence);
return 0;
out_no_create:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 4e693e8de2c3..bbd2f4ec08ec 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -2501,7 +2501,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv,
if (file_priv)
vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv),
ret, user_fence_rep, fence,
- handle, -1, NULL);
+ handle, -1);
if (out_fence)
*out_fence = fence;
else
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 26cee452ec44..85975031389b 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -400,6 +400,7 @@
#define USB_DEVICE_ID_HP_X2 0x074d
#define USB_DEVICE_ID_HP_X2_10_COVER 0x0755
#define I2C_DEVICE_ID_HP_ENVY_X360_15 0x2d05
+#define I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100 0x29CF
#define I2C_DEVICE_ID_HP_SPECTRE_X360_15 0x2817
#define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544
#define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 1ce75e8b49d5..112901d2d8d2 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -330,6 +330,8 @@ static const struct hid_device_id hid_battery_quirks[] = {
HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15),
HID_BATTERY_QUIRK_IGNORE },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100),
+ HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15),
HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN),
diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c
index 72957a9f7117..efa6140915f4 100644
--- a/drivers/hid/hid-vivaldi.c
+++ b/drivers/hid/hid-vivaldi.c
@@ -6,16 +6,17 @@
* Author: Sean O'Brien <seobrien@chromium.org>
*/
+#include <linux/device.h>
#include <linux/hid.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/sysfs.h>
#define MIN_FN_ROW_KEY 1
#define MAX_FN_ROW_KEY 24
#define HID_VD_FN_ROW_PHYSMAP 0x00000001
#define HID_USAGE_FN_ROW_PHYSMAP (HID_UP_GOOGLEVENDOR | HID_VD_FN_ROW_PHYSMAP)
-static struct hid_driver hid_vivaldi;
-
struct vivaldi_data {
u32 function_row_physmap[MAX_FN_ROW_KEY - MIN_FN_ROW_KEY + 1];
int max_function_row_key;
@@ -40,7 +41,7 @@ static ssize_t function_row_physmap_show(struct device *dev,
return size;
}
-DEVICE_ATTR_RO(function_row_physmap);
+static DEVICE_ATTR_RO(function_row_physmap);
static struct attribute *sysfs_attrs[] = {
&dev_attr_function_row_physmap.attr,
NULL
@@ -74,10 +75,11 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
struct hid_usage *usage)
{
struct vivaldi_data *drvdata = hid_get_drvdata(hdev);
+ struct hid_report *report = field->report;
int fn_key;
int ret;
u32 report_len;
- u8 *buf;
+ u8 *report_data, *buf;
if (field->logical != HID_USAGE_FN_ROW_PHYSMAP ||
(usage->hid & HID_USAGE_PAGE) != HID_UP_ORDINAL)
@@ -89,12 +91,24 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
if (fn_key > drvdata->max_function_row_key)
drvdata->max_function_row_key = fn_key;
- buf = hid_alloc_report_buf(field->report, GFP_KERNEL);
- if (!buf)
+ report_data = buf = hid_alloc_report_buf(report, GFP_KERNEL);
+ if (!report_data)
return;
- report_len = hid_report_len(field->report);
- ret = hid_hw_raw_request(hdev, field->report->id, buf,
+ report_len = hid_report_len(report);
+ if (!report->id) {
+ /*
+ * hid_hw_raw_request() will stuff report ID (which will be 0)
+ * into the first byte of the buffer even for unnumbered
+ * reports, so we need to account for this to avoid getting
+ * -EOVERFLOW in return.
+ * Note that hid_alloc_report_buf() adds 7 bytes to the size
+ * so we can safely say that we have space for an extra byte.
+ */
+ report_len++;
+ }
+
+ ret = hid_hw_raw_request(hdev, report->id, report_data,
report_len, HID_FEATURE_REPORT,
HID_REQ_GET_REPORT);
if (ret < 0) {
@@ -103,7 +117,16 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
goto out;
}
- ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf,
+ if (!report->id) {
+ /*
+ * Undo the damage from hid_hw_raw_request() for unnumbered
+ * reports.
+ */
+ report_data++;
+ report_len--;
+ }
+
+ ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, report_data,
report_len, 0);
if (ret) {
dev_warn(&hdev->dev, "failed to report feature %d\n",
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index 8fe3efcb8327..614adb510dbd 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -28,11 +28,22 @@
struct uhid_device {
struct mutex devlock;
+
+ /* This flag tracks whether the HID device is usable for commands from
+ * userspace. The flag is already set before hid_add_device(), which
+ * runs in workqueue context, to allow hid_add_device() to communicate
+ * with userspace.
+ * However, if hid_add_device() fails, the flag is cleared without
+ * holding devlock.
+ * We guarantee that if @running changes from true to false while you're
+ * holding @devlock, it's still fine to access @hid.
+ */
bool running;
__u8 *rd_data;
uint rd_size;
+ /* When this is NULL, userspace may use UHID_CREATE/UHID_CREATE2. */
struct hid_device *hid;
struct uhid_event input_buf;
@@ -63,9 +74,18 @@ static void uhid_device_add_worker(struct work_struct *work)
if (ret) {
hid_err(uhid->hid, "Cannot register HID device: error %d\n", ret);
- hid_destroy_device(uhid->hid);
- uhid->hid = NULL;
- uhid->running = false;
+ /* We used to call hid_destroy_device() here, but that's really
+ * messy to get right because we have to coordinate with
+ * concurrent writes from userspace that might be in the middle
+ * of using uhid->hid.
+ * Just leave uhid->hid as-is for now, and clean it up when
+ * userspace tries to close or reinitialize the uhid instance.
+ *
+ * However, we do have to clear the ->running flag and do a
+ * wakeup to make sure userspace knows that the device is gone.
+ */
+ WRITE_ONCE(uhid->running, false);
+ wake_up_interruptible(&uhid->report_wait);
}
}
@@ -174,9 +194,9 @@ static int __uhid_report_queue_and_wait(struct uhid_device *uhid,
spin_unlock_irqrestore(&uhid->qlock, flags);
ret = wait_event_interruptible_timeout(uhid->report_wait,
- !uhid->report_running || !uhid->running,
+ !uhid->report_running || !READ_ONCE(uhid->running),
5 * HZ);
- if (!ret || !uhid->running || uhid->report_running)
+ if (!ret || !READ_ONCE(uhid->running) || uhid->report_running)
ret = -EIO;
else if (ret < 0)
ret = -ERESTARTSYS;
@@ -217,7 +237,7 @@ static int uhid_hid_get_report(struct hid_device *hid, unsigned char rnum,
struct uhid_event *ev;
int ret;
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EIO;
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -259,7 +279,7 @@ static int uhid_hid_set_report(struct hid_device *hid, unsigned char rnum,
struct uhid_event *ev;
int ret;
- if (!uhid->running || count > UHID_DATA_MAX)
+ if (!READ_ONCE(uhid->running) || count > UHID_DATA_MAX)
return -EIO;
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -474,7 +494,7 @@ static int uhid_dev_create2(struct uhid_device *uhid,
void *rd_data;
int ret;
- if (uhid->running)
+ if (uhid->hid)
return -EALREADY;
rd_size = ev->u.create2.rd_size;
@@ -556,15 +576,16 @@ static int uhid_dev_create(struct uhid_device *uhid,
static int uhid_dev_destroy(struct uhid_device *uhid)
{
- if (!uhid->running)
+ if (!uhid->hid)
return -EINVAL;
- uhid->running = false;
+ WRITE_ONCE(uhid->running, false);
wake_up_interruptible(&uhid->report_wait);
cancel_work_sync(&uhid->worker);
hid_destroy_device(uhid->hid);
+ uhid->hid = NULL;
kfree(uhid->rd_data);
return 0;
@@ -572,7 +593,7 @@ static int uhid_dev_destroy(struct uhid_device *uhid)
static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev)
{
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EINVAL;
hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input.data,
@@ -583,7 +604,7 @@ static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev)
static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev)
{
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EINVAL;
hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input2.data,
@@ -595,7 +616,7 @@ static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev)
static int uhid_dev_get_report_reply(struct uhid_device *uhid,
struct uhid_event *ev)
{
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EINVAL;
uhid_report_wake_up(uhid, ev->u.get_report_reply.id, ev);
@@ -605,7 +626,7 @@ static int uhid_dev_get_report_reply(struct uhid_device *uhid,
static int uhid_dev_set_report_reply(struct uhid_device *uhid,
struct uhid_event *ev)
{
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EINVAL;
uhid_report_wake_up(uhid, ev->u.set_report_reply.id, ev);
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 2a4cc39962e7..a7176fc0635d 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2588,6 +2588,24 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac,
}
}
+static bool wacom_wac_slot_is_active(struct input_dev *dev, int key)
+{
+ struct input_mt *mt = dev->mt;
+ struct input_mt_slot *s;
+
+ if (!mt)
+ return false;
+
+ for (s = mt->slots; s != mt->slots + mt->num_slots; s++) {
+ if (s->key == key &&
+ input_mt_get_value(s, ABS_MT_TRACKING_ID) >= 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
static void wacom_wac_finger_event(struct hid_device *hdev,
struct hid_field *field, struct hid_usage *usage, __s32 value)
{
@@ -2638,9 +2656,14 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
}
if (usage->usage_index + 1 == field->report_count) {
- if (equivalent_usage == wacom_wac->hid_data.last_slot_field &&
- wacom_wac->hid_data.confidence)
- wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
+ if (equivalent_usage == wacom_wac->hid_data.last_slot_field) {
+ bool touch_removed = wacom_wac_slot_is_active(wacom_wac->touch_input,
+ wacom_wac->hid_data.id) && !wacom_wac->hid_data.tipswitch;
+
+ if (wacom_wac->hid_data.confidence || touch_removed) {
+ wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
+ }
+ }
}
}
@@ -2659,6 +2682,10 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
hid_data->confidence = true;
+ hid_data->cc_report = 0;
+ hid_data->cc_index = -1;
+ hid_data->cc_value_index = -1;
+
for (i = 0; i < report->maxfield; i++) {
struct hid_field *field = report->field[i];
int j;
@@ -2692,11 +2719,14 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
hid_data->cc_index >= 0) {
struct hid_field *field = report->field[hid_data->cc_index];
int value = field->value[hid_data->cc_value_index];
- if (value)
+ if (value) {
hid_data->num_expected = value;
+ hid_data->num_received = 0;
+ }
}
else {
hid_data->num_expected = wacom_wac->features.touch_max;
+ hid_data->num_received = 0;
}
}
@@ -2724,6 +2754,7 @@ static void wacom_wac_finger_report(struct hid_device *hdev,
input_sync(input);
wacom_wac->hid_data.num_received = 0;
+ wacom_wac->hid_data.num_expected = 0;
/* keep touch state for pen event */
wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(wacom_wac);
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index ca873a3b98db..f2d05bff4245 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1660,6 +1660,13 @@ static int balloon_connect_vsp(struct hv_device *dev)
unsigned long t;
int ret;
+ /*
+ * max_pkt_size should be large enough for one vmbus packet header plus
+ * our receive buffer size. Hyper-V sends messages up to
+ * HV_HYP_PAGE_SIZE bytes long on balloon channel.
+ */
+ dev->channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2;
+
ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
balloon_onchannelcallback, dev);
if (ret)
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index d519aca4a9d6..fb6d14d213a1 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -662,6 +662,9 @@ static int adt7470_fan_write(struct device *dev, u32 attr, int channel, long val
struct adt7470_data *data = dev_get_drvdata(dev);
int err;
+ if (val <= 0)
+ return -EINVAL;
+
val = FAN_RPM_TO_PERIOD(val);
val = clamp_val(val, 1, 65534);
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index d401f9acf450..9949eeb79378 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -451,7 +451,7 @@ static int i8k_get_power_status(void)
static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
{
- struct dell_smm_data *data = PDE_DATA(file_inode(fp));
+ struct dell_smm_data *data = pde_data(file_inode(fp));
int __user *argp = (int __user *)arg;
int speed, err;
int val = 0;
@@ -585,7 +585,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset)
static int i8k_open_fs(struct inode *inode, struct file *file)
{
- return single_open(file, i8k_proc_show, PDE_DATA(inode));
+ return single_open(file, i8k_proc_show, pde_data(inode));
}
static const struct proc_ops i8k_proc_ops = {
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 74019dff2550..1c9493c70813 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -373,7 +373,7 @@ static const struct lm90_params lm90_params[] = {
.flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
| LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT,
.alert_alarms = 0x7c,
- .max_convrate = 8,
+ .max_convrate = 7,
},
[lm86] = {
.flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
@@ -394,12 +394,13 @@ static const struct lm90_params lm90_params[] = {
.max_convrate = 9,
},
[max6646] = {
- .flags = LM90_HAVE_CRIT,
+ .flags = LM90_HAVE_CRIT | LM90_HAVE_BROKEN_ALERT,
.alert_alarms = 0x7c,
.max_convrate = 6,
.reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
},
[max6654] = {
+ .flags = LM90_HAVE_BROKEN_ALERT,
.alert_alarms = 0x7c,
.max_convrate = 7,
.reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
@@ -418,7 +419,7 @@ static const struct lm90_params lm90_params[] = {
},
[max6680] = {
.flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT
- | LM90_HAVE_CRIT_ALRM_SWP,
+ | LM90_HAVE_CRIT_ALRM_SWP | LM90_HAVE_BROKEN_ALERT,
.alert_alarms = 0x7c,
.max_convrate = 7,
},
@@ -848,7 +849,7 @@ static int lm90_update_device(struct device *dev)
* Re-enable ALERT# output if it was originally enabled and
* relevant alarms are all clear
*/
- if (!(data->config_orig & 0x80) &&
+ if ((client->irq || !(data->config_orig & 0x80)) &&
!(data->alarms & data->alert_alarms)) {
if (data->config & 0x80) {
dev_dbg(&client->dev, "Re-enabling ALERT#\n");
@@ -1807,22 +1808,22 @@ static bool lm90_is_tripped(struct i2c_client *client, u16 *status)
if (st & LM90_STATUS_LLOW)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_min, 0);
+ hwmon_temp_min_alarm, 0);
if (st & LM90_STATUS_RLOW)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_min, 1);
+ hwmon_temp_min_alarm, 1);
if (st2 & MAX6696_STATUS2_R2LOW)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_min, 2);
+ hwmon_temp_min_alarm, 2);
if (st & LM90_STATUS_LHIGH)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_max, 0);
+ hwmon_temp_max_alarm, 0);
if (st & LM90_STATUS_RHIGH)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_max, 1);
+ hwmon_temp_max_alarm, 1);
if (st2 & MAX6696_STATUS2_R2HIGH)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_max, 2);
+ hwmon_temp_max_alarm, 2);
return true;
}
diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c
index 2a4bed0ab226..7352d2b3c756 100644
--- a/drivers/hwmon/ltc2992.c
+++ b/drivers/hwmon/ltc2992.c
@@ -248,8 +248,7 @@ static int ltc2992_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask
gpio_status = reg;
- gpio_nr = 0;
- for_each_set_bit_from(gpio_nr, mask, LTC2992_GPIO_NR) {
+ for_each_set_bit(gpio_nr, mask, LTC2992_GPIO_NR) {
if (test_bit(LTC2992_GPIO_BIT(gpio_nr), &gpio_status))
set_bit(gpio_nr, bits);
}
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index fd3f91cb01c6..098d12b9ecda 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -1175,7 +1175,7 @@ static inline u8 in_to_reg(u32 val, u8 nr)
struct nct6775_data {
int addr; /* IO base of hw monitor block */
- int sioreg; /* SIO register address */
+ struct nct6775_sio_data *sio_data;
enum kinds kind;
const char *name;
@@ -3559,7 +3559,7 @@ clear_caseopen(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct nct6775_data *data = dev_get_drvdata(dev);
- struct nct6775_sio_data *sio_data = dev_get_platdata(dev);
+ struct nct6775_sio_data *sio_data = data->sio_data;
int nr = to_sensor_dev_attr(attr)->index - INTRUSION_ALARM_BASE;
unsigned long val;
u8 reg;
@@ -3967,7 +3967,7 @@ static int nct6775_probe(struct platform_device *pdev)
return -ENOMEM;
data->kind = sio_data->kind;
- data->sioreg = sio_data->sioreg;
+ data->sio_data = sio_data;
if (sio_data->access == access_direct) {
data->addr = res->start;
diff --git a/drivers/hwmon/pmbus/ir38064.c b/drivers/hwmon/pmbus/ir38064.c
index 0ea7e1c18bdc..09276e397194 100644
--- a/drivers/hwmon/pmbus/ir38064.c
+++ b/drivers/hwmon/pmbus/ir38064.c
@@ -62,7 +62,7 @@ static const struct i2c_device_id ir38064_id[] = {
MODULE_DEVICE_TABLE(i2c, ir38064_id);
-static const struct of_device_id ir38064_of_match[] = {
+static const struct of_device_id __maybe_unused ir38064_of_match[] = {
{ .compatible = "infineon,ir38060" },
{ .compatible = "infineon,ir38064" },
{ .compatible = "infineon,ir38164" },
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index e45c600fccc0..bc2cfa5f9592 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -347,7 +347,7 @@ static int ad7124_find_free_config_slot(struct ad7124_state *st)
{
unsigned int free_cfg_slot;
- free_cfg_slot = find_next_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS, 0);
+ free_cfg_slot = find_first_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS);
if (free_cfg_slot == AD7124_MAX_CONFIGS)
return -1;
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index 4b1b16e7a75b..89234d04cc65 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -1709,14 +1709,14 @@ clean_msixtbl:
*/
static void irdma_get_used_rsrc(struct irdma_device *iwdev)
{
- iwdev->rf->used_pds = find_next_zero_bit(iwdev->rf->allocated_pds,
- iwdev->rf->max_pd, 0);
- iwdev->rf->used_qps = find_next_zero_bit(iwdev->rf->allocated_qps,
- iwdev->rf->max_qp, 0);
- iwdev->rf->used_cqs = find_next_zero_bit(iwdev->rf->allocated_cqs,
- iwdev->rf->max_cq, 0);
- iwdev->rf->used_mrs = find_next_zero_bit(iwdev->rf->allocated_mrs,
- iwdev->rf->max_mr, 0);
+ iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds,
+ iwdev->rf->max_pd);
+ iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps,
+ iwdev->rf->max_qp);
+ iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs,
+ iwdev->rf->max_cq);
+ iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs,
+ iwdev->rf->max_mr);
}
void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf)
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 9363bccfc6e7..a8e1c30c370f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -196,7 +196,7 @@ struct qib_ctxtdata {
pid_t pid;
pid_t subpid[QLOGIC_IB_MAX_SUBCTXT];
/* same size as task_struct .comm[], command that opened context */
- char comm[16];
+ char comm[TASK_COMM_LEN];
/* pkeys set by this use of this ctxt */
u16 pkeys[4];
/* so file ops can get at unit */
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 63854f4b6524..aa290928cf96 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1321,7 +1321,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
rcd->tid_pg_list = ptmp;
rcd->pid = current->pid;
init_waitqueue_head(&dd->rcd[ctxt]->wait);
- strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
+ get_task_comm(rcd->comm, current);
ctxt_fp(fp) = rcd;
qib_stats.sps_ctxts++;
dd->freectxts--;
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index afe11f475b8c..5018b9387694 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -217,8 +217,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
* the port number must be in the Dynamic Ports range
* (0xc000 - 0xffff).
*/
- qp->src_port = RXE_ROCE_V2_SPORT +
- (hash_32_generic(qp_num(qp), 14) & 0x3fff);
+ qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff);
qp->sq.max_wr = init->cap.max_send_wr;
/* These caps are limited by rxe_qp_chk_cap() done by the caller */
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c
index 28b8581b44dd..d8c4d5664145 100644
--- a/drivers/macintosh/mac_hid.c
+++ b/drivers/macintosh/mac_hid.c
@@ -239,33 +239,11 @@ static struct ctl_table mac_hid_files[] = {
{ }
};
-/* dir in /proc/sys/dev */
-static struct ctl_table mac_hid_dir[] = {
- {
- .procname = "mac_hid",
- .maxlen = 0,
- .mode = 0555,
- .child = mac_hid_files,
- },
- { }
-};
-
-/* /proc/sys/dev itself, in case that is not there yet */
-static struct ctl_table mac_hid_root_dir[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = mac_hid_dir,
- },
- { }
-};
-
static struct ctl_table_header *mac_hid_sysctl_header;
static int __init mac_hid_init(void)
{
- mac_hid_sysctl_header = register_sysctl_table(mac_hid_root_dir);
+ mac_hid_sysctl_header = register_sysctl("dev/mac_hid", mac_hid_files);
if (!mac_hid_sysctl_header)
return -ENOMEM;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c0ae8087c602..dcbd6d201619 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -489,7 +489,7 @@ static void start_io_acct(struct dm_io *io)
struct mapped_device *md = io->md;
struct bio *bio = io->orig_bio;
- io->start_time = bio_start_io_acct(bio);
+ bio_start_io_acct_time(bio, io->start_time);
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio),
@@ -535,7 +535,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io->md = md;
spin_lock_init(&io->endio_lock);
- start_io_acct(io);
+ io->start_time = jiffies;
return io;
}
@@ -1442,9 +1442,6 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
ci->sector = bio->bi_iter.bi_sector;
}
-#define __dm_part_stat_sub(part, field, subnd) \
- (part_stat_get(part, field) -= (subnd))
-
/*
* Entry point to split a bio into clones and submit them to the targets.
*/
@@ -1480,23 +1477,12 @@ static void __split_and_process_bio(struct mapped_device *md,
GFP_NOIO, &md->queue->bio_split);
ci.io->orig_bio = b;
- /*
- * Adjust IO stats for each split, otherwise upon queue
- * reentry there will be redundant IO accounting.
- * NOTE: this is a stop-gap fix, a proper fix involves
- * significant refactoring of DM core's bio splitting
- * (by eliminating DM's splitting and just using bio_split)
- */
- part_stat_lock();
- __dm_part_stat_sub(dm_disk(md)->part0,
- sectors[op_stat_group(bio_op(bio))], ci.sector_count);
- part_stat_unlock();
-
bio_chain(b, bio);
trace_block_split(b, bio->bi_iter.bi_sector);
submit_bio_noacct(bio);
}
}
+ start_io_acct(ci.io);
/* drop the extra reference count */
dm_io_dec_pending(ci.io, errno_to_blk_status(error));
diff --git a/drivers/media/cec/core/cec-core.c b/drivers/media/cec/core/cec-core.c
index ec67065d5202..a3ab6a43fb14 100644
--- a/drivers/media/cec/core/cec-core.c
+++ b/drivers/media/cec/core/cec-core.c
@@ -106,7 +106,7 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode,
/* Part 1: Find a free minor number */
mutex_lock(&cec_devnode_lock);
- minor = find_next_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES, 0);
+ minor = find_first_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES);
if (minor == CEC_NUM_DEVICES) {
mutex_unlock(&cec_devnode_lock);
pr_err("could not get a free minor\n");
diff --git a/drivers/media/mc/mc-devnode.c b/drivers/media/mc/mc-devnode.c
index f11382afe23b..680fbb3a9340 100644
--- a/drivers/media/mc/mc-devnode.c
+++ b/drivers/media/mc/mc-devnode.c
@@ -217,7 +217,7 @@ int __must_check media_devnode_register(struct media_device *mdev,
/* Part 1: Find a free minor number */
mutex_lock(&media_devnode_lock);
- minor = find_next_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES, 0);
+ minor = find_first_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES);
if (minor == MEDIA_NUM_DEVICES) {
mutex_unlock(&media_devnode_lock);
pr_err("could not get a free minor\n");
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 24a4532053e4..e90adfa57950 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -300,8 +300,8 @@ mpt_is_discovery_complete(MPT_ADAPTER *ioc)
if (!hdr.ExtPageLength)
goto out;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
goto out;
@@ -316,8 +316,8 @@ mpt_is_discovery_complete(MPT_ADAPTER *ioc)
rc = 1;
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return rc;
}
@@ -1661,16 +1661,14 @@ mpt_mapresources(MPT_ADAPTER *ioc)
const uint64_t required_mask = dma_get_required_mask
(&pdev->dev);
if (required_mask > DMA_BIT_MASK(32)
- && !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))
- && !pci_set_consistent_dma_mask(pdev,
- DMA_BIT_MASK(64))) {
+ && !dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))
+ && !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
ioc->dma_mask = DMA_BIT_MASK(64);
dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
": 64 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
ioc->name));
- } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
- && !pci_set_consistent_dma_mask(pdev,
- DMA_BIT_MASK(32))) {
+ } else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))
+ && !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) {
ioc->dma_mask = DMA_BIT_MASK(32);
dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
@@ -1681,9 +1679,8 @@ mpt_mapresources(MPT_ADAPTER *ioc)
goto out_pci_release_region;
}
} else {
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
- && !pci_set_consistent_dma_mask(pdev,
- DMA_BIT_MASK(32))) {
+ if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))
+ && !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) {
ioc->dma_mask = DMA_BIT_MASK(32);
dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
@@ -2769,9 +2766,9 @@ mpt_adapter_disable(MPT_ADAPTER *ioc)
if (ioc->spi_data.pIocPg4 != NULL) {
sz = ioc->spi_data.IocPg4Sz;
- pci_free_consistent(ioc->pcidev, sz,
- ioc->spi_data.pIocPg4,
- ioc->spi_data.IocPg4_dma);
+ dma_free_coherent(&ioc->pcidev->dev, sz,
+ ioc->spi_data.pIocPg4,
+ ioc->spi_data.IocPg4_dma);
ioc->spi_data.pIocPg4 = NULL;
ioc->alloc_total -= sz;
}
@@ -3515,7 +3512,8 @@ mpt_alloc_fw_memory(MPT_ADAPTER *ioc, int size)
rc = 0;
goto out;
}
- ioc->cached_fw = pci_alloc_consistent(ioc->pcidev, size, &ioc->cached_fw_dma);
+ ioc->cached_fw = dma_alloc_coherent(&ioc->pcidev->dev, size,
+ &ioc->cached_fw_dma, GFP_ATOMIC);
if (!ioc->cached_fw) {
printk(MYIOC_s_ERR_FMT "Unable to allocate memory for the cached firmware image!\n",
ioc->name);
@@ -3548,7 +3546,8 @@ mpt_free_fw_memory(MPT_ADAPTER *ioc)
sz = ioc->facts.FWImageSize;
dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "free_fw_memory: FW Image @ %p[%p], sz=%d[%x] bytes\n",
ioc->name, ioc->cached_fw, (void *)(ulong)ioc->cached_fw_dma, sz, sz));
- pci_free_consistent(ioc->pcidev, sz, ioc->cached_fw, ioc->cached_fw_dma);
+ dma_free_coherent(&ioc->pcidev->dev, sz, ioc->cached_fw,
+ ioc->cached_fw_dma);
ioc->alloc_total -= sz;
ioc->cached_fw = NULL;
}
@@ -4447,9 +4446,8 @@ PrimeIocFifos(MPT_ADAPTER *ioc)
*/
if (ioc->pcidev->device == MPI_MANUFACTPAGE_DEVID_SAS1078 &&
ioc->dma_mask > DMA_BIT_MASK(35)) {
- if (!pci_set_dma_mask(ioc->pcidev, DMA_BIT_MASK(32))
- && !pci_set_consistent_dma_mask(ioc->pcidev,
- DMA_BIT_MASK(32))) {
+ if (!dma_set_mask(&ioc->pcidev->dev, DMA_BIT_MASK(32))
+ && !dma_set_coherent_mask(&ioc->pcidev->dev, DMA_BIT_MASK(32))) {
dma_mask = DMA_BIT_MASK(35);
d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
"setting 35 bit addressing for "
@@ -4457,10 +4455,10 @@ PrimeIocFifos(MPT_ADAPTER *ioc)
ioc->name));
} else {
/*Reseting DMA mask to 64 bit*/
- pci_set_dma_mask(ioc->pcidev,
- DMA_BIT_MASK(64));
- pci_set_consistent_dma_mask(ioc->pcidev,
- DMA_BIT_MASK(64));
+ dma_set_mask(&ioc->pcidev->dev,
+ DMA_BIT_MASK(64));
+ dma_set_coherent_mask(&ioc->pcidev->dev,
+ DMA_BIT_MASK(64));
printk(MYIOC_s_ERR_FMT
"failed setting 35 bit addressing for "
@@ -4595,8 +4593,8 @@ PrimeIocFifos(MPT_ADAPTER *ioc)
alloc_dma += ioc->reply_sz;
}
- if (dma_mask == DMA_BIT_MASK(35) && !pci_set_dma_mask(ioc->pcidev,
- ioc->dma_mask) && !pci_set_consistent_dma_mask(ioc->pcidev,
+ if (dma_mask == DMA_BIT_MASK(35) && !dma_set_mask(&ioc->pcidev->dev,
+ ioc->dma_mask) && !dma_set_coherent_mask(&ioc->pcidev->dev,
ioc->dma_mask))
d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
"restoring 64 bit addressing\n", ioc->name));
@@ -4620,8 +4618,8 @@ out_fail:
ioc->sense_buf_pool = NULL;
}
- if (dma_mask == DMA_BIT_MASK(35) && !pci_set_dma_mask(ioc->pcidev,
- DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(ioc->pcidev,
+ if (dma_mask == DMA_BIT_MASK(35) && !dma_set_mask(&ioc->pcidev->dev,
+ DMA_BIT_MASK(64)) && !dma_set_coherent_mask(&ioc->pcidev->dev,
DMA_BIT_MASK(64)))
d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
"restoring 64 bit addressing\n", ioc->name));
@@ -4968,7 +4966,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc)
if (hdr.PageLength > 0) {
data_sz = hdr.PageLength * 4;
- ppage0_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page0_dma);
+ ppage0_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz,
+ &page0_dma, GFP_KERNEL);
rc = -ENOMEM;
if (ppage0_alloc) {
memset((u8 *)ppage0_alloc, 0, data_sz);
@@ -4982,7 +4981,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc)
}
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) ppage0_alloc, page0_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz,
+ (u8 *)ppage0_alloc, page0_dma);
/* FIXME!
* Normalize endianness of structure data,
@@ -5014,7 +5014,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc)
data_sz = hdr.PageLength * 4;
rc = -ENOMEM;
- ppage1_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page1_dma);
+ ppage1_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz,
+ &page1_dma, GFP_KERNEL);
if (ppage1_alloc) {
memset((u8 *)ppage1_alloc, 0, data_sz);
cfg.physAddr = page1_dma;
@@ -5026,7 +5027,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc)
memcpy(&ioc->lan_cnfg_page1, ppage1_alloc, copy_sz);
}
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) ppage1_alloc, page1_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz,
+ (u8 *)ppage1_alloc, page1_dma);
/* FIXME!
* Normalize endianness of structure data,
@@ -5315,7 +5317,8 @@ GetIoUnitPage2(MPT_ADAPTER *ioc)
/* Read the config page */
data_sz = hdr.PageLength * 4;
rc = -ENOMEM;
- ppage_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page_dma);
+ ppage_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz,
+ &page_dma, GFP_KERNEL);
if (ppage_alloc) {
memset((u8 *)ppage_alloc, 0, data_sz);
cfg.physAddr = page_dma;
@@ -5325,7 +5328,8 @@ GetIoUnitPage2(MPT_ADAPTER *ioc)
if ((rc = mpt_config(ioc, &cfg)) == 0)
ioc->biosVersion = le32_to_cpu(ppage_alloc->BiosVersion);
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) ppage_alloc, page_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz,
+ (u8 *)ppage_alloc, page_dma);
}
return rc;
@@ -5400,7 +5404,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum)
return -EFAULT;
if (header.PageLength > 0) {
- pbuf = pci_alloc_consistent(ioc->pcidev, header.PageLength * 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev,
+ header.PageLength * 4, &buf_dma,
+ GFP_KERNEL);
if (pbuf) {
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
cfg.physAddr = buf_dma;
@@ -5456,7 +5462,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum)
}
}
if (pbuf) {
- pci_free_consistent(ioc->pcidev, header.PageLength * 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev,
+ header.PageLength * 4, pbuf,
+ buf_dma);
}
}
}
@@ -5478,7 +5486,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum)
if (header.PageLength > 0) {
/* Allocate memory and read SCSI Port Page 2
*/
- pbuf = pci_alloc_consistent(ioc->pcidev, header.PageLength * 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev,
+ header.PageLength * 4, &buf_dma,
+ GFP_KERNEL);
if (pbuf) {
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_NVRAM;
cfg.physAddr = buf_dma;
@@ -5543,7 +5553,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum)
}
}
- pci_free_consistent(ioc->pcidev, header.PageLength * 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev,
+ header.PageLength * 4, pbuf,
+ buf_dma);
}
}
@@ -5659,8 +5671,8 @@ mpt_inactive_raid_volumes(MPT_ADAPTER *ioc, u8 channel, u8 id)
if (!hdr.PageLength)
goto out;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
goto out;
@@ -5707,8 +5719,8 @@ mpt_inactive_raid_volumes(MPT_ADAPTER *ioc, u8 channel, u8 id)
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
}
/**
@@ -5752,8 +5764,8 @@ mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
rc = -ENOMEM;
@@ -5776,8 +5788,8 @@ mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num,
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
return rc;
}
@@ -5819,8 +5831,8 @@ mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc, u8 phys_disk_num)
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
rc = 0;
@@ -5840,8 +5852,8 @@ mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc, u8 phys_disk_num)
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
return rc;
}
@@ -5891,8 +5903,8 @@ mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
rc = -ENOMEM;
@@ -5929,8 +5941,8 @@ mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num,
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
return rc;
}
@@ -5986,7 +5998,8 @@ mpt_findImVolumes(MPT_ADAPTER *ioc)
return -EFAULT;
iocpage2sz = header.PageLength * 4;
- pIoc2 = pci_alloc_consistent(ioc->pcidev, iocpage2sz, &ioc2_dma);
+ pIoc2 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage2sz, &ioc2_dma,
+ GFP_KERNEL);
if (!pIoc2)
return -ENOMEM;
@@ -6011,7 +6024,7 @@ mpt_findImVolumes(MPT_ADAPTER *ioc)
pIoc2->RaidVolume[i].VolumeID);
out:
- pci_free_consistent(ioc->pcidev, iocpage2sz, pIoc2, ioc2_dma);
+ dma_free_coherent(&ioc->pcidev->dev, iocpage2sz, pIoc2, ioc2_dma);
return rc;
}
@@ -6053,7 +6066,8 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc)
/* Read Header good, alloc memory
*/
iocpage3sz = header.PageLength * 4;
- pIoc3 = pci_alloc_consistent(ioc->pcidev, iocpage3sz, &ioc3_dma);
+ pIoc3 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage3sz, &ioc3_dma,
+ GFP_KERNEL);
if (!pIoc3)
return 0;
@@ -6070,7 +6084,7 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc)
}
}
- pci_free_consistent(ioc->pcidev, iocpage3sz, pIoc3, ioc3_dma);
+ dma_free_coherent(&ioc->pcidev->dev, iocpage3sz, pIoc3, ioc3_dma);
return 0;
}
@@ -6104,7 +6118,8 @@ mpt_read_ioc_pg_4(MPT_ADAPTER *ioc)
if ( (pIoc4 = ioc->spi_data.pIocPg4) == NULL ) {
iocpage4sz = (header.PageLength + 4) * 4; /* Allow 4 additional SEP's */
- pIoc4 = pci_alloc_consistent(ioc->pcidev, iocpage4sz, &ioc4_dma);
+ pIoc4 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage4sz,
+ &ioc4_dma, GFP_KERNEL);
if (!pIoc4)
return;
ioc->alloc_total += iocpage4sz;
@@ -6122,7 +6137,8 @@ mpt_read_ioc_pg_4(MPT_ADAPTER *ioc)
ioc->spi_data.IocPg4_dma = ioc4_dma;
ioc->spi_data.IocPg4Sz = iocpage4sz;
} else {
- pci_free_consistent(ioc->pcidev, iocpage4sz, pIoc4, ioc4_dma);
+ dma_free_coherent(&ioc->pcidev->dev, iocpage4sz, pIoc4,
+ ioc4_dma);
ioc->spi_data.pIocPg4 = NULL;
ioc->alloc_total -= iocpage4sz;
}
@@ -6159,7 +6175,8 @@ mpt_read_ioc_pg_1(MPT_ADAPTER *ioc)
/* Read Header good, alloc memory
*/
iocpage1sz = header.PageLength * 4;
- pIoc1 = pci_alloc_consistent(ioc->pcidev, iocpage1sz, &ioc1_dma);
+ pIoc1 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage1sz, &ioc1_dma,
+ GFP_KERNEL);
if (!pIoc1)
return;
@@ -6210,7 +6227,7 @@ mpt_read_ioc_pg_1(MPT_ADAPTER *ioc)
}
}
- pci_free_consistent(ioc->pcidev, iocpage1sz, pIoc1, ioc1_dma);
+ dma_free_coherent(&ioc->pcidev->dev, iocpage1sz, pIoc1, ioc1_dma);
return;
}
@@ -6239,7 +6256,8 @@ mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc)
goto out;
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
- pbuf = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &buf_dma, GFP_KERNEL);
if (!pbuf)
goto out;
@@ -6255,7 +6273,8 @@ mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc)
out:
if (pbuf)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, pbuf,
+ buf_dma);
}
/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index ae433c150b37..03c8fb1795c2 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -1041,14 +1041,15 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags,
* copying the data in this array into the correct place in the
* request and chain buffers.
*/
- sglbuf = pci_alloc_consistent(ioc->pcidev, MAX_SGL_BYTES, sglbuf_dma);
+ sglbuf = dma_alloc_coherent(&ioc->pcidev->dev, MAX_SGL_BYTES,
+ sglbuf_dma, GFP_KERNEL);
if (sglbuf == NULL)
goto free_and_fail;
if (sgdir & 0x04000000)
- dir = PCI_DMA_TODEVICE;
+ dir = DMA_TO_DEVICE;
else
- dir = PCI_DMA_FROMDEVICE;
+ dir = DMA_FROM_DEVICE;
/* At start:
* sgl = sglbuf = point to beginning of sg buffer
@@ -1062,9 +1063,9 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags,
while (bytes_allocd < bytes) {
this_alloc = min(alloc_sz, bytes-bytes_allocd);
buflist[buflist_ent].len = this_alloc;
- buflist[buflist_ent].kptr = pci_alloc_consistent(ioc->pcidev,
- this_alloc,
- &pa);
+ buflist[buflist_ent].kptr = dma_alloc_coherent(&ioc->pcidev->dev,
+ this_alloc,
+ &pa, GFP_KERNEL);
if (buflist[buflist_ent].kptr == NULL) {
alloc_sz = alloc_sz / 2;
if (alloc_sz == 0) {
@@ -1080,8 +1081,9 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags,
bytes_allocd += this_alloc;
sgl->FlagsLength = (0x10000000|sgdir|this_alloc);
- dma_addr = pci_map_single(ioc->pcidev,
- buflist[buflist_ent].kptr, this_alloc, dir);
+ dma_addr = dma_map_single(&ioc->pcidev->dev,
+ buflist[buflist_ent].kptr,
+ this_alloc, dir);
sgl->Address = dma_addr;
fragcnt++;
@@ -1140,9 +1142,11 @@ free_and_fail:
kptr = buflist[i].kptr;
len = buflist[i].len;
- pci_free_consistent(ioc->pcidev, len, kptr, dma_addr);
+ dma_free_coherent(&ioc->pcidev->dev, len, kptr,
+ dma_addr);
}
- pci_free_consistent(ioc->pcidev, MAX_SGL_BYTES, sglbuf, *sglbuf_dma);
+ dma_free_coherent(&ioc->pcidev->dev, MAX_SGL_BYTES, sglbuf,
+ *sglbuf_dma);
}
kfree(buflist);
return NULL;
@@ -1162,9 +1166,9 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE
int n = 0;
if (sg->FlagsLength & 0x04000000)
- dir = PCI_DMA_TODEVICE;
+ dir = DMA_TO_DEVICE;
else
- dir = PCI_DMA_FROMDEVICE;
+ dir = DMA_FROM_DEVICE;
nib = (sg->FlagsLength & 0xF0000000) >> 28;
while (! (nib & 0x4)) { /* eob */
@@ -1179,8 +1183,10 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE
dma_addr = sg->Address;
kptr = bl->kptr;
len = bl->len;
- pci_unmap_single(ioc->pcidev, dma_addr, len, dir);
- pci_free_consistent(ioc->pcidev, len, kptr, dma_addr);
+ dma_unmap_single(&ioc->pcidev->dev, dma_addr, len,
+ dir);
+ dma_free_coherent(&ioc->pcidev->dev, len, kptr,
+ dma_addr);
n++;
}
sg++;
@@ -1197,12 +1203,12 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE
dma_addr = sg->Address;
kptr = bl->kptr;
len = bl->len;
- pci_unmap_single(ioc->pcidev, dma_addr, len, dir);
- pci_free_consistent(ioc->pcidev, len, kptr, dma_addr);
+ dma_unmap_single(&ioc->pcidev->dev, dma_addr, len, dir);
+ dma_free_coherent(&ioc->pcidev->dev, len, kptr, dma_addr);
n++;
}
- pci_free_consistent(ioc->pcidev, MAX_SGL_BYTES, sgl, sgl_dma);
+ dma_free_coherent(&ioc->pcidev->dev, MAX_SGL_BYTES, sgl, sgl_dma);
kfree(buflist);
dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "-SG: Free'd 1 SGL buf + %d kbufs!\n",
ioc->name, n));
@@ -2100,8 +2106,9 @@ mptctl_do_mpt_command (MPT_ADAPTER *ioc, struct mpt_ioctl_command karg, void __u
}
flagsLength |= karg.dataOutSize;
bufOut.len = karg.dataOutSize;
- bufOut.kptr = pci_alloc_consistent(
- ioc->pcidev, bufOut.len, &dma_addr_out);
+ bufOut.kptr = dma_alloc_coherent(&ioc->pcidev->dev,
+ bufOut.len,
+ &dma_addr_out, GFP_KERNEL);
if (bufOut.kptr == NULL) {
rc = -ENOMEM;
@@ -2134,8 +2141,9 @@ mptctl_do_mpt_command (MPT_ADAPTER *ioc, struct mpt_ioctl_command karg, void __u
flagsLength |= karg.dataInSize;
bufIn.len = karg.dataInSize;
- bufIn.kptr = pci_alloc_consistent(ioc->pcidev,
- bufIn.len, &dma_addr_in);
+ bufIn.kptr = dma_alloc_coherent(&ioc->pcidev->dev,
+ bufIn.len,
+ &dma_addr_in, GFP_KERNEL);
if (bufIn.kptr == NULL) {
rc = -ENOMEM;
@@ -2283,13 +2291,13 @@ done_free_mem:
/* Free the allocated memory.
*/
if (bufOut.kptr != NULL) {
- pci_free_consistent(ioc->pcidev,
- bufOut.len, (void *) bufOut.kptr, dma_addr_out);
+ dma_free_coherent(&ioc->pcidev->dev, bufOut.len,
+ (void *)bufOut.kptr, dma_addr_out);
}
if (bufIn.kptr != NULL) {
- pci_free_consistent(ioc->pcidev,
- bufIn.len, (void *) bufIn.kptr, dma_addr_in);
+ dma_free_coherent(&ioc->pcidev->dev, bufIn.len,
+ (void *)bufIn.kptr, dma_addr_in);
}
/* mf is null if command issued successfully
@@ -2395,7 +2403,9 @@ mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size)
/* Issue the second config page request */
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
- pbuf = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev,
+ hdr.PageLength * 4,
+ &buf_dma, GFP_KERNEL);
if (pbuf) {
cfg.physAddr = buf_dma;
if (mpt_config(ioc, &cfg) == 0) {
@@ -2405,7 +2415,9 @@ mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size)
pdata->BoardTracerNumber, 24);
}
}
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev,
+ hdr.PageLength * 4, pbuf,
+ buf_dma);
pbuf = NULL;
}
}
@@ -2470,7 +2482,7 @@ mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size)
else
IstwiRWRequest->DeviceAddr = 0xB0;
- pbuf = pci_alloc_consistent(ioc->pcidev, 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev, 4, &buf_dma, GFP_KERNEL);
if (!pbuf)
goto out;
ioc->add_sge((char *)&IstwiRWRequest->SGL,
@@ -2519,7 +2531,7 @@ retry_wait:
SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context, 0);
if (pbuf)
- pci_free_consistent(ioc->pcidev, 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev, 4, pbuf, buf_dma);
/* Copy the data from kernel memory to user memory
*/
@@ -2585,7 +2597,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg)
/* Get the data transfer speeds
*/
data_sz = ioc->spi_data.sdp0length * 4;
- pg0_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page_dma);
+ pg0_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz, &page_dma,
+ GFP_KERNEL);
if (pg0_alloc) {
hdr.PageVersion = ioc->spi_data.sdp0version;
hdr.PageLength = data_sz;
@@ -2623,7 +2636,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg)
karg.negotiated_speed = HP_DEV_SPEED_ASYNC;
}
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) pg0_alloc, page_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz, (u8 *)pg0_alloc,
+ page_dma);
}
/* Set defaults
@@ -2649,7 +2663,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg)
/* Issue the second config page request */
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
data_sz = (int) cfg.cfghdr.hdr->PageLength * 4;
- pg3_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page_dma);
+ pg3_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz,
+ &page_dma, GFP_KERNEL);
if (pg3_alloc) {
cfg.physAddr = page_dma;
cfg.pageAddr = (karg.hdr.channel << 8) | karg.hdr.id;
@@ -2658,7 +2673,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg)
karg.phase_errors = (u32) le16_to_cpu(pg3_alloc->PhaseErrorCount);
karg.parity_errors = (u32) le16_to_cpu(pg3_alloc->ParityErrorCount);
}
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) pg3_alloc, page_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz,
+ (u8 *)pg3_alloc, page_dma);
}
}
hd = shost_priv(ioc->sh);
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index 117fa4ebf6d7..142eb5d5d9df 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -516,9 +516,9 @@ mpt_lan_close(struct net_device *dev)
if (priv->RcvCtl[i].skb != NULL) {
/**/ dlprintk((KERN_INFO MYNAM "/lan_close: bucket %05x "
/**/ "is still out\n", i));
- pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[i].dma,
- priv->RcvCtl[i].len,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[i].dma,
+ priv->RcvCtl[i].len, DMA_FROM_DEVICE);
dev_kfree_skb(priv->RcvCtl[i].skb);
}
}
@@ -528,9 +528,9 @@ mpt_lan_close(struct net_device *dev)
for (i = 0; i < priv->tx_max_out; i++) {
if (priv->SendCtl[i].skb != NULL) {
- pci_unmap_single(mpt_dev->pcidev, priv->SendCtl[i].dma,
- priv->SendCtl[i].len,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev,
+ priv->SendCtl[i].dma,
+ priv->SendCtl[i].len, DMA_TO_DEVICE);
dev_kfree_skb(priv->SendCtl[i].skb);
}
}
@@ -582,8 +582,8 @@ mpt_lan_send_turbo(struct net_device *dev, u32 tmsg)
__func__, sent));
priv->SendCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->SendCtl[ctx].dma,
- priv->SendCtl[ctx].len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev, priv->SendCtl[ctx].dma,
+ priv->SendCtl[ctx].len, DMA_TO_DEVICE);
dev_kfree_skb_irq(sent);
spin_lock_irqsave(&priv->txfidx_lock, flags);
@@ -648,8 +648,9 @@ mpt_lan_send_reply(struct net_device *dev, LANSendReply_t *pSendRep)
__func__, sent));
priv->SendCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->SendCtl[ctx].dma,
- priv->SendCtl[ctx].len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev,
+ priv->SendCtl[ctx].dma,
+ priv->SendCtl[ctx].len, DMA_TO_DEVICE);
dev_kfree_skb_irq(sent);
priv->mpt_txfidx[++priv->mpt_txfidx_tail] = ctx;
@@ -720,8 +721,8 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, 12);
- dma = pci_map_single(mpt_dev->pcidev, skb->data, skb->len,
- PCI_DMA_TODEVICE);
+ dma = dma_map_single(&mpt_dev->pcidev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
priv->SendCtl[ctx].skb = skb;
priv->SendCtl[ctx].dma = dma;
@@ -868,13 +869,17 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg)
return -ENOMEM;
}
- pci_dma_sync_single_for_cpu(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
- pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
goto out;
}
@@ -882,8 +887,8 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg)
priv->RcvCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len, DMA_FROM_DEVICE);
out:
spin_lock_irqsave(&priv->rxfidx_lock, flags);
@@ -927,8 +932,8 @@ mpt_lan_receive_post_free(struct net_device *dev,
// dlprintk((KERN_INFO MYNAM "@rpr[2] TC + 3\n"));
priv->RcvCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
@@ -1028,16 +1033,16 @@ mpt_lan_receive_post_reply(struct net_device *dev,
// IOC_AND_NETDEV_NAMES_s_s(dev),
// i, l));
- pci_dma_sync_single_for_cpu(mpt_dev->pcidev,
- priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
skb_copy_from_linear_data(old_skb, skb_put(skb, l), l);
- pci_dma_sync_single_for_device(mpt_dev->pcidev,
- priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
szrem -= l;
@@ -1056,17 +1061,17 @@ mpt_lan_receive_post_reply(struct net_device *dev,
return -ENOMEM;
}
- pci_dma_sync_single_for_cpu(mpt_dev->pcidev,
- priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
- pci_dma_sync_single_for_device(mpt_dev->pcidev,
- priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
spin_lock_irqsave(&priv->rxfidx_lock, flags);
priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
@@ -1077,8 +1082,8 @@ mpt_lan_receive_post_reply(struct net_device *dev,
priv->RcvCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len, DMA_FROM_DEVICE);
priv->RcvCtl[ctx].dma = 0;
priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
@@ -1199,10 +1204,10 @@ mpt_lan_post_receive_buckets(struct mpt_lan_priv *priv)
skb = priv->RcvCtl[ctx].skb;
if (skb && (priv->RcvCtl[ctx].len != len)) {
- pci_unmap_single(mpt_dev->pcidev,
+ dma_unmap_single(&mpt_dev->pcidev->dev,
priv->RcvCtl[ctx].dma,
priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
dev_kfree_skb(priv->RcvCtl[ctx].skb);
skb = priv->RcvCtl[ctx].skb = NULL;
}
@@ -1218,8 +1223,9 @@ mpt_lan_post_receive_buckets(struct mpt_lan_priv *priv)
break;
}
- dma = pci_map_single(mpt_dev->pcidev, skb->data,
- len, PCI_DMA_FROMDEVICE);
+ dma = dma_map_single(&mpt_dev->pcidev->dev,
+ skb->data, len,
+ DMA_FROM_DEVICE);
priv->RcvCtl[ctx].skb = skb;
priv->RcvCtl[ctx].dma = dma;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 091b45024d34..4acd8f9a48e1 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -702,8 +702,8 @@ mptsas_add_device_component_starget_ir(MPT_ADAPTER *ioc,
if (!hdr.PageLength)
goto out;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
goto out;
@@ -769,8 +769,8 @@ mptsas_add_device_component_starget_ir(MPT_ADAPTER *ioc,
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
}
/**
@@ -1399,8 +1399,8 @@ mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -1426,8 +1426,8 @@ mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure,
enclosure->sep_channel = buffer->SEPBus;
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2058,8 +2058,8 @@ static int mptsas_get_linkerrors(struct sas_phy *phy)
if (!hdr.ExtPageLength)
return -ENXIO;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
@@ -2081,8 +2081,8 @@ static int mptsas_get_linkerrors(struct sas_phy *phy)
le32_to_cpu(buffer->PhyResetProblemCount);
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
return error;
}
@@ -2301,7 +2301,7 @@ static void mptsas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
<< MPI_SGE_FLAGS_SHIFT;
if (!dma_map_sg(&ioc->pcidev->dev, job->request_payload.sg_list,
- 1, PCI_DMA_BIDIRECTIONAL))
+ 1, DMA_BIDIRECTIONAL))
goto put_mf;
flagsLength |= (sg_dma_len(job->request_payload.sg_list) - 4);
@@ -2318,7 +2318,7 @@ static void mptsas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
flagsLength = flagsLength << MPI_SGE_FLAGS_SHIFT;
if (!dma_map_sg(&ioc->pcidev->dev, job->reply_payload.sg_list,
- 1, PCI_DMA_BIDIRECTIONAL))
+ 1, DMA_BIDIRECTIONAL))
goto unmap_out;
flagsLength |= sg_dma_len(job->reply_payload.sg_list) + 4;
ioc->add_sge(psge, flagsLength,
@@ -2356,10 +2356,10 @@ static void mptsas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
unmap_in:
dma_unmap_sg(&ioc->pcidev->dev, job->reply_payload.sg_list, 1,
- PCI_DMA_BIDIRECTIONAL);
+ DMA_BIDIRECTIONAL);
unmap_out:
dma_unmap_sg(&ioc->pcidev->dev, job->request_payload.sg_list, 1,
- PCI_DMA_BIDIRECTIONAL);
+ DMA_BIDIRECTIONAL);
put_mf:
if (mf)
mpt_free_msg_frame(ioc, mf);
@@ -2412,8 +2412,8 @@ mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2452,8 +2452,8 @@ mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
}
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2487,8 +2487,8 @@ mptsas_sas_io_unit_pg1(MPT_ADAPTER *ioc)
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2509,8 +2509,8 @@ mptsas_sas_io_unit_pg1(MPT_ADAPTER *ioc)
device_missing_delay & MPI_SAS_IOUNIT1_REPORT_MISSING_TIMEOUT_MASK;
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2551,8 +2551,8 @@ mptsas_sas_phy_pg0(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2573,8 +2573,8 @@ mptsas_sas_phy_pg0(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle);
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2614,8 +2614,8 @@ mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2654,8 +2654,8 @@ mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info,
device_info->flags = le16_to_cpu(buffer->Flags);
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2697,8 +2697,8 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2737,8 +2737,8 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info,
}
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2777,8 +2777,8 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2810,8 +2810,8 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle);
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2896,7 +2896,8 @@ mptsas_exp_repmanufacture_info(MPT_ADAPTER *ioc,
sz = sizeof(struct rep_manu_request) + sizeof(struct rep_manu_reply);
- data_out = pci_alloc_consistent(ioc->pcidev, sz, &data_out_dma);
+ data_out = dma_alloc_coherent(&ioc->pcidev->dev, sz, &data_out_dma,
+ GFP_KERNEL);
if (!data_out) {
printk(KERN_ERR "Memory allocation failure at %s:%d/%s()!\n",
__FILE__, __LINE__, __func__);
@@ -2987,7 +2988,8 @@ mptsas_exp_repmanufacture_info(MPT_ADAPTER *ioc,
}
out_free:
if (data_out_dma)
- pci_free_consistent(ioc->pcidev, sz, data_out, data_out_dma);
+ dma_free_coherent(&ioc->pcidev->dev, sz, data_out,
+ data_out_dma);
put_mf:
if (mf)
mpt_free_msg_frame(ioc, mf);
@@ -4271,8 +4273,8 @@ mptsas_adding_inactive_raid_components(MPT_ADAPTER *ioc, u8 channel, u8 id)
if (!hdr.PageLength)
goto out;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
goto out;
@@ -4318,8 +4320,8 @@ mptsas_adding_inactive_raid_components(MPT_ADAPTER *ioc, u8 channel, u8 id)
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
}
/*
* Work queue thread to handle SAS hotplug events
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index c3305bdda69c..bee727ed98db 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -440,6 +440,10 @@ static int at25_probe(struct spi_device *spi)
return -ENXIO;
}
+ at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL);
+ if (!at25)
+ return -ENOMEM;
+
mutex_init(&at25->lock);
at25->spi = spi;
spi_set_drvdata(spi, at25);
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 084c61b2cbec..2797a9c0f17d 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -642,7 +642,7 @@ static int renesas_sdhi_select_tuning(struct tmio_mmc_host *host)
* is at least SH_MOBILE_SDHI_MIN_TAP_ROW probes long then use the
* center index as the tap, otherwise bail out.
*/
- bitmap_for_each_set_region(bitmap, rs, re, 0, taps_size) {
+ for_each_set_bitrange(rs, re, bitmap, taps_size) {
if (re - rs > tap_cnt) {
tap_end = re;
tap_start = rs;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 07fc603c2fa7..238b56d77c36 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3874,8 +3874,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
skb->l4_hash)
return skb->hash;
- return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
- skb->mac_header, skb->network_header,
+ return __bond_xmit_hash(bond, skb, skb->data, skb->protocol,
+ skb_mac_offset(skb), skb_network_offset(skb),
skb_headlen(skb));
}
@@ -4133,9 +4133,7 @@ static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cm
fallthrough;
case SIOCGHWTSTAMP:
- rcu_read_lock();
real_dev = bond_option_active_slave_get_rcu(bond);
- rcu_read_unlock();
if (!real_dev)
return -EOPNOTSUPP;
@@ -4884,25 +4882,39 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave = NULL;
struct list_head *iter;
+ bool xmit_suc = false;
+ bool skb_used = false;
bond_for_each_slave_rcu(bond, slave, iter) {
- if (bond_is_last_slave(bond, slave))
- break;
- if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
- struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ struct sk_buff *skb2;
+ if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
+ continue;
+
+ if (bond_is_last_slave(bond, slave)) {
+ skb2 = skb;
+ skb_used = true;
+ } else {
+ skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2) {
net_err_ratelimited("%s: Error: %s: skb_clone() failed\n",
bond_dev->name, __func__);
continue;
}
- bond_dev_queue_xmit(bond, skb2, slave->dev);
}
+
+ if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK)
+ xmit_suc = true;
}
- if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)
- return bond_dev_queue_xmit(bond, skb, slave->dev);
- return bond_tx_drop(bond_dev, skb);
+ if (!skb_used)
+ dev_kfree_skb_any(skb);
+
+ if (xmit_suc)
+ return NETDEV_TX_OK;
+
+ atomic_long_inc(&bond_dev->tx_dropped);
+ return NET_XMIT_DROP;
}
/*------------------------- Device initialization ---------------------------*/
@@ -5368,9 +5380,7 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
struct net_device *real_dev;
struct phy_device *phydev;
- rcu_read_lock();
real_dev = bond_option_active_slave_get_rcu(bond);
- rcu_read_unlock();
if (real_dev) {
ops = real_dev->ethtool_ops;
phydev = real_dev->phydev;
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 2ec11af5f0cc..46b150e6289e 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -11,7 +11,7 @@
static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
- struct bonding *bond = PDE_DATA(file_inode(seq->file));
+ struct bonding *bond = pde_data(file_inode(seq->file));
struct list_head *iter;
struct slave *slave;
loff_t off = 0;
@@ -30,7 +30,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct bonding *bond = PDE_DATA(file_inode(seq->file));
+ struct bonding *bond = pde_data(file_inode(seq->file));
struct list_head *iter;
struct slave *slave;
bool found = false;
@@ -57,7 +57,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v)
static void bond_info_show_master(struct seq_file *seq)
{
- struct bonding *bond = PDE_DATA(file_inode(seq->file));
+ struct bonding *bond = pde_data(file_inode(seq->file));
const struct bond_opt_value *optval;
struct slave *curr, *primary;
int i;
@@ -175,7 +175,7 @@ static void bond_info_show_master(struct seq_file *seq)
static void bond_info_show_slave(struct seq_file *seq,
const struct slave *slave)
{
- struct bonding *bond = PDE_DATA(file_inode(seq->file));
+ struct bonding *bond = pde_data(file_inode(seq->file));
seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link));
diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c
index 0bff1884d5cc..74d7fcbfd065 100644
--- a/drivers/net/can/flexcan/flexcan-core.c
+++ b/drivers/net/can/flexcan/flexcan-core.c
@@ -296,6 +296,7 @@ static_assert(sizeof(struct flexcan_regs) == 0x4 * 18 + 0xfb8);
static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = {
.quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE |
FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16 |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
};
diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h
index fccdff8b1f0f..23fc09a7e10f 100644
--- a/drivers/net/can/flexcan/flexcan.h
+++ b/drivers/net/can/flexcan/flexcan.h
@@ -21,7 +21,7 @@
* Below is some version info we got:
* SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode MB
* Filter? connected? Passive detection ption in MB Supported?
- * MCF5441X FlexCAN2 ? no yes no no yes no 16
+ * MCF5441X FlexCAN2 ? no yes no no no no 16
* MX25 FlexCAN2 03.00.00.00 no no no no no no 64
* MX28 FlexCAN2 03.00.04.00 yes yes no no no no 64
* MX35 FlexCAN2 03.00.00.00 no no no no no no 64
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 5b47cd867783..1a4b56f6fa8c 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -336,6 +336,9 @@ m_can_fifo_read(struct m_can_classdev *cdev,
u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE +
offset;
+ if (val_count == 0)
+ return 0;
+
return cdev->ops->read_fifo(cdev, addr_offset, val, val_count);
}
@@ -346,6 +349,9 @@ m_can_fifo_write(struct m_can_classdev *cdev,
u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE +
offset;
+ if (val_count == 0)
+ return 0;
+
return cdev->ops->write_fifo(cdev, addr_offset, val, val_count);
}
diff --git a/drivers/net/can/m_can/tcan4x5x-regmap.c b/drivers/net/can/m_can/tcan4x5x-regmap.c
index ca80dbaf7a3f..26e212b8ca7a 100644
--- a/drivers/net/can/m_can/tcan4x5x-regmap.c
+++ b/drivers/net/can/m_can/tcan4x5x-regmap.c
@@ -12,7 +12,7 @@
#define TCAN4X5X_SPI_INSTRUCTION_WRITE (0x61 << 24)
#define TCAN4X5X_SPI_INSTRUCTION_READ (0x41 << 24)
-#define TCAN4X5X_MAX_REGISTER 0x8ffc
+#define TCAN4X5X_MAX_REGISTER 0x87fc
static int tcan4x5x_regmap_gather_write(void *context,
const void *reg, size_t reg_len,
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index 481f1df3106c..8aec5d9fbfef 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -2278,6 +2278,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
struct net_device *dev;
struct typhoon *tp;
int card_id = (int) ent->driver_data;
+ u8 addr[ETH_ALEN] __aligned(4);
void __iomem *ioaddr;
void *shared;
dma_addr_t shared_dma;
@@ -2409,8 +2410,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto error_out_reset;
}
- *(__be16 *)&dev->dev_addr[0] = htons(le16_to_cpu(xp_resp[0].parm1));
- *(__be32 *)&dev->dev_addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2));
+ *(__be16 *)&addr[0] = htons(le16_to_cpu(xp_resp[0].parm1));
+ *(__be32 *)&addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2));
+ eth_hw_addr_set(dev, addr);
if (!is_valid_ether_addr(dev->dev_addr)) {
err_msg = "Could not obtain valid ethernet address, aborting";
diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c
index bd22a534b1c0..e7b879123bb1 100644
--- a/drivers/net/ethernet/8390/etherh.c
+++ b/drivers/net/ethernet/8390/etherh.c
@@ -655,6 +655,7 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
struct ei_device *ei_local;
struct net_device *dev;
struct etherh_priv *eh;
+ u8 addr[ETH_ALEN];
int ret;
ret = ecard_request_resources(ec);
@@ -724,12 +725,13 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
spin_lock_init(&ei_local->page_lock);
if (ec->cid.product == PROD_ANT_ETHERM) {
- etherm_addr(dev->dev_addr);
+ etherm_addr(addr);
ei_local->reg_offset = etherm_regoffsets;
} else {
- etherh_addr(dev->dev_addr, ec);
+ etherh_addr(addr, ec);
ei_local->reg_offset = etherh_regoffsets;
}
+ eth_hw_addr_set(dev, addr);
ei_local->name = dev->name;
ei_local->word16 = 1;
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index 849de4564709..621ce742ad21 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -106,9 +106,9 @@ static void emac_update_speed(struct net_device *dev)
/* set EMAC SPEED, depend on PHY */
reg_val = readl(db->membase + EMAC_MAC_SUPP_REG);
- reg_val &= ~(0x1 << 8);
+ reg_val &= ~EMAC_MAC_SUPP_100M;
if (db->speed == SPEED_100)
- reg_val |= 1 << 8;
+ reg_val |= EMAC_MAC_SUPP_100M;
writel(reg_val, db->membase + EMAC_MAC_SUPP_REG);
}
@@ -264,7 +264,7 @@ static void emac_dma_done_callback(void *arg)
/* re enable interrupt */
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0x01 << 8);
+ reg_val |= EMAC_INT_CTL_RX_EN;
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
db->emacrx_completed_flag = 1;
@@ -429,7 +429,7 @@ static unsigned int emac_powerup(struct net_device *ndev)
/* initial EMAC */
/* flush RX FIFO */
reg_val = readl(db->membase + EMAC_RX_CTL_REG);
- reg_val |= 0x8;
+ reg_val |= EMAC_RX_CTL_FLUSH_FIFO;
writel(reg_val, db->membase + EMAC_RX_CTL_REG);
udelay(1);
@@ -441,8 +441,8 @@ static unsigned int emac_powerup(struct net_device *ndev)
/* set MII clock */
reg_val = readl(db->membase + EMAC_MAC_MCFG_REG);
- reg_val &= (~(0xf << 2));
- reg_val |= (0xD << 2);
+ reg_val &= ~EMAC_MAC_MCFG_MII_CLKD_MASK;
+ reg_val |= EMAC_MAC_MCFG_MII_CLKD_72;
writel(reg_val, db->membase + EMAC_MAC_MCFG_REG);
/* clear RX counter */
@@ -506,7 +506,7 @@ static void emac_init_device(struct net_device *dev)
/* enable RX/TX0/RX Hlevel interrup */
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0) | (0x01 << 8);
+ reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN | EMAC_INT_CTL_RX_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
spin_unlock_irqrestore(&db->lock, flags);
@@ -637,7 +637,9 @@ static void emac_rx(struct net_device *dev)
if (!rxcount) {
db->emacrx_completed_flag = 1;
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0) | (0x01 << 8);
+ reg_val |= (EMAC_INT_CTL_TX_EN |
+ EMAC_INT_CTL_TX_ABRT_EN |
+ EMAC_INT_CTL_RX_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
/* had one stuck? */
@@ -669,7 +671,9 @@ static void emac_rx(struct net_device *dev)
writel(reg_val | EMAC_CTL_RX_EN,
db->membase + EMAC_CTL_REG);
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0) | (0x01 << 8);
+ reg_val |= (EMAC_INT_CTL_TX_EN |
+ EMAC_INT_CTL_TX_ABRT_EN |
+ EMAC_INT_CTL_RX_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
db->emacrx_completed_flag = 1;
@@ -783,20 +787,20 @@ static irqreturn_t emac_interrupt(int irq, void *dev_id)
}
/* Transmit Interrupt check */
- if (int_status & (0x01 | 0x02))
+ if (int_status & EMAC_INT_STA_TX_COMPLETE)
emac_tx_done(dev, db, int_status);
- if (int_status & (0x04 | 0x08))
+ if (int_status & EMAC_INT_STA_TX_ABRT)
netdev_info(dev, " ab : %x\n", int_status);
/* Re-enable interrupt mask */
if (db->emacrx_completed_flag == 1) {
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0) | (0x01 << 8);
+ reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN | EMAC_INT_CTL_RX_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
} else {
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0);
+ reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
}
@@ -1068,6 +1072,7 @@ out_clk_disable_unprepare:
clk_disable_unprepare(db->clk);
out_dispose_mapping:
irq_dispose_mapping(ndev->irq);
+ dma_release_channel(db->rx_chan);
out_iounmap:
iounmap(db->membase);
out:
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.h b/drivers/net/ethernet/allwinner/sun4i-emac.h
index 38c72d9ec600..90bd9ad77607 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.h
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.h
@@ -38,6 +38,7 @@
#define EMAC_RX_CTL_REG (0x3c)
#define EMAC_RX_CTL_AUTO_DRQ_EN (1 << 1)
#define EMAC_RX_CTL_DMA_EN (1 << 2)
+#define EMAC_RX_CTL_FLUSH_FIFO (1 << 3)
#define EMAC_RX_CTL_PASS_ALL_EN (1 << 4)
#define EMAC_RX_CTL_PASS_CTL_EN (1 << 5)
#define EMAC_RX_CTL_PASS_CRC_ERR_EN (1 << 6)
@@ -61,7 +62,21 @@
#define EMAC_RX_IO_DATA_STATUS_OK (1 << 7)
#define EMAC_RX_FBC_REG (0x50)
#define EMAC_INT_CTL_REG (0x54)
+#define EMAC_INT_CTL_RX_EN (1 << 8)
+#define EMAC_INT_CTL_TX0_EN (1)
+#define EMAC_INT_CTL_TX1_EN (1 << 1)
+#define EMAC_INT_CTL_TX_EN (EMAC_INT_CTL_TX0_EN | EMAC_INT_CTL_TX1_EN)
+#define EMAC_INT_CTL_TX0_ABRT_EN (0x1 << 2)
+#define EMAC_INT_CTL_TX1_ABRT_EN (0x1 << 3)
+#define EMAC_INT_CTL_TX_ABRT_EN (EMAC_INT_CTL_TX0_ABRT_EN | EMAC_INT_CTL_TX1_ABRT_EN)
#define EMAC_INT_STA_REG (0x58)
+#define EMAC_INT_STA_TX0_COMPLETE (0x1)
+#define EMAC_INT_STA_TX1_COMPLETE (0x1 << 1)
+#define EMAC_INT_STA_TX_COMPLETE (EMAC_INT_STA_TX0_COMPLETE | EMAC_INT_STA_TX1_COMPLETE)
+#define EMAC_INT_STA_TX0_ABRT (0x1 << 2)
+#define EMAC_INT_STA_TX1_ABRT (0x1 << 3)
+#define EMAC_INT_STA_TX_ABRT (EMAC_INT_STA_TX0_ABRT | EMAC_INT_STA_TX1_ABRT)
+#define EMAC_INT_STA_RX_COMPLETE (0x1 << 8)
#define EMAC_MAC_CTL0_REG (0x5c)
#define EMAC_MAC_CTL0_RX_FLOW_CTL_EN (1 << 2)
#define EMAC_MAC_CTL0_TX_FLOW_CTL_EN (1 << 3)
@@ -87,8 +102,11 @@
#define EMAC_MAC_CLRT_RM (0x0f)
#define EMAC_MAC_MAXF_REG (0x70)
#define EMAC_MAC_SUPP_REG (0x74)
+#define EMAC_MAC_SUPP_100M (0x1 << 8)
#define EMAC_MAC_TEST_REG (0x78)
#define EMAC_MAC_MCFG_REG (0x7c)
+#define EMAC_MAC_MCFG_MII_CLKD_MASK (0xff << 2)
+#define EMAC_MAC_MCFG_MII_CLKD_72 (0x0d << 2)
#define EMAC_MAC_A0_REG (0x98)
#define EMAC_MAC_A1_REG (0x9c)
#define EMAC_MAC_A2_REG (0xa0)
diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c
index 493b0cefcc2a..ec8df05e7bf6 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -1032,6 +1032,7 @@ static int dec_lance_probe(struct device *bdev, const int type)
int i, ret;
unsigned long esar_base;
unsigned char *esar;
+ u8 addr[ETH_ALEN];
const char *desc;
if (dec_lance_debug && version_printed++ == 0)
@@ -1228,7 +1229,8 @@ static int dec_lance_probe(struct device *bdev, const int type)
break;
}
for (i = 0; i < 6; i++)
- dev->dev_addr[i] = esar[i * 4];
+ addr[i] = esar[i * 4];
+ eth_hw_addr_set(dev, addr);
printk("%s: %s, addr = %pM, irq = %d\n",
name, desc, dev->dev_addr, dev->irq);
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index 9a650d1c1bdd..4d2ba30c2fbd 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -1237,6 +1237,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match)
struct bmac_data *bp;
const unsigned char *prop_addr;
unsigned char addr[6];
+ u8 macaddr[6];
struct net_device *dev;
int is_bmac_plus = ((int)match->data) != 0;
@@ -1284,7 +1285,9 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match)
rev = addr[0] == 0 && addr[1] == 0xA0;
for (j = 0; j < 6; ++j)
- dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j];
+ macaddr[j] = rev ? bitrev8(addr[j]): addr[j];
+
+ eth_hw_addr_set(dev, macaddr);
/* Enable chip without interrupts for now */
bmac_enable_and_reset_chip(dev);
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index 4b80e3a52a19..6f8c91eb1263 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -90,7 +90,7 @@ static void mace_set_timeout(struct net_device *dev);
static void mace_tx_timeout(struct timer_list *t);
static inline void dbdma_reset(volatile struct dbdma_regs __iomem *dma);
static inline void mace_clean_rings(struct mace_data *mp);
-static void __mace_set_address(struct net_device *dev, void *addr);
+static void __mace_set_address(struct net_device *dev, const void *addr);
/*
* If we can't get a skbuff when we need it, we use this area for DMA.
@@ -112,6 +112,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
struct net_device *dev;
struct mace_data *mp;
const unsigned char *addr;
+ u8 macaddr[ETH_ALEN];
int j, rev, rc = -EBUSY;
if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) {
@@ -167,8 +168,9 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
rev = addr[0] == 0 && addr[1] == 0xA0;
for (j = 0; j < 6; ++j) {
- dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j];
+ macaddr[j] = rev ? bitrev8(addr[j]): addr[j];
}
+ eth_hw_addr_set(dev, macaddr);
mp->chipid = (in_8(&mp->mace->chipid_hi) << 8) |
in_8(&mp->mace->chipid_lo);
@@ -369,11 +371,12 @@ static void mace_reset(struct net_device *dev)
out_8(&mb->plscc, PORTSEL_GPSI + ENPLSIO);
}
-static void __mace_set_address(struct net_device *dev, void *addr)
+static void __mace_set_address(struct net_device *dev, const void *addr)
{
struct mace_data *mp = netdev_priv(dev);
volatile struct mace __iomem *mb = mp->mace;
- unsigned char *p = addr;
+ const unsigned char *p = addr;
+ u8 macaddr[ETH_ALEN];
int i;
/* load up the hardware address */
@@ -385,7 +388,10 @@ static void __mace_set_address(struct net_device *dev, void *addr)
;
}
for (i = 0; i < 6; ++i)
- out_8(&mb->padr, dev->dev_addr[i] = p[i]);
+ out_8(&mb->padr, macaddr[i] = p[i]);
+
+ eth_hw_addr_set(dev, macaddr);
+
if (mp->chipid != BROKEN_ADDRCHG_REV)
out_8(&mb->iac, 0);
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
index 1bc4d33a0ce5..30a573db02bb 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -826,7 +826,6 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic)
struct aq_hw_s *aq_hw = aq_nic->aq_hw;
int hweight = 0;
int err = 0;
- int i;
if (unlikely(!aq_hw_ops->hw_filter_vlan_set))
return -EOPNOTSUPP;
@@ -837,8 +836,7 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic)
aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans);
if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
- for (i = 0; i < BITS_TO_LONGS(VLAN_N_VID); i++)
- hweight += hweight_long(aq_nic->active_vlans[i]);
+ hweight = bitmap_weight(aq_nic->active_vlans, VLAN_N_VID);
err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false);
if (err)
@@ -871,7 +869,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic)
struct aq_hw_s *aq_hw = aq_nic->aq_hw;
int err = 0;
- memset(aq_nic->active_vlans, 0, sizeof(aq_nic->active_vlans));
+ bitmap_zero(aq_nic->active_vlans, VLAN_N_VID);
aq_fvlan_rebuild(aq_nic, aq_nic->active_vlans,
aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 226f4403cfed..87f1056e29ff 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4020,10 +4020,12 @@ static int bcmgenet_probe(struct platform_device *pdev)
/* Request the WOL interrupt and advertise suspend if available */
priv->wol_irq_disabled = true;
- err = devm_request_irq(&pdev->dev, priv->wol_irq, bcmgenet_wol_isr, 0,
- dev->name, priv);
- if (!err)
- device_set_wakeup_capable(&pdev->dev, 1);
+ if (priv->wol_irq > 0) {
+ err = devm_request_irq(&pdev->dev, priv->wol_irq,
+ bcmgenet_wol_isr, 0, dev->name, priv);
+ if (!err)
+ device_set_wakeup_capable(&pdev->dev, 1);
+ }
/* Set the needed headroom to account for any possible
* features enabling/disabling at runtime
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index f38f40eb966e..a1a38456c9a3 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2183,9 +2183,7 @@ static int sbmac_init(struct platform_device *pldev, long long base)
ea_reg >>= 8;
}
- for (i = 0; i < 6; i++) {
- dev->dev_addr[i] = eaddr[i];
- }
+ eth_hw_addr_set(dev, eaddr);
/*
* Initialize context (get pointers to registers and stuff), then
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
index d04a6c163445..da8d10475a08 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
@@ -32,6 +32,7 @@
#include <linux/tcp.h>
#include <linux/ipv6.h>
+#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/ip6_route.h>
@@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi,
rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
peer_port, local_port, IPPROTO_TCP,
- tos, 0);
+ tos & ~INET_ECN_MASK, 0);
if (IS_ERR(rt))
return NULL;
n = dst_neigh_lookup(&rt->dst, &peer_ip);
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index bbbde9f701c2..be0bd4b44926 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -99,13 +99,13 @@ static void mpc52xx_fec_tx_timeout(struct net_device *dev, unsigned int txqueue)
netif_wake_queue(dev);
}
-static void mpc52xx_fec_set_paddr(struct net_device *dev, u8 *mac)
+static void mpc52xx_fec_set_paddr(struct net_device *dev, const u8 *mac)
{
struct mpc52xx_fec_priv *priv = netdev_priv(dev);
struct mpc52xx_fec __iomem *fec = priv->fec;
- out_be32(&fec->paddr1, *(u32 *)(&mac[0]));
- out_be32(&fec->paddr2, (*(u16 *)(&mac[4]) << 16) | FEC_PADDR2_TYPE);
+ out_be32(&fec->paddr1, *(const u32 *)(&mac[0]));
+ out_be32(&fec->paddr2, (*(const u16 *)(&mac[4]) << 16) | FEC_PADDR2_TYPE);
}
static int mpc52xx_fec_set_mac_address(struct net_device *dev, void *addr)
@@ -893,13 +893,15 @@ static int mpc52xx_fec_probe(struct platform_device *op)
rv = of_get_ethdev_address(np, ndev);
if (rv) {
struct mpc52xx_fec __iomem *fec = priv->fec;
+ u8 addr[ETH_ALEN] __aligned(4);
/*
* If the MAC addresse is not provided via DT then read
* it back from the controller regs
*/
- *(u32 *)(&ndev->dev_addr[0]) = in_be32(&fec->paddr1);
- *(u16 *)(&ndev->dev_addr[4]) = in_be32(&fec->paddr2) >> 16;
+ *(u32 *)(&addr[0]) = in_be32(&fec->paddr1);
+ *(u16 *)(&addr[4]) = in_be32(&fec->paddr2) >> 16;
+ eth_hw_addr_set(ndev, addr);
}
/*
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 5b8b9bcf41a2..266e562bd67a 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -51,6 +51,7 @@ struct tgec_mdio_controller {
struct mdio_fsl_priv {
struct tgec_mdio_controller __iomem *mdio_base;
bool is_little_endian;
+ bool has_a009885;
bool has_a011043;
};
@@ -186,10 +187,10 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
{
struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+ unsigned long flags;
uint16_t dev_addr;
uint32_t mdio_stat;
uint32_t mdio_ctl;
- uint16_t value;
int ret;
bool endian = priv->is_little_endian;
@@ -221,12 +222,18 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
return ret;
}
+ if (priv->has_a009885)
+ /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we
+ * must read back the data register within 16 MDC cycles.
+ */
+ local_irq_save(flags);
+
/* Initiate the read */
xgmac_write32(mdio_ctl | MDIO_CTL_READ, &regs->mdio_ctl, endian);
ret = xgmac_wait_until_done(&bus->dev, regs, endian);
if (ret)
- return ret;
+ goto irq_restore;
/* Return all Fs if nothing was there */
if ((xgmac_read32(&regs->mdio_stat, endian) & MDIO_STAT_RD_ER) &&
@@ -234,13 +241,17 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
dev_dbg(&bus->dev,
"Error while reading PHY%d reg at %d.%hhu\n",
phy_id, dev_addr, regnum);
- return 0xffff;
+ ret = 0xffff;
+ } else {
+ ret = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
+ dev_dbg(&bus->dev, "read %04x\n", ret);
}
- value = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
- dev_dbg(&bus->dev, "read %04x\n", value);
+irq_restore:
+ if (priv->has_a009885)
+ local_irq_restore(flags);
- return value;
+ return ret;
}
static int xgmac_mdio_probe(struct platform_device *pdev)
@@ -287,6 +298,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
priv->is_little_endian = device_property_read_bool(&pdev->dev,
"little-endian");
+ priv->has_a009885 = device_property_read_bool(&pdev->dev,
+ "fsl,erratum-a009885");
priv->has_a011043 = device_property_read_bool(&pdev->dev,
"fsl,erratum-a011043");
@@ -318,9 +331,10 @@ err_ioremap:
static int xgmac_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
+ struct mdio_fsl_priv *priv = bus->priv;
mdiobus_unregister(bus);
- iounmap(bus->priv);
+ iounmap(priv->mdio_base);
mdiobus_free(bus);
return 0;
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 5f5d4f7aa813..160735484465 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -843,7 +843,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
/* buffers */
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
- enum dma_data_direction);
+ enum dma_data_direction, gfp_t gfp_flags);
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
enum dma_data_direction);
/* tx handling */
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index f7f65c4bf993..54e51c8221b8 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -766,9 +766,9 @@ static void gve_free_rings(struct gve_priv *priv)
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
- enum dma_data_direction dir)
+ enum dma_data_direction dir, gfp_t gfp_flags)
{
- *page = alloc_page(GFP_KERNEL);
+ *page = alloc_page(gfp_flags);
if (!*page) {
priv->page_alloc_fail++;
return -ENOMEM;
@@ -811,7 +811,7 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
for (i = 0; i < pages; i++) {
err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
&qpl->page_buses[i],
- gve_qpl_dma_dir(priv, id));
+ gve_qpl_dma_dir(priv, id), GFP_KERNEL);
/* caller handles clean up */
if (err)
return -ENOMEM;
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 9ddcc497f48e..2068199445bd 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -86,7 +86,8 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
dma_addr_t dma;
int err;
- err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
+ err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
+ GFP_ATOMIC);
if (err)
return err;
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index beb8bb079023..8c939628e2d8 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -157,7 +157,7 @@ static int gve_alloc_page_dqo(struct gve_priv *priv,
int err;
err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page,
- &buf_state->addr, DMA_FROM_DEVICE);
+ &buf_state->addr, DMA_FROM_DEVICE, GFP_KERNEL);
if (err)
return err;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 7df87610ad96..21442a9bb996 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2043,8 +2043,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
break;
}
- if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER)
- hclgevf_enable_vector(&hdev->misc_vector, true);
+ hclgevf_enable_vector(&hdev->misc_vector, true);
return IRQ_HANDLED;
}
diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c
index c612ef526d16..3e7d7c4bafdc 100644
--- a/drivers/net/ethernet/i825xx/ether1.c
+++ b/drivers/net/ethernet/i825xx/ether1.c
@@ -986,6 +986,7 @@ static int
ether1_probe(struct expansion_card *ec, const struct ecard_id *id)
{
struct net_device *dev;
+ u8 addr[ETH_ALEN];
int i, ret = 0;
ether1_banner();
@@ -1015,7 +1016,8 @@ ether1_probe(struct expansion_card *ec, const struct ecard_id *id)
}
for (i = 0; i < 6; i++)
- dev->dev_addr[i] = readb(IDPROM_ADDRESS + (i << 2));
+ addr[i] = readb(IDPROM_ADDRESS + (i << 2));
+ eth_hw_addr_set(dev, addr);
if (ether1_init_2(dev)) {
ret = -ENODEV;
diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c
index 27937c5d7956..daec9ce04531 100644
--- a/drivers/net/ethernet/i825xx/sni_82596.c
+++ b/drivers/net/ethernet/i825xx/sni_82596.c
@@ -117,9 +117,10 @@ static int sni_82596_probe(struct platform_device *dev)
netdevice->dev_addr[5] = readb(eth_addr + 0x06);
iounmap(eth_addr);
- if (!netdevice->irq) {
+ if (netdevice->irq < 0) {
printk(KERN_ERR "%s: IRQ not found for i82596 at 0x%lx\n",
__FILE__, netdevice->base_addr);
+ retval = netdevice->irq;
goto probe_failed;
}
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 59536bd5cab1..bda7a2a9d211 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2602,6 +2602,7 @@ static void __ibmvnic_reset(struct work_struct *work)
struct ibmvnic_rwi *rwi;
unsigned long flags;
u32 reset_state;
+ int num_fails = 0;
int rc = 0;
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
@@ -2655,11 +2656,23 @@ static void __ibmvnic_reset(struct work_struct *work)
rc = do_hard_reset(adapter, rwi, reset_state);
rtnl_unlock();
}
- if (rc) {
- /* give backing device time to settle down */
+ if (rc)
+ num_fails++;
+ else
+ num_fails = 0;
+
+ /* If auto-priority-failover is enabled we can get
+ * back to back failovers during resets, resulting
+ * in at least two failed resets (from high-priority
+ * backing device to low-priority one and then back)
+ * If resets continue to fail beyond that, give the
+ * adapter some time to settle down before retrying.
+ */
+ if (num_fails >= 3) {
netdev_dbg(adapter->netdev,
- "[S:%s] Hard reset failed, waiting 60 secs\n",
- adapter_state_to_string(adapter->state));
+ "[S:%s] Hard reset failed %d times, waiting 60 secs\n",
+ adapter_state_to_string(adapter->state),
+ num_fails);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(60 * HZ);
}
@@ -3844,11 +3857,25 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
struct device *dev = &adapter->vdev->dev;
union ibmvnic_crq crq;
int max_entries;
+ int cap_reqs;
+
+ /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on
+ * the PROMISC flag). Initialize this count upfront. When the tasklet
+ * receives a response to all of these, it will send the next protocol
+ * message (QUERY_IP_OFFLOAD).
+ */
+ if (!(adapter->netdev->flags & IFF_PROMISC) ||
+ adapter->promisc_supported)
+ cap_reqs = 7;
+ else
+ cap_reqs = 6;
if (!retry) {
/* Sub-CRQ entries are 32 byte long */
int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4);
+ atomic_set(&adapter->running_cap_crqs, cap_reqs);
+
if (adapter->min_tx_entries_per_subcrq > entries_page ||
adapter->min_rx_add_entries_per_subcrq > entries_page) {
dev_err(dev, "Fatal, invalid entries per sub-crq\n");
@@ -3909,44 +3936,45 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
adapter->opt_rx_comp_queues;
adapter->req_rx_add_queues = adapter->max_rx_add_queues;
+ } else {
+ atomic_add(cap_reqs, &adapter->running_cap_crqs);
}
-
memset(&crq, 0, sizeof(crq));
crq.request_capability.first = IBMVNIC_CRQ_CMD;
crq.request_capability.cmd = REQUEST_CAPABILITY;
crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES);
crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES);
crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability =
cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
crq.request_capability.number =
cpu_to_be64(adapter->req_tx_entries_per_subcrq);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability =
cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
crq.request_capability.number =
cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability = cpu_to_be16(REQ_MTU);
crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
if (adapter->netdev->flags & IFF_PROMISC) {
@@ -3954,16 +3982,21 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
crq.request_capability.capability =
cpu_to_be16(PROMISC_REQUESTED);
crq.request_capability.number = cpu_to_be64(1);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
}
} else {
crq.request_capability.capability =
cpu_to_be16(PROMISC_REQUESTED);
crq.request_capability.number = cpu_to_be64(0);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
}
+
+ /* Keep at end to catch any discrepancy between expected and actual
+ * CRQs sent.
+ */
+ WARN_ON(cap_reqs != 0);
}
static int pending_scrq(struct ibmvnic_adapter *adapter,
@@ -4357,118 +4390,132 @@ static void send_query_map(struct ibmvnic_adapter *adapter)
static void send_query_cap(struct ibmvnic_adapter *adapter)
{
union ibmvnic_crq crq;
+ int cap_reqs;
+
+ /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count
+ * upfront. When the tasklet receives a response to all of these, it
+ * can send out the next protocol message (REQUEST_CAPABILITY).
+ */
+ cap_reqs = 25;
+
+ atomic_set(&adapter->running_cap_crqs, cap_reqs);
- atomic_set(&adapter->running_cap_crqs, 0);
memset(&crq, 0, sizeof(crq));
crq.query_capability.first = IBMVNIC_CRQ_CMD;
crq.query_capability.cmd = QUERY_CAPABILITY;
crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MIN_MTU);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_MTU);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ);
- atomic_inc(&adapter->running_cap_crqs);
+
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
+
+ /* Keep at end to catch any discrepancy between expected and actual
+ * CRQs sent.
+ */
+ WARN_ON(cap_reqs != 0);
}
static void send_query_ip_offload(struct ibmvnic_adapter *adapter)
@@ -4772,6 +4819,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
char *name;
atomic_dec(&adapter->running_cap_crqs);
+ netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n",
+ atomic_read(&adapter->running_cap_crqs));
switch (be16_to_cpu(crq->request_capability_rsp.capability)) {
case REQ_TX_QUEUES:
req_value = &adapter->req_tx_queues;
@@ -4835,10 +4884,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
}
/* Done receiving requested capabilities, query IP offload support */
- if (atomic_read(&adapter->running_cap_crqs) == 0) {
- adapter->wait_capability = false;
+ if (atomic_read(&adapter->running_cap_crqs) == 0)
send_query_ip_offload(adapter);
- }
}
static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
@@ -5136,10 +5183,8 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
}
out:
- if (atomic_read(&adapter->running_cap_crqs) == 0) {
- adapter->wait_capability = false;
+ if (atomic_read(&adapter->running_cap_crqs) == 0)
send_request_cap(adapter, 0);
- }
}
static int send_query_phys_parms(struct ibmvnic_adapter *adapter)
@@ -5435,33 +5480,21 @@ static void ibmvnic_tasklet(struct tasklet_struct *t)
struct ibmvnic_crq_queue *queue = &adapter->crq;
union ibmvnic_crq *crq;
unsigned long flags;
- bool done = false;
spin_lock_irqsave(&queue->lock, flags);
- while (!done) {
- /* Pull all the valid messages off the CRQ */
- while ((crq = ibmvnic_next_crq(adapter)) != NULL) {
- /* This barrier makes sure ibmvnic_next_crq()'s
- * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded
- * before ibmvnic_handle_crq()'s
- * switch(gen_crq->first) and switch(gen_crq->cmd).
- */
- dma_rmb();
- ibmvnic_handle_crq(crq, adapter);
- crq->generic.first = 0;
- }
- /* remain in tasklet until all
- * capabilities responses are received
+ /* Pull all the valid messages off the CRQ */
+ while ((crq = ibmvnic_next_crq(adapter)) != NULL) {
+ /* This barrier makes sure ibmvnic_next_crq()'s
+ * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded
+ * before ibmvnic_handle_crq()'s
+ * switch(gen_crq->first) and switch(gen_crq->cmd).
*/
- if (!adapter->wait_capability)
- done = true;
+ dma_rmb();
+ ibmvnic_handle_crq(crq, adapter);
+ crq->generic.first = 0;
}
- /* if capabilities CRQ's were sent in this tasklet, the following
- * tasklet must wait until all responses are received
- */
- if (atomic_read(&adapter->running_cap_crqs) != 0)
- adapter->wait_capability = true;
+
spin_unlock_irqrestore(&queue->lock, flags);
}
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 4a8f36e0ab07..4a7a56ff74ce 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -919,7 +919,6 @@ struct ibmvnic_adapter {
int login_rsp_buf_sz;
atomic_t running_cap_crqs;
- bool wait_capability;
struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned;
struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned;
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 4d939af0a626..2e02cc68cd3f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -174,7 +174,6 @@ enum i40e_interrupt_policy {
struct i40e_lump_tracking {
u16 num_entries;
- u16 search_hint;
u16 list[0];
#define I40E_PILE_VALID_BIT 0x8000
#define I40E_IWARP_IRQ_PILE_ID (I40E_PILE_VALID_BIT - 2)
@@ -848,12 +847,12 @@ struct i40e_vsi {
struct rtnl_link_stats64 net_stats_offsets;
struct i40e_eth_stats eth_stats;
struct i40e_eth_stats eth_stats_offsets;
- u32 tx_restart;
- u32 tx_busy;
+ u64 tx_restart;
+ u64 tx_busy;
u64 tx_linearize;
u64 tx_force_wb;
- u32 rx_buf_failed;
- u32 rx_page_failed;
+ u64 rx_buf_failed;
+ u64 rx_page_failed;
/* These are containers of ring pointers, allocated at run-time */
struct i40e_ring **rx_rings;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 2c1b1da1220e..1e57cc8c47d7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -240,7 +240,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
(unsigned long int)vsi->net_stats_offsets.rx_compressed,
(unsigned long int)vsi->net_stats_offsets.tx_compressed);
dev_info(&pf->pdev->dev,
- " tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n",
+ " tx_restart = %llu, tx_busy = %llu, rx_buf_failed = %llu, rx_page_failed = %llu\n",
vsi->tx_restart, vsi->tx_busy,
vsi->rx_buf_failed, vsi->rx_page_failed);
rcu_read_lock();
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2a3d8aef7f4e..f70c478dafdb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -196,10 +196,6 @@ int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem)
* @id: an owner id to stick on the items assigned
*
* Returns the base item index of the lump, or negative for error
- *
- * The search_hint trick and lack of advanced fit-finding only work
- * because we're highly likely to have all the same size lump requests.
- * Linear search time and any fragmentation should be minimal.
**/
static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
u16 needed, u16 id)
@@ -214,8 +210,21 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
return -EINVAL;
}
- /* start the linear search with an imperfect hint */
- i = pile->search_hint;
+ /* Allocate last queue in the pile for FDIR VSI queue
+ * so it doesn't fragment the qp_pile
+ */
+ if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) {
+ if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) {
+ dev_err(&pf->pdev->dev,
+ "Cannot allocate queue %d for I40E_VSI_FDIR\n",
+ pile->num_entries - 1);
+ return -ENOMEM;
+ }
+ pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT;
+ return pile->num_entries - 1;
+ }
+
+ i = 0;
while (i < pile->num_entries) {
/* skip already allocated entries */
if (pile->list[i] & I40E_PILE_VALID_BIT) {
@@ -234,7 +243,6 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
for (j = 0; j < needed; j++)
pile->list[i+j] = id | I40E_PILE_VALID_BIT;
ret = i;
- pile->search_hint = i + j;
break;
}
@@ -257,7 +265,7 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
{
int valid_id = (id | I40E_PILE_VALID_BIT);
int count = 0;
- int i;
+ u16 i;
if (!pile || index >= pile->num_entries)
return -EINVAL;
@@ -269,8 +277,6 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
count++;
}
- if (count && index < pile->search_hint)
- pile->search_hint = index;
return count;
}
@@ -772,9 +778,9 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
struct rtnl_link_stats64 *ns; /* netdev stats */
struct i40e_eth_stats *oes;
struct i40e_eth_stats *es; /* device's eth stats */
- u32 tx_restart, tx_busy;
+ u64 tx_restart, tx_busy;
struct i40e_ring *p;
- u32 rx_page, rx_buf;
+ u64 rx_page, rx_buf;
u64 bytes, packets;
unsigned int start;
u64 tx_linearize;
@@ -10574,15 +10580,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
}
i40e_get_oem_version(&pf->hw);
- if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
- ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
- hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
- /* The following delay is necessary for 4.33 firmware and older
- * to recover after EMP reset. 200 ms should suffice but we
- * put here 300 ms to be sure that FW is ready to operate
- * after reset.
- */
- mdelay(300);
+ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) {
+ /* The following delay is necessary for firmware update. */
+ mdelay(1000);
}
/* re-verify the eeprom if we just had an EMP reset */
@@ -11792,7 +11792,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
return -ENOMEM;
pf->irq_pile->num_entries = vectors;
- pf->irq_pile->search_hint = 0;
/* track first vector for misc interrupts, ignore return */
(void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1);
@@ -12595,7 +12594,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
goto sw_init_done;
}
pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp;
- pf->qp_pile->search_hint = 0;
pf->tx_timeout_recovery_level = 1;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 8d0588a27a05..1908eed4fa5e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -413,6 +413,9 @@
#define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
#define I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT 1
#define I40E_VFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_VFINT_ICR0_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_ENA(_VF) (0x0002C000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
#define I40E_VPINT_AEQCTL(_VF) (0x0002B800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
#define I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT 0
#define I40E_VPINT_AEQCTL_ITR_INDX_SHIFT 11
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index b785d09c19f8..dfdb6e786461 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1377,6 +1377,32 @@ static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf,
}
/**
+ * i40e_sync_vfr_reset
+ * @hw: pointer to hw struct
+ * @vf_id: VF identifier
+ *
+ * Before triggering a hardware reset, we need to make sure no other process
+ * has reserved the hardware for any reset operations. This check is done by
+ * examining the status of the RSTAT1 register used to signal the reset.
+ **/
+static int i40e_sync_vfr_reset(struct i40e_hw *hw, int vf_id)
+{
+ u32 reg;
+ int i;
+ /* Poll the AdminQ bit of this VF's ICR0_ENA register, at most I40E_VFR_WAIT_COUNT times */
+ for (i = 0; i < I40E_VFR_WAIT_COUNT; i++) {
+ reg = rd32(hw, I40E_VFINT_ICR0_ENA(vf_id)) &
+ I40E_VFINT_ICR0_ADMINQ_MASK;
+ if (reg)
+ return 0; /* bit is set: report success to the caller */
+ /* bit still clear: sleep 100-200 us before reading again */
+ usleep_range(100, 200);
+ }
+ /* the bit never became set within the polling budget */
+ return -EAGAIN;
+}
+
+/**
* i40e_trigger_vf_reset
* @vf: pointer to the VF structure
* @flr: VFLR was issued or not
@@ -1390,9 +1416,11 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr)
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = &pf->hw;
u32 reg, reg_idx, bit_idx;
+ bool vf_active;
+ u32 radq;
/* warn the VF */
- clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
+ vf_active = test_and_clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
/* Disable VF's configuration API during reset. The flag is re-enabled
* in i40e_alloc_vf_res(), when it's safe again to access VF's VSI.
@@ -1406,7 +1434,19 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr)
* just need to clean up, so don't hit the VFRTRIG register.
*/
if (!flr) {
- /* reset VF using VPGEN_VFRTRIG reg */
+ /* Sync VFR reset before triggering the next one */
+ radq = rd32(hw, I40E_VFINT_ICR0_ENA(vf->vf_id)) &
+ I40E_VFINT_ICR0_ADMINQ_MASK;
+ if (vf_active && !radq)
+ /* wait for the virtual driver to finish the reset */
+ if (i40e_sync_vfr_reset(hw, vf->vf_id))
+ dev_info(&pf->pdev->dev,
+ "Reset VF %d never finished\n",
+ vf->vf_id);
+
+ /* Reset VF using VPGEN_VFRTRIG reg. This also sets the
+ * in-progress state in the rstat1 register.
+ */
reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id));
reg |= I40E_VPGEN_VFRTRIG_VFSWR_MASK;
wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
@@ -2618,6 +2658,59 @@ error_param:
}
/**
+ * i40e_check_enough_queue - find big enough queue number
+ * @vf: pointer to the VF info
+ * @needed: the number of items needed
+ *
+ * Returns the base item index of the queue, or negative for error
+ **/
+static int i40e_check_enough_queue(struct i40e_vf *vf, u16 needed)
+{
+ unsigned int i, cur_queues, more, pool_size;
+ struct i40e_lump_tracking *pile;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi;
+ /* Start from the VF's LAN VSI and the queue pairs it already holds */
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ cur_queues = vsi->alloc_queue_pairs;
+
+ /* The currently allocated queues already cover the request */
+ if (cur_queues >= needed)
+ return vsi->base_queue;
+
+ pile = pf->qp_pile;
+ if (cur_queues > 0) {
+ /* Some queues are already allocated: check whether the
+ * (needed - cur_queues) pile entries directly behind
+ * them are all unallocated.
+ */
+ more = needed - cur_queues;
+ for (i = vsi->base_queue + cur_queues;
+ i < pile->num_entries; i++) {
+ if (pile->list[i] & I40E_PILE_VALID_BIT)
+ break;
+
+ if (more-- == 1)
+ /* enough unallocated entries follow the VSI's queues */
+ return vsi->base_queue;
+ }
+ }
+ /* Otherwise scan the whole pile for 'needed' consecutive unallocated entries */
+ pool_size = 0;
+ for (i = 0; i < pile->num_entries; i++) {
+ if (pile->list[i] & I40E_PILE_VALID_BIT) {
+ pool_size = 0;
+ continue;
+ }
+ if (needed <= ++pool_size)
+ /* run is long enough; note i is the last entry of that run */
+ return i;
+ }
+
+ return -ENOMEM;
+}
+
+/**
* i40e_vc_request_queues_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -2651,6 +2744,12 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg)
req_pairs - cur_pairs,
pf->queues_left);
vfres->num_queue_pairs = pf->queues_left + cur_pairs;
+ } else if (i40e_check_enough_queue(vf, req_pairs) < 0) {
+ dev_warn(&pf->pdev->dev,
+ "VF %d requested %d more queues, but there is not enough for it.\n",
+ vf->vf_id,
+ req_pairs - cur_pairs);
+ vfres->num_queue_pairs = cur_pairs;
} else {
/* successful request */
vf->num_req_queues = req_pairs;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 49575a640a84..03c42fd0fea1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -19,6 +19,7 @@
#define I40E_MAX_VF_PROMISC_FLAGS 3
#define I40E_VF_STATE_WAIT_COUNT 20
+#define I40E_VFR_WAIT_COUNT 100
/* Various queue ctrls */
enum i40e_queue_ctrl {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 186d00a9ab35..3631d612aaca 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -1570,6 +1570,8 @@ static struct mac_ops cgx_mac_ops = {
.mac_enadis_pause_frm = cgx_lmac_enadis_pause_frm,
.mac_pause_frm_config = cgx_lmac_pause_frm_config,
.mac_enadis_ptp_config = cgx_lmac_ptp_config,
+ .mac_rx_tx_enable = cgx_lmac_rx_tx_enable,
+ .mac_tx_enable = cgx_lmac_tx_enable,
};
static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
index fc6e7423cbd8..b33e7d1d0851 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
@@ -107,6 +107,9 @@ struct mac_ops {
void (*mac_enadis_ptp_config)(void *cgxd,
int lmac_id,
bool enable);
+
+ int (*mac_rx_tx_enable)(void *cgxd, int lmac_id, bool enable);
+ int (*mac_tx_enable)(void *cgxd, int lmac_id, bool enable);
};
struct cgx {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 4e79e918a161..58e2aeebc14f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -732,6 +732,7 @@ enum nix_af_status {
NIX_AF_ERR_BANDPROF_INVAL_REQ = -428,
NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429,
NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430,
+ NIX_AF_ERR_LINK_CREDITS = -431,
};
/* For NIX RX vtag action */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
index 0fe7ad35e36f..4180376fa676 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -185,7 +185,6 @@ enum npc_kpu_parser_state {
NPC_S_KPU2_QINQ,
NPC_S_KPU2_ETAG,
NPC_S_KPU2_EXDSA,
- NPC_S_KPU2_NGIO,
NPC_S_KPU2_CPT_CTAG,
NPC_S_KPU2_CPT_QINQ,
NPC_S_KPU3_CTAG,
@@ -212,6 +211,7 @@ enum npc_kpu_parser_state {
NPC_S_KPU5_NSH,
NPC_S_KPU5_CPT_IP,
NPC_S_KPU5_CPT_IP6,
+ NPC_S_KPU5_NGIO,
NPC_S_KPU6_IP6_EXT,
NPC_S_KPU6_IP6_HOP_DEST,
NPC_S_KPU6_IP6_ROUT,
@@ -1124,15 +1124,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
NPC_S_KPU1_ETHER, 0xff,
NPC_ETYPE_CTAG,
0xffff,
- NPC_ETYPE_NGIO,
- 0xffff,
- 0x0000,
- 0x0000,
- },
- {
- NPC_S_KPU1_ETHER, 0xff,
- NPC_ETYPE_CTAG,
- 0xffff,
NPC_ETYPE_CTAG,
0xffff,
0x0000,
@@ -1968,6 +1959,15 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
},
{
NPC_S_KPU2_CTAG, 0xff,
+ NPC_ETYPE_NGIO,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU2_CTAG, 0xff,
NPC_ETYPE_PPPOE,
0xffff,
0x0000,
@@ -2750,15 +2750,6 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
0x0000,
},
{
- NPC_S_KPU2_NGIO, 0xff,
- 0x0000,
- 0x0000,
- 0x0000,
- 0x0000,
- 0x0000,
- 0x0000,
- },
- {
NPC_S_KPU2_CPT_CTAG, 0xff,
NPC_ETYPE_IP,
0xffff,
@@ -5090,6 +5081,15 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
0x0000,
},
{
+ NPC_S_KPU5_NGIO, 0xff,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
NPC_S_NA, 0X00,
0x0000,
0x0000,
@@ -8425,14 +8425,6 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
8, 12, 0, 0, 0,
- NPC_S_KPU2_NGIO, 12, 1,
- NPC_LID_LA, NPC_LT_LA_ETHER,
- 0,
- 0, 0, 0, 0,
- },
- {
- NPC_ERRLEV_RE, NPC_EC_NOERR,
- 8, 12, 0, 0, 0,
NPC_S_KPU2_CTAG2, 12, 1,
NPC_LID_LA, NPC_LT_LA_ETHER,
NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
@@ -9196,6 +9188,14 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 2, 0,
+ NPC_S_KPU5_NGIO, 6, 1,
+ NPC_LID_LB, NPC_LT_LB_CTAG,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
8, 0, 6, 2, 0,
NPC_S_KPU5_IP, 14, 1,
NPC_LID_LB, NPC_LT_LB_PPPOE,
@@ -9892,14 +9892,6 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 0, 0, 0, 0, 1,
- NPC_S_NA, 0, 1,
- NPC_LID_LC, NPC_LT_LC_NGIO,
- 0,
- 0, 0, 0, 0,
- },
- {
- NPC_ERRLEV_RE, NPC_EC_NOERR,
8, 0, 6, 2, 0,
NPC_S_KPU5_CPT_IP, 6, 1,
NPC_LID_LB, NPC_LT_LB_CTAG,
@@ -11974,6 +11966,14 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
0, 0, 0, 0,
},
{
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 0, 1,
+ NPC_S_NA, 0, 1,
+ NPC_LID_LC, NPC_LT_LC_NGIO,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
NPC_ERRLEV_LC, NPC_EC_UNK,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
index e695fa0e82a9..9ea2f6ac38ec 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
@@ -30,6 +30,8 @@ static struct mac_ops rpm_mac_ops = {
.mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm,
.mac_pause_frm_config = rpm_lmac_pause_frm_config,
.mac_enadis_ptp_config = rpm_lmac_ptp_config,
+ .mac_rx_tx_enable = rpm_lmac_rx_tx_enable,
+ .mac_tx_enable = rpm_lmac_tx_enable,
};
struct mac_ops *rpm_get_mac_ops(void)
@@ -54,6 +56,43 @@ int rpm_get_nr_lmacs(void *rpmd)
return hweight8(rpm_read(rpm, 0, CGXX_CMRX_RX_LMACS) & 0xFULL);
}
+int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable)
+{
+ rpm_t *rpm = rpmd;
+ u64 cfg, last;
+ /* Set or clear RPM_TX_EN for lmac_id; an invalid LMAC yields -ENODEV */
+ if (!is_lmac_valid(rpm, lmac_id))
+ return -ENODEV;
+ /* Keep the value as read so the previous TX state can be returned */
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
+ last = cfg;
+ if (enable)
+ cfg |= RPM_TX_EN;
+ else
+ cfg &= ~(RPM_TX_EN);
+ /* Write back only when the register value actually changed */
+ if (cfg != last)
+ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
+ return !!(last & RPM_TX_EN); /* 1 if TX was enabled before this call, else 0 */
+}
+
+int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable)
+{
+ rpm_t *rpm = rpmd;
+ u64 cfg;
+ /* Set or clear both RPM_RX_EN and RPM_TX_EN for lmac_id; invalid LMAC yields -ENODEV */
+ if (!is_lmac_valid(rpm, lmac_id))
+ return -ENODEV;
+ /* Read-modify-write of RPMX_MTI_MAC100X_COMMAND_CONFIG */
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
+ if (enable)
+ cfg |= RPM_RX_EN | RPM_TX_EN;
+ else
+ cfg &= ~(RPM_RX_EN | RPM_TX_EN);
+ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); /* written even if unchanged */
+ return 0;
+}
+
void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable)
{
rpm_t *rpm = rpmd;
@@ -252,23 +291,20 @@ int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable)
if (!rpm || lmac_id >= rpm->lmac_count)
return -ENODEV;
lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id);
- if (lmac_type == LMAC_MODE_100G_R) {
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
-
- if (enable)
- cfg |= RPMX_MTI_PCS_LBK;
- else
- cfg &= ~RPMX_MTI_PCS_LBK;
- rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
- } else {
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1);
- if (enable)
- cfg |= RPMX_MTI_PCS_LBK;
- else
- cfg &= ~RPMX_MTI_PCS_LBK;
- rpm_write(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1, cfg);
+
+ if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) {
+ dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n");
+ return 0;
}
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
+
+ if (enable)
+ cfg |= RPMX_MTI_PCS_LBK;
+ else
+ cfg &= ~RPMX_MTI_PCS_LBK;
+ rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
+
return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
index 57c8a687b488..ff580311edd0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
@@ -43,6 +43,8 @@
#define RPMX_MTI_STAT_DATA_HI_CDC 0x10038
#define RPM_LMAC_FWI 0xa
+#define RPM_TX_EN BIT_ULL(0)
+#define RPM_RX_EN BIT_ULL(1)
/* Function Declarations */
int rpm_get_nr_lmacs(void *rpmd);
@@ -57,4 +59,6 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause,
int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat);
int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat);
void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable);
+int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable);
+int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable);
#endif /* RPM_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 3ca6b942ebe2..54e1b27a7dfe 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -520,8 +520,11 @@ static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg)
rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0));
err = rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
- if (err)
- dev_err(rvu->dev, "HW block:%d reset failed\n", blkaddr);
+ if (err) {
+ dev_err(rvu->dev, "HW block:%d reset timeout retrying again\n", blkaddr);
+ while (rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true) == -EBUSY)
+ ;
+ }
}
static void rvu_reset_all_blocks(struct rvu *rvu)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 66e45d733824..5ed94cfb47d2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -806,6 +806,7 @@ bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature);
u32 rvu_cgx_get_fifolen(struct rvu *rvu);
void *rvu_first_cgx_pdata(struct rvu *rvu);
int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id);
+int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable);
int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf,
int type);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 2ca182a4ce82..8a7ac5a8b821 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -441,16 +441,26 @@ void rvu_cgx_enadis_rx_bp(struct rvu *rvu, int pf, bool enable)
int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start)
{
int pf = rvu_get_pf(pcifunc);
+ struct mac_ops *mac_ops;
u8 cgx_id, lmac_id;
+ void *cgxd;
if (!is_cgx_config_permitted(rvu, pcifunc))
return LMAC_AF_ERR_PERM_DENIED;
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+ cgxd = rvu_cgx_pdata(cgx_id, rvu);
+ mac_ops = get_mac_ops(cgxd);
+
+ return mac_ops->mac_rx_tx_enable(cgxd, lmac_id, start);
+}
- cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start);
+int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable)
+{
+ struct mac_ops *mac_ops;
- return 0;
+ mac_ops = get_mac_ops(cgxd);
+ return mac_ops->mac_tx_enable(cgxd, lmac_id, enable);
}
void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index a09a507369ac..d1eddb769a41 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -1224,6 +1224,8 @@ static void print_nix_cn10k_sq_ctx(struct seq_file *m,
seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n",
sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld);
+ seq_printf(m, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d\n",
+ sq_ctx->smq_next_sq_vld, sq_ctx->smq_pend);
seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb);
seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb);
seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index d8b1948aaa0a..97fb61915379 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -512,11 +512,11 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
lmac_chan_cnt = cfg & 0xFF;
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
- sdp_chan_cnt = cfg & 0xFFF;
-
cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt;
lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF);
+
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+ sdp_chan_cnt = cfg & 0xFFF;
sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt;
pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
@@ -2068,8 +2068,8 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
/* enable cgx tx if disabled */
if (is_pf_cgxmapped(rvu, pf)) {
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
- restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu),
- lmac_id, true);
+ restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu),
+ lmac_id, true);
}
cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
@@ -2092,7 +2092,7 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
rvu_cgx_enadis_rx_bp(rvu, pf, true);
/* restore cgx tx state */
if (restore_tx_en)
- cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+ rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
return err;
}
@@ -3878,7 +3878,7 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
/* Enable cgx tx if disabled for credits to be back */
if (is_pf_cgxmapped(rvu, pf)) {
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
- restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu),
+ restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu),
lmac_id, true);
}
@@ -3891,8 +3891,8 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0));
}
- rc = -EBUSY;
- poll_tmo = jiffies + usecs_to_jiffies(10000);
+ rc = NIX_AF_ERR_LINK_CREDITS;
+ poll_tmo = jiffies + usecs_to_jiffies(200000);
/* Wait for credits to return */
do {
if (time_after(jiffies, poll_tmo))
@@ -3918,7 +3918,7 @@ exit:
/* Restore state of cgx tx */
if (restore_tx_en)
- cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+ rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
mutex_unlock(&rvu->rsrc_lock);
return rc;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index c0005a1feee6..91f86d77cd41 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -402,6 +402,7 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
int blkaddr, int index, struct mcam_entry *entry,
bool *enable)
{
+ struct rvu_npc_mcam_rule *rule;
u16 owner, target_func;
struct rvu_pfvf *pfvf;
u64 rx_action;
@@ -423,6 +424,12 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
test_bit(NIXLF_INITIALIZED, &pfvf->flags)))
*enable = false;
+ /* fix up not needed for the rules added by user(ntuple filters) */
+ list_for_each_entry(rule, &mcam->mcam_rules, list) {
+ if (rule->entry == index)
+ return;
+ }
+
/* copy VF default entry action to the VF mcam entry */
rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr,
target_func);
@@ -489,8 +496,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
}
/* PF installing VF rule */
- if (intf == NIX_INTF_RX && actindex < mcam->bmap_entries)
- npc_fixup_vf_rule(rvu, mcam, blkaddr, index, entry, &enable);
+ if (is_npc_intf_rx(intf) && actindex < mcam->bmap_entries)
+ npc_fixup_vf_rule(rvu, mcam, blkaddr, actindex, entry, &enable);
/* Set 'action' */
rvu_write64(rvu, blkaddr,
@@ -916,7 +923,8 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
int blkaddr, u16 pcifunc, u64 rx_action)
{
int actindex, index, bank, entry;
- bool enable;
+ struct rvu_npc_mcam_rule *rule;
+ bool enable, update;
if (!(pcifunc & RVU_PFVF_FUNC_MASK))
return;
@@ -924,6 +932,14 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
mutex_lock(&mcam->lock);
for (index = 0; index < mcam->bmap_entries; index++) {
if (mcam->entry2target_pffunc[index] == pcifunc) {
+ update = true;
+ /* update not needed for the rules added via ntuple filters */
+ list_for_each_entry(rule, &mcam->mcam_rules, list) {
+ if (rule->entry == index)
+ update = false;
+ }
+ if (!update)
+ continue;
bank = npc_get_bank(mcam, index);
actindex = index;
entry = index & (mcam->banksize - 1);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index ff2b21999f36..19c53e591d0d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1098,14 +1098,6 @@ find_rule:
write_req.cntr = rule->cntr;
}
- err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
- &write_rsp);
- if (err) {
- rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
- if (new)
- kfree(rule);
- return err;
- }
/* update rule */
memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet));
memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask));
@@ -1132,6 +1124,18 @@ find_rule:
if (req->default_rule)
pfvf->def_ucast_rule = rule;
+ /* write to mcam entry registers */
+ err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
+ &write_rsp);
+ if (err) {
+ rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
+ if (new) {
+ list_del(&rule->list);
+ kfree(rule);
+ }
+ return err;
+ }
+
/* VF's MAC address is being changed via PF */
if (pf_set_vfs_mac) {
ether_addr_copy(pfvf->default_mac, req->packet.dmac);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 61e52812983f..14509fc64cce 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -603,6 +603,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
size++;
tar_addr |= ((size - 1) & 0x7) << 4;
}
+ dma_wmb();
memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs);
/* Perform LMTST flush */
cn10k_lmt_flush(val, tar_addr);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 6080ebd9bd94..d39341e4ab37 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -394,7 +394,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf,
dst_mdev->msg_size = mbox_hdr->msg_size;
dst_mdev->num_msgs = num_msgs;
err = otx2_sync_mbox_msg(dst_mbox);
- if (err) {
+ /* Error code -EIO indicate there is a communication failure
+ * to the AF. Rest of the error codes indicate that AF processed
+ * VF messages and set the error codes in response messages
+ * (if any) so simply forward responses to VF.
+ */
+ if (err == -EIO) {
dev_warn(pf->dev,
"AF not responding to VF%d messages\n", vf);
/* restore PF mbase and exit */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index a0a5a8e6bd8c..2fd9ef2fe5d6 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -283,7 +283,6 @@ struct prestera_router {
struct list_head rif_entry_list;
struct notifier_block inetaddr_nb;
struct notifier_block inetaddr_valid_nb;
- bool aborted;
};
struct prestera_rxtx_params {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index 51fc841b1e7a..e6bfadc874c5 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -1831,8 +1831,8 @@ static int prestera_iface_to_msg(struct prestera_iface *iface,
int prestera_hw_rif_create(struct prestera_switch *sw,
struct prestera_iface *iif, u8 *mac, u16 *rif_id)
{
- struct prestera_msg_rif_req req;
struct prestera_msg_rif_resp resp;
+ struct prestera_msg_rif_req req;
int err;
memcpy(req.mac, mac, ETH_ALEN);
@@ -1868,9 +1868,9 @@ int prestera_hw_rif_delete(struct prestera_switch *sw, u16 rif_id,
int prestera_hw_vr_create(struct prestera_switch *sw, u16 *vr_id)
{
- int err;
struct prestera_msg_vr_resp resp;
struct prestera_msg_vr_req req;
+ int err;
err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_ROUTER_VR_CREATE,
&req.cmd, sizeof(req), &resp.ret, sizeof(resp));
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 08fdd1e50388..cad93f747d0c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -982,6 +982,7 @@ static void prestera_switch_fini(struct prestera_switch *sw)
prestera_event_handlers_unregister(sw);
prestera_rxtx_switch_fini(sw);
prestera_switchdev_fini(sw);
+ prestera_router_fini(sw);
prestera_netdev_event_handler_unregister(sw);
prestera_hw_switch_fini(sw);
}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c
index 8a3b7b664358..6ef4d32b8fdd 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c
@@ -25,10 +25,10 @@ static int __prestera_inetaddr_port_event(struct net_device *port_dev,
struct netlink_ext_ack *extack)
{
struct prestera_port *port = netdev_priv(port_dev);
- int err;
- struct prestera_rif_entry *re;
struct prestera_rif_entry_key re_key = {};
+ struct prestera_rif_entry *re;
u32 kern_tb_id;
+ int err;
err = prestera_is_valid_mac_addr(port, port_dev->dev_addr);
if (err) {
@@ -45,21 +45,21 @@ static int __prestera_inetaddr_port_event(struct net_device *port_dev,
switch (event) {
case NETDEV_UP:
if (re) {
- NL_SET_ERR_MSG_MOD(extack, "rif_entry already exist");
+ NL_SET_ERR_MSG_MOD(extack, "RIF already exist");
return -EEXIST;
}
re = prestera_rif_entry_create(port->sw, &re_key,
prestera_fix_tb_id(kern_tb_id),
port_dev->dev_addr);
if (!re) {
- NL_SET_ERR_MSG_MOD(extack, "Can't create rif_entry");
+ NL_SET_ERR_MSG_MOD(extack, "Can't create RIF");
return -EINVAL;
}
dev_hold(port_dev);
break;
case NETDEV_DOWN:
if (!re) {
- NL_SET_ERR_MSG_MOD(extack, "rif_entry not exist");
+ NL_SET_ERR_MSG_MOD(extack, "Can't find RIF");
return -EEXIST;
}
prestera_rif_entry_destroy(port->sw, re);
@@ -75,11 +75,11 @@ static int __prestera_inetaddr_event(struct prestera_switch *sw,
unsigned long event,
struct netlink_ext_ack *extack)
{
- if (prestera_netdev_check(dev) && !netif_is_bridge_port(dev) &&
- !netif_is_lag_port(dev) && !netif_is_ovs_port(dev))
- return __prestera_inetaddr_port_event(dev, event, extack);
+ if (!prestera_netdev_check(dev) || netif_is_bridge_port(dev) ||
+ netif_is_lag_port(dev) || netif_is_ovs_port(dev))
+ return 0;
- return 0;
+ return __prestera_inetaddr_port_event(dev, event, extack);
}
static int __prestera_inetaddr_cb(struct notifier_block *nb,
@@ -126,6 +126,8 @@ static int __prestera_inetaddr_valid_cb(struct notifier_block *nb,
goto out;
if (ipv4_is_multicast(ivi->ivi_addr)) {
+ NL_SET_ERR_MSG_MOD(ivi->extack,
+ "Multicast addr on RIF is not supported");
err = -EINVAL;
goto out;
}
@@ -166,7 +168,7 @@ int prestera_router_init(struct prestera_switch *sw)
err_register_inetaddr_notifier:
unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
err_register_inetaddr_validator_notifier:
- /* prestera_router_hw_fini */
+ prestera_router_hw_fini(sw);
err_router_lib_init:
kfree(sw->router);
return err;
@@ -176,7 +178,7 @@ void prestera_router_fini(struct prestera_switch *sw)
{
unregister_inetaddr_notifier(&sw->router->inetaddr_nb);
unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb);
- /* router_hw_fini */
+ prestera_router_hw_fini(sw);
kfree(sw->router);
sw->router = NULL;
}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
index 5866a4be50f5..e5592b69ad37 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
@@ -29,6 +29,12 @@ int prestera_router_hw_init(struct prestera_switch *sw)
return 0;
}
+void prestera_router_hw_fini(struct prestera_switch *sw)
+{
+ WARN_ON(!list_empty(&sw->router->vr_list));
+ WARN_ON(!list_empty(&sw->router->rif_entry_list));
+}
+
static struct prestera_vr *__prestera_vr_find(struct prestera_switch *sw,
u32 tb_id)
{
@@ -47,13 +53,8 @@ static struct prestera_vr *__prestera_vr_create(struct prestera_switch *sw,
struct netlink_ext_ack *extack)
{
struct prestera_vr *vr;
- u16 hw_vr_id;
int err;
- err = prestera_hw_vr_create(sw, &hw_vr_id);
- if (err)
- return ERR_PTR(-ENOMEM);
-
vr = kzalloc(sizeof(*vr), GFP_KERNEL);
if (!vr) {
err = -ENOMEM;
@@ -61,23 +62,26 @@ static struct prestera_vr *__prestera_vr_create(struct prestera_switch *sw,
}
vr->tb_id = tb_id;
- vr->hw_vr_id = hw_vr_id;
+
+ err = prestera_hw_vr_create(sw, &vr->hw_vr_id);
+ if (err)
+ goto err_hw_create;
list_add(&vr->router_node, &sw->router->vr_list);
return vr;
-err_alloc_vr:
- prestera_hw_vr_delete(sw, hw_vr_id);
+err_hw_create:
kfree(vr);
+err_alloc_vr:
return ERR_PTR(err);
}
static void __prestera_vr_destroy(struct prestera_switch *sw,
struct prestera_vr *vr)
{
- prestera_hw_vr_delete(sw, vr->hw_vr_id);
list_del(&vr->router_node);
+ prestera_hw_vr_delete(sw, vr->hw_vr_id);
kfree(vr);
}
@@ -87,17 +91,22 @@ static struct prestera_vr *prestera_vr_get(struct prestera_switch *sw, u32 tb_id
struct prestera_vr *vr;
vr = __prestera_vr_find(sw, tb_id);
- if (!vr)
+ if (vr) {
+ refcount_inc(&vr->refcount);
+ } else {
vr = __prestera_vr_create(sw, tb_id, extack);
- if (IS_ERR(vr))
- return ERR_CAST(vr);
+ if (IS_ERR(vr))
+ return ERR_CAST(vr);
+
+ refcount_set(&vr->refcount, 1);
+ }
return vr;
}
static void prestera_vr_put(struct prestera_switch *sw, struct prestera_vr *vr)
{
- if (!vr->ref_cnt)
+ if (refcount_dec_and_test(&vr->refcount))
__prestera_vr_destroy(sw, vr);
}
@@ -120,7 +129,7 @@ __prestera_rif_entry_key_copy(const struct prestera_rif_entry_key *in,
out->iface.vlan_id = in->iface.vlan_id;
break;
default:
- pr_err("Unsupported iface type");
+ WARN(1, "Unsupported iface type");
return -EINVAL;
}
@@ -158,7 +167,6 @@ void prestera_rif_entry_destroy(struct prestera_switch *sw,
iface.vr_id = e->vr->hw_vr_id;
prestera_hw_rif_delete(sw, e->hw_id, &iface);
- e->vr->ref_cnt--;
prestera_vr_put(sw, e->vr);
kfree(e);
}
@@ -183,7 +191,6 @@ prestera_rif_entry_create(struct prestera_switch *sw,
if (IS_ERR(e->vr))
goto err_vr_get;
- e->vr->ref_cnt++;
memcpy(&e->addr, addr, sizeof(e->addr));
/* HW */
@@ -198,7 +205,6 @@ prestera_rif_entry_create(struct prestera_switch *sw,
return e;
err_hw_create:
- e->vr->ref_cnt--;
prestera_vr_put(sw, e->vr);
err_vr_get:
err_key_copy:
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h
index fed53595f7bb..b6b028551868 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h
@@ -6,7 +6,7 @@
struct prestera_vr {
struct list_head router_node;
- unsigned int ref_cnt;
+ refcount_t refcount;
u32 tb_id; /* key (kernel fib table id) */
u16 hw_vr_id; /* virtual router ID */
u8 __pad[2];
@@ -32,5 +32,6 @@ prestera_rif_entry_create(struct prestera_switch *sw,
struct prestera_rif_entry_key *k,
u32 tb_id, const unsigned char *addr);
int prestera_router_hw_init(struct prestera_switch *sw);
+void prestera_router_hw_fini(struct prestera_switch *sw);
#endif /* _PRESTERA_ROUTER_HW_H_ */
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b67b4323cff0..f02d07ec5ccb 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -267,7 +267,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
phylink_config);
struct mtk_eth *eth = mac->hw;
u32 mcr_cur, mcr_new, sid, i;
- int val, ge_mode, err;
+ int val, ge_mode, err = 0;
/* MT76x8 has no hardware settings between for the MAC */
if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 33815246fead..378fc8e3bd97 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */
+#include <net/inet_ecn.h>
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
@@ -235,7 +236,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
@@ -350,7 +351,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
index ca5f1177963d..ce5970bdcc6a 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
@@ -40,11 +40,12 @@ static int lan966x_mac_wait_for_completion(struct lan966x *lan966x)
{
u32 val;
- return readx_poll_timeout(lan966x_mac_get_status,
- lan966x, val,
- (ANA_MACACCESS_MAC_TABLE_CMD_GET(val)) ==
- MACACCESS_CMD_IDLE,
- TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
+ return readx_poll_timeout_atomic(lan966x_mac_get_status,
+ lan966x, val,
+ (ANA_MACACCESS_MAC_TABLE_CMD_GET(val)) ==
+ MACACCESS_CMD_IDLE,
+ TABLE_UPDATE_SLEEP_US,
+ TABLE_UPDATE_TIMEOUT_US);
}
static void lan966x_mac_select(struct lan966x *lan966x,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 2cb70da63db3..1f60fd125a1d 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -182,9 +182,9 @@ static int lan966x_port_inj_ready(struct lan966x *lan966x, u8 grp)
{
u32 val;
- return readx_poll_timeout(lan966x_port_inj_status, lan966x, val,
- QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp),
- READL_SLEEP_US, READL_TIMEOUT_US);
+ return readx_poll_timeout_atomic(lan966x_port_inj_status, lan966x, val,
+ QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp),
+ READL_SLEEP_US, READL_TIMEOUT_US);
}
static int lan966x_port_ifh_xmit(struct sk_buff *skb,
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index b1311b656e17..455293aa6343 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -771,7 +771,10 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
- ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
+ /* Don't attempt to send PAUSE frames on the NPI port, it's broken */
+ if (port != ocelot->npi)
+ ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA,
+ tx_pause);
/* Undo the effects of ocelot_phylink_mac_link_down:
* enable MAC module
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index beb9379424c0..949858891973 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -559,13 +559,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
return -EOPNOTSUPP;
}
- if (filter->block_id == VCAP_IS1 &&
- !is_zero_ether_addr(match.mask->dst)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Key type S1_NORMAL cannot match on destination MAC");
- return -EOPNOTSUPP;
- }
-
/* The hw support mac matches only for MAC_ETYPE key,
* therefore if other matches(port, tcp flags, etc) are added
* then just bail out
@@ -580,6 +573,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
return -EOPNOTSUPP;
flow_rule_match_eth_addrs(rule, &match);
+
+ if (filter->block_id == VCAP_IS1 &&
+ !is_zero_ether_addr(match.mask->dst)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Key type S1_NORMAL cannot match on destination MAC");
+ return -EOPNOTSUPP;
+ }
+
filter->key_type = OCELOT_VCAP_KEY_ETYPE;
ether_addr_copy(filter->key.etype.dmac.value,
match.key->dst);
@@ -805,13 +806,34 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port,
struct netlink_ext_ack *extack = f->common.extack;
struct ocelot_vcap_filter *filter;
int chain = f->common.chain_index;
- int ret;
+ int block_id, ret;
if (chain && !ocelot_find_vcap_filter_that_points_at(ocelot, chain)) {
NL_SET_ERR_MSG_MOD(extack, "No default GOTO action points to this chain");
return -EOPNOTSUPP;
}
+ block_id = ocelot_chain_to_block(chain, ingress);
+ if (block_id < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot offload to this chain");
+ return -EOPNOTSUPP;
+ }
+
+ filter = ocelot_vcap_block_find_filter_by_id(&ocelot->block[block_id],
+ f->cookie, true);
+ if (filter) {
+ /* Filter already exists on other ports */
+ if (!ingress) {
+ NL_SET_ERR_MSG_MOD(extack, "VCAP ES0 does not support shared filters");
+ return -EOPNOTSUPP;
+ }
+
+ filter->ingress_port_mask |= BIT(port);
+
+ return ocelot_vcap_filter_replace(ocelot, filter);
+ }
+
+ /* Filter didn't exist, create it now */
filter = ocelot_vcap_filter_create(ocelot, port, ingress, f);
if (!filter)
return -ENOMEM;
@@ -874,6 +896,12 @@ int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port,
if (filter->type == OCELOT_VCAP_FILTER_DUMMY)
return ocelot_vcap_dummy_filter_del(ocelot, filter);
+ if (ingress) {
+ filter->ingress_port_mask &= ~BIT(port);
+ if (filter->ingress_port_mask)
+ return ocelot_vcap_filter_replace(ocelot, filter);
+ }
+
return ocelot_vcap_filter_del(ocelot, filter);
}
EXPORT_SYMBOL_GPL(ocelot_cls_flower_destroy);
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 8115c3db252e..e271b6225b72 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -1187,7 +1187,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
ocelot_port_bridge_join(ocelot, port, bridge);
err = switchdev_bridge_port_offload(brport_dev, dev, priv,
- &ocelot_netdevice_nb,
+ &ocelot_switchdev_nb,
&ocelot_switchdev_blocking_nb,
false, extack);
if (err)
@@ -1201,7 +1201,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
err_switchdev_sync:
switchdev_bridge_port_unoffload(brport_dev, priv,
- &ocelot_netdevice_nb,
+ &ocelot_switchdev_nb,
&ocelot_switchdev_blocking_nb);
err_switchdev_offload:
ocelot_port_bridge_leave(ocelot, port, bridge);
@@ -1214,7 +1214,7 @@ static void ocelot_netdevice_pre_bridge_leave(struct net_device *dev,
struct ocelot_port_private *priv = netdev_priv(dev);
switchdev_bridge_port_unoffload(brport_dev, priv,
- &ocelot_netdevice_nb,
+ &ocelot_switchdev_nb,
&ocelot_switchdev_blocking_nb);
}
diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c
index 16a4cbae9326..c672f92d65e9 100644
--- a/drivers/net/ethernet/seeq/ether3.c
+++ b/drivers/net/ethernet/seeq/ether3.c
@@ -749,6 +749,7 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id)
const struct ether3_data *data = id->data;
struct net_device *dev;
int bus_type, ret;
+ u8 addr[ETH_ALEN];
ether3_banner();
@@ -776,7 +777,8 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id)
priv(dev)->seeq = priv(dev)->base + data->base_offset;
dev->irq = ec->irq;
- ether3_addr(dev->dev_addr, ec);
+ ether3_addr(addr, ec);
+ eth_hw_addr_set(dev, addr);
priv(dev)->dev = dev;
timer_setup(&priv(dev)->timer, ether3_ledoff, 0);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
index adfeb8d3293d..62a69a91ab22 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
@@ -12,6 +12,7 @@
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/mfd/syscon.h>
@@ -48,46 +49,60 @@
#define DWMAC_RX_VARDELAY(d) ((d) << DWMAC_RX_VARDELAY_SHIFT)
#define DWMAC_RXN_VARDELAY(d) ((d) << DWMAC_RXN_VARDELAY_SHIFT)
+struct oxnas_dwmac;
+
+struct oxnas_dwmac_data {
+ int (*setup)(struct oxnas_dwmac *dwmac);
+};
+
struct oxnas_dwmac {
struct device *dev;
struct clk *clk;
struct regmap *regmap;
+ const struct oxnas_dwmac_data *data;
};
-static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
+static int oxnas_dwmac_setup_ox810se(struct oxnas_dwmac *dwmac)
{
- struct oxnas_dwmac *dwmac = priv;
unsigned int value;
int ret;
- /* Reset HW here before changing the glue configuration */
- ret = device_reset(dwmac->dev);
- if (ret)
+ ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value);
+ if (ret < 0)
return ret;
- ret = clk_prepare_enable(dwmac->clk);
- if (ret)
- return ret;
+ /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */
+ value |= BIT(DWMAC_CKEN_GTX) |
+ /* Use simple mux for 25/125 Mhz clock switching */
+ BIT(DWMAC_SIMPLE_MUX);
+
+ regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value);
+
+ return 0;
+}
+
+static int oxnas_dwmac_setup_ox820(struct oxnas_dwmac *dwmac)
+{
+ unsigned int value;
+ int ret;
ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value);
- if (ret < 0) {
- clk_disable_unprepare(dwmac->clk);
+ if (ret < 0)
return ret;
- }
/* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */
value |= BIT(DWMAC_CKEN_GTX) |
/* Use simple mux for 25/125 Mhz clock switching */
- BIT(DWMAC_SIMPLE_MUX) |
- /* set auto switch tx clock source */
- BIT(DWMAC_AUTO_TX_SOURCE) |
- /* enable tx & rx vardelay */
- BIT(DWMAC_CKEN_TX_OUT) |
- BIT(DWMAC_CKEN_TXN_OUT) |
- BIT(DWMAC_CKEN_TX_IN) |
- BIT(DWMAC_CKEN_RX_OUT) |
- BIT(DWMAC_CKEN_RXN_OUT) |
- BIT(DWMAC_CKEN_RX_IN);
+ BIT(DWMAC_SIMPLE_MUX) |
+ /* set auto switch tx clock source */
+ BIT(DWMAC_AUTO_TX_SOURCE) |
+ /* enable tx & rx vardelay */
+ BIT(DWMAC_CKEN_TX_OUT) |
+ BIT(DWMAC_CKEN_TXN_OUT) |
+ BIT(DWMAC_CKEN_TX_IN) |
+ BIT(DWMAC_CKEN_RX_OUT) |
+ BIT(DWMAC_CKEN_RXN_OUT) |
+ BIT(DWMAC_CKEN_RX_IN);
regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value);
/* set tx & rx vardelay */
@@ -100,6 +115,27 @@ static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
return 0;
}
+static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
+{
+ struct oxnas_dwmac *dwmac = priv;
+ int ret;
+
+ /* Reset HW here before changing the glue configuration */
+ ret = device_reset(dwmac->dev);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare_enable(dwmac->clk);
+ if (ret)
+ return ret;
+
+ ret = dwmac->data->setup(dwmac);
+ if (ret)
+ clk_disable_unprepare(dwmac->clk);
+
+ return ret;
+}
+
static void oxnas_dwmac_exit(struct platform_device *pdev, void *priv)
{
struct oxnas_dwmac *dwmac = priv;
@@ -128,6 +164,12 @@ static int oxnas_dwmac_probe(struct platform_device *pdev)
goto err_remove_config_dt;
}
+ dwmac->data = (const struct oxnas_dwmac_data *)of_device_get_match_data(&pdev->dev);
+ if (!dwmac->data) {
+ ret = -EINVAL;
+ goto err_remove_config_dt;
+ }
+
dwmac->dev = &pdev->dev;
plat_dat->bsp_priv = dwmac;
plat_dat->init = oxnas_dwmac_init;
@@ -166,8 +208,23 @@ err_remove_config_dt:
return ret;
}
+static const struct oxnas_dwmac_data ox810se_dwmac_data = {
+ .setup = oxnas_dwmac_setup_ox810se,
+};
+
+static const struct oxnas_dwmac_data ox820_dwmac_data = {
+ .setup = oxnas_dwmac_setup_ox820,
+};
+
static const struct of_device_id oxnas_dwmac_match[] = {
- { .compatible = "oxsemi,ox820-dwmac" },
+ {
+ .compatible = "oxsemi,ox810se-dwmac",
+ .data = &ox810se_dwmac_data,
+ },
+ {
+ .compatible = "oxsemi,ox820-dwmac",
+ .data = &ox820_dwmac_data,
+ },
{ }
};
MODULE_DEVICE_TABLE(of, oxnas_dwmac_match);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 617d0e4c6495..09644ab0d87a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -756,7 +756,7 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv)
if (err) {
dev_err(priv->device, "EMAC reset timeout\n");
- return -EFAULT;
+ return err;
}
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
index e2e0f977875d..dde5b772a5af 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
@@ -22,21 +22,21 @@
#define ETHER_CLK_SEL_RMII_CLK_EN BIT(2)
#define ETHER_CLK_SEL_RMII_CLK_RST BIT(3)
#define ETHER_CLK_SEL_DIV_SEL_2 BIT(4)
-#define ETHER_CLK_SEL_DIV_SEL_20 BIT(0)
+#define ETHER_CLK_SEL_DIV_SEL_20 0
#define ETHER_CLK_SEL_FREQ_SEL_125M (BIT(9) | BIT(8))
#define ETHER_CLK_SEL_FREQ_SEL_50M BIT(9)
#define ETHER_CLK_SEL_FREQ_SEL_25M BIT(8)
#define ETHER_CLK_SEL_FREQ_SEL_2P5M 0
-#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN BIT(0)
+#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN 0
#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC BIT(10)
#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV BIT(11)
-#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN BIT(0)
+#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN 0
#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC BIT(12)
#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV BIT(13)
-#define ETHER_CLK_SEL_TX_CLK_O_TX_I BIT(0)
+#define ETHER_CLK_SEL_TX_CLK_O_TX_I 0
#define ETHER_CLK_SEL_TX_CLK_O_RMII_I BIT(14)
#define ETHER_CLK_SEL_TX_O_E_N_IN BIT(15)
-#define ETHER_CLK_SEL_RMII_CLK_SEL_IN BIT(0)
+#define ETHER_CLK_SEL_RMII_CLK_SEL_IN 0
#define ETHER_CLK_SEL_RMII_CLK_SEL_RX_C BIT(16)
#define ETHER_CLK_SEL_RX_TX_CLK_EN (ETHER_CLK_SEL_RX_CLK_EN | ETHER_CLK_SEL_TX_CLK_EN)
@@ -96,31 +96,41 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed)
val |= ETHER_CLK_SEL_TX_O_E_N_IN;
writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ /* Set Clock-Mux, Start clock, Set TX_O direction */
switch (dwmac->phy_intf_sel) {
case ETHER_CONFIG_INTF_RGMII:
val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
break;
case ETHER_CONFIG_INTF_RMII:
val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV |
- ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN |
+ ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
ETHER_CLK_SEL_RMII_CLK_SEL_RX_C;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val |= ETHER_CLK_SEL_RMII_CLK_RST;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val |= ETHER_CLK_SEL_RMII_CLK_EN | ETHER_CLK_SEL_RX_TX_CLK_EN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
break;
case ETHER_CONFIG_INTF_MII:
default:
val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC |
- ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
- ETHER_CLK_SEL_RMII_CLK_EN;
+ ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
break;
}
- /* Start clock */
- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
- val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
-
- val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
-
spin_unlock_irqrestore(&dwmac->lock, flags);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 40b5ed94cb54..5b195d5051d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -194,7 +194,6 @@ struct stmmac_priv {
u32 tx_coal_timer[MTL_MAX_TX_QUEUES];
u32 rx_coal_frames[MTL_MAX_TX_QUEUES];
- int tx_coalesce;
int hwts_tx_en;
bool tx_path_in_lpi_mode;
bool tso;
@@ -229,7 +228,6 @@ struct stmmac_priv {
unsigned int flow_ctrl;
unsigned int pause;
struct mii_bus *mii;
- int mii_irq[PHY_MAX_ADDR];
struct phylink_config phylink_config;
struct phylink *phylink;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 63ff2dad8c85..639a753266e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -402,7 +402,7 @@ static void stmmac_lpi_entry_timer_config(struct stmmac_priv *priv, bool en)
* Description: this function is to verify and enter in LPI mode in case of
* EEE.
*/
-static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
+static int stmmac_enable_eee_mode(struct stmmac_priv *priv)
{
u32 tx_cnt = priv->plat->tx_queues_to_use;
u32 queue;
@@ -412,13 +412,14 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
if (tx_q->dirty_tx != tx_q->cur_tx)
- return; /* still unfinished work */
+ return -EBUSY; /* still unfinished work */
}
/* Check and enter in LPI mode */
if (!priv->tx_path_in_lpi_mode)
stmmac_set_eee_mode(priv, priv->hw,
priv->plat->en_tx_lpi_clockgating);
+ return 0;
}
/**
@@ -450,8 +451,8 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
{
struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer);
- stmmac_enable_eee_mode(priv);
- mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
+ if (stmmac_enable_eee_mode(priv))
+ mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
}
/**
@@ -889,6 +890,9 @@ static int stmmac_init_ptp(struct stmmac_priv *priv)
bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
int ret;
+ if (priv->plat->ptp_clk_freq_config)
+ priv->plat->ptp_clk_freq_config(priv);
+
ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE);
if (ret)
return ret;
@@ -911,8 +915,6 @@ static int stmmac_init_ptp(struct stmmac_priv *priv)
priv->hwts_tx_en = 0;
priv->hwts_rx_en = 0;
- stmmac_ptp_register(priv);
-
return 0;
}
@@ -2647,8 +2649,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
if (priv->eee_enabled && !priv->tx_path_in_lpi_mode &&
priv->eee_sw_timer_en) {
- stmmac_enable_eee_mode(priv);
- mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
+ if (stmmac_enable_eee_mode(priv))
+ mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
}
/* We still have pending packets, let's call for a new scheduling */
@@ -3238,7 +3240,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
/**
* stmmac_hw_setup - setup mac in a usable state.
* @dev : pointer to the device structure.
- * @init_ptp: initialize PTP if set
+ * @ptp_register: register PTP if set
* Description:
* this is the main function to setup the HW in a usable state because the
* dma engine is reset, the core registers are configured (e.g. AXI,
@@ -3248,7 +3250,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
* 0 on success and an appropriate (-)ve integer as defined in errno.h
* file on failure.
*/
-static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
+static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
{
struct stmmac_priv *priv = netdev_priv(dev);
u32 rx_cnt = priv->plat->rx_queues_to_use;
@@ -3305,13 +3307,13 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
stmmac_mmc_setup(priv);
- if (init_ptp) {
- ret = stmmac_init_ptp(priv);
- if (ret == -EOPNOTSUPP)
- netdev_warn(priv->dev, "PTP not supported by HW\n");
- else if (ret)
- netdev_warn(priv->dev, "PTP init failed\n");
- }
+ ret = stmmac_init_ptp(priv);
+ if (ret == -EOPNOTSUPP)
+ netdev_warn(priv->dev, "PTP not supported by HW\n");
+ else if (ret)
+ netdev_warn(priv->dev, "PTP init failed\n");
+ else if (ptp_register)
+ stmmac_ptp_register(priv);
priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS;
@@ -7159,7 +7161,8 @@ int stmmac_dvr_probe(struct device *device,
pm_runtime_get_noresume(device);
pm_runtime_set_active(device);
- pm_runtime_enable(device);
+ if (!pm_runtime_enabled(device))
+ pm_runtime_enable(device);
if (priv->hw->pcs != STMMAC_PCS_TBI &&
priv->hw->pcs != STMMAC_PCS_RTBI) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 0d24ebd37873..1c9f02f9c317 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -297,9 +297,6 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
{
int i;
- if (priv->plat->ptp_clk_freq_config)
- priv->plat->ptp_clk_freq_config(priv);
-
for (i = 0; i < priv->dma_cap.pps_out_num; i++) {
if (i >= STMMAC_PPS_MAX)
break;
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 33142d505fc8..03575c017500 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -349,7 +349,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
struct cpsw_common *cpsw = ndev_to_cpsw(xmeta->ndev);
int pkt_size = cpsw->rx_packet_max;
int ret = 0, port, ch = xmeta->ch;
- int headroom = CPSW_HEADROOM;
+ int headroom = CPSW_HEADROOM_NA;
struct net_device *ndev = xmeta->ndev;
struct cpsw_priv *priv;
struct page_pool *pool;
@@ -392,7 +392,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
}
if (priv->xdp_prog) {
- int headroom = CPSW_HEADROOM, size = len;
+ int size = len;
xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]);
if (status & CPDMA_RX_VLAN_ENCAP) {
@@ -442,7 +442,7 @@ requeue:
xmeta->ndev = ndev;
xmeta->ch = ch;
- dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
+ dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA;
ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
pkt_size, 0);
if (ret < 0) {
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 279e261e4720..bd4b1528cf99 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -283,7 +283,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
{
struct page *new_page, *page = token;
void *pa = page_address(page);
- int headroom = CPSW_HEADROOM;
+ int headroom = CPSW_HEADROOM_NA;
struct cpsw_meta_xdp *xmeta;
struct cpsw_common *cpsw;
struct net_device *ndev;
@@ -336,7 +336,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
}
if (priv->xdp_prog) {
- int headroom = CPSW_HEADROOM, size = len;
+ int size = len;
xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]);
if (status & CPDMA_RX_VLAN_ENCAP) {
@@ -386,7 +386,7 @@ requeue:
xmeta->ndev = ndev;
xmeta->ch = ch;
- dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
+ dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA;
ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
pkt_size, 0);
if (ret < 0) {
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index 3537502e5e8b..8f6817f346ba 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -1122,7 +1122,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv)
xmeta->ndev = priv->ndev;
xmeta->ch = ch;
- dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM;
+ dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM_NA;
ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch,
page, dma,
cpsw->rx_packet_max,
@@ -1146,7 +1146,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv)
static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw,
int size)
{
- struct page_pool_params pp_params;
+ struct page_pool_params pp_params = {};
struct page_pool *pool;
pp_params.order = 0;
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index cf0917b29e30..5251fc324221 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1091,20 +1091,22 @@ static int tsi108_get_mac(struct net_device *dev)
struct tsi108_prv_data *data = netdev_priv(dev);
u32 word1 = TSI_READ(TSI108_MAC_ADDR1);
u32 word2 = TSI_READ(TSI108_MAC_ADDR2);
+ u8 addr[ETH_ALEN];
/* Note that the octets are reversed from what the manual says,
* producing an even weirder ordering...
*/
if (word2 == 0 && word1 == 0) {
- dev->dev_addr[0] = 0x00;
- dev->dev_addr[1] = 0x06;
- dev->dev_addr[2] = 0xd2;
- dev->dev_addr[3] = 0x00;
- dev->dev_addr[4] = 0x00;
+ addr[0] = 0x00;
+ addr[1] = 0x06;
+ addr[2] = 0xd2;
+ addr[3] = 0x00;
+ addr[4] = 0x00;
if (0x8 == data->phy)
- dev->dev_addr[5] = 0x01;
+ addr[5] = 0x01;
else
- dev->dev_addr[5] = 0x02;
+ addr[5] = 0x02;
+ eth_hw_addr_set(dev, addr);
word2 = (dev->dev_addr[0] << 16) | (dev->dev_addr[1] << 24);
@@ -1114,12 +1116,13 @@ static int tsi108_get_mac(struct net_device *dev)
TSI_WRITE(TSI108_MAC_ADDR1, word1);
TSI_WRITE(TSI108_MAC_ADDR2, word2);
} else {
- dev->dev_addr[0] = (word2 >> 16) & 0xff;
- dev->dev_addr[1] = (word2 >> 24) & 0xff;
- dev->dev_addr[2] = (word1 >> 0) & 0xff;
- dev->dev_addr[3] = (word1 >> 8) & 0xff;
- dev->dev_addr[4] = (word1 >> 16) & 0xff;
- dev->dev_addr[5] = (word1 >> 24) & 0xff;
+ addr[0] = (word2 >> 16) & 0xff;
+ addr[1] = (word2 >> 24) & 0xff;
+ addr[2] = (word1 >> 0) & 0xff;
+ addr[3] = (word1 >> 8) & 0xff;
+ addr[4] = (word1 >> 16) & 0xff;
+ addr[5] = (word1 >> 24) & 0xff;
+ eth_hw_addr_set(dev, addr);
}
if (!is_valid_ether_addr(dev->dev_addr)) {
@@ -1136,14 +1139,12 @@ static int tsi108_set_mac(struct net_device *dev, void *addr)
{
struct tsi108_prv_data *data = netdev_priv(dev);
u32 word1, word2;
- int i;
if (!is_valid_ether_addr(addr))
return -EADDRNOTAVAIL;
- for (i = 0; i < 6; i++)
- /* +2 is for the offset of the HW addr type */
- dev->dev_addr[i] = ((unsigned char *)addr)[i + 2];
+ /* +2 is for the offset of the HW addr type */
+ eth_hw_addr_set(dev, ((unsigned char *)addr) + 2);
word2 = (dev->dev_addr[0] << 16) | (dev->dev_addr[1] << 24);
diff --git a/drivers/net/ethernet/vertexcom/Kconfig b/drivers/net/ethernet/vertexcom/Kconfig
index 6e2cf062ddba..4184a635fe01 100644
--- a/drivers/net/ethernet/vertexcom/Kconfig
+++ b/drivers/net/ethernet/vertexcom/Kconfig
@@ -5,7 +5,7 @@
config NET_VENDOR_VERTEXCOM
bool "Vertexcom devices"
- default n
+ default y
help
If you have a network (Ethernet) card belonging to this class, say Y.
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 23ac353b35fe..377c94ec2486 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -41,8 +41,9 @@
#include "xilinx_axienet.h"
/* Descriptors defines for Tx and Rx DMA */
-#define TX_BD_NUM_DEFAULT 64
+#define TX_BD_NUM_DEFAULT 128
#define RX_BD_NUM_DEFAULT 1024
+#define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1)
#define TX_BD_NUM_MAX 4096
#define RX_BD_NUM_MAX 4096
@@ -496,7 +497,8 @@ static void axienet_setoptions(struct net_device *ndev, u32 options)
static int __axienet_device_reset(struct axienet_local *lp)
{
- u32 timeout;
+ u32 value;
+ int ret;
/* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset
* process of Axi DMA takes a while to complete as all pending
@@ -506,15 +508,23 @@ static int __axienet_device_reset(struct axienet_local *lp)
* they both reset the entire DMA core, so only one needs to be used.
*/
axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK);
- timeout = DELAY_OF_ONE_MILLISEC;
- while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) &
- XAXIDMA_CR_RESET_MASK) {
- udelay(1);
- if (--timeout == 0) {
- netdev_err(lp->ndev, "%s: DMA reset timeout!\n",
- __func__);
- return -ETIMEDOUT;
- }
+ ret = read_poll_timeout(axienet_dma_in32, value,
+ !(value & XAXIDMA_CR_RESET_MASK),
+ DELAY_OF_ONE_MILLISEC, 50000, false, lp,
+ XAXIDMA_TX_CR_OFFSET);
+ if (ret) {
+ dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__);
+ return ret;
+ }
+
+ /* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */
+ ret = read_poll_timeout(axienet_ior, value,
+ value & XAE_INT_PHYRSTCMPLT_MASK,
+ DELAY_OF_ONE_MILLISEC, 50000, false, lp,
+ XAE_IS_OFFSET);
+ if (ret) {
+ dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__);
+ return ret;
}
return 0;
@@ -623,6 +633,8 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
if (nr_bds == -1 && !(status & XAXIDMA_BD_STS_COMPLETE_MASK))
break;
+ /* Ensure we see complete descriptor update */
+ dma_rmb();
phys = desc_get_phys_addr(lp, cur_p);
dma_unmap_single(ndev->dev.parent, phys,
(cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
@@ -631,13 +643,15 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK))
dev_consume_skb_irq(cur_p->skb);
- cur_p->cntrl = 0;
cur_p->app0 = 0;
cur_p->app1 = 0;
cur_p->app2 = 0;
cur_p->app4 = 0;
- cur_p->status = 0;
cur_p->skb = NULL;
+ /* ensure our transmit path and device don't prematurely see status cleared */
+ wmb();
+ cur_p->cntrl = 0;
+ cur_p->status = 0;
if (sizep)
*sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
@@ -647,6 +661,32 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
}
/**
+ * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
+ * @lp: Pointer to the axienet_local structure
+ * @num_frag: The number of BDs to check for
+ *
+ * Return: 0, on success
+ * NETDEV_TX_BUSY, if any of the descriptors are not free
+ *
+ * This function is invoked before BDs are allocated and transmission starts.
+ * This function returns 0 if a BD or group of BDs can be allocated for
+ * transmission. If the BD or any of the BDs are not free the function
+ * returns a busy status. This is invoked from axienet_start_xmit.
+ */
+static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
+ int num_frag)
+{
+ struct axidma_bd *cur_p;
+
+ /* Ensure we see all descriptor updates from device or TX IRQ path */
+ rmb();
+ cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
+ if (cur_p->cntrl)
+ return NETDEV_TX_BUSY;
+ return 0;
+}
+
+/**
* axienet_start_xmit_done - Invoked once a transmit is completed by the
* Axi DMA Tx channel.
* @ndev: Pointer to the net_device structure
@@ -675,30 +715,8 @@ static void axienet_start_xmit_done(struct net_device *ndev)
/* Matches barrier in axienet_start_xmit */
smp_mb();
- netif_wake_queue(ndev);
-}
-
-/**
- * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
- * @lp: Pointer to the axienet_local structure
- * @num_frag: The number of BDs to check for
- *
- * Return: 0, on success
- * NETDEV_TX_BUSY, if any of the descriptors are not free
- *
- * This function is invoked before BDs are allocated and transmission starts.
- * This function returns 0 if a BD or group of BDs can be allocated for
- * transmission. If the BD or any of the BDs are not free the function
- * returns a busy status. This is invoked from axienet_start_xmit.
- */
-static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
- int num_frag)
-{
- struct axidma_bd *cur_p;
- cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
- if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK)
- return NETDEV_TX_BUSY;
- return 0;
+ if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
+ netif_wake_queue(ndev);
}
/**
@@ -730,20 +748,15 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
num_frag = skb_shinfo(skb)->nr_frags;
cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
- if (axienet_check_tx_bd_space(lp, num_frag)) {
- if (netif_queue_stopped(ndev))
- return NETDEV_TX_BUSY;
-
+ if (axienet_check_tx_bd_space(lp, num_frag + 1)) {
+ /* Should not happen as last start_xmit call should have
+ * checked for sufficient space and queue should only be
+ * woken when sufficient space is available.
+ */
netif_stop_queue(ndev);
-
- /* Matches barrier in axienet_start_xmit_done */
- smp_mb();
-
- /* Space might have just been freed - check again */
- if (axienet_check_tx_bd_space(lp, num_frag))
- return NETDEV_TX_BUSY;
-
- netif_wake_queue(ndev);
+ if (net_ratelimit())
+ netdev_warn(ndev, "TX ring unexpectedly full\n");
+ return NETDEV_TX_BUSY;
}
if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -804,6 +817,18 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
if (++lp->tx_bd_tail >= lp->tx_bd_num)
lp->tx_bd_tail = 0;
+ /* Stop queue if next transmit may not have space */
+ if (axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
+ netif_stop_queue(ndev);
+
+ /* Matches barrier in axienet_start_xmit_done */
+ smp_mb();
+
+ /* Space might have just been freed - check again */
+ if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
+ netif_wake_queue(ndev);
+ }
+
return NETDEV_TX_OK;
}
@@ -834,6 +859,8 @@ static void axienet_recv(struct net_device *ndev)
tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
+ /* Ensure we see complete descriptor update */
+ dma_rmb();
phys = desc_get_phys_addr(lp, cur_p);
dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size,
DMA_FROM_DEVICE);
@@ -1352,7 +1379,8 @@ axienet_ethtools_set_ringparam(struct net_device *ndev,
if (ering->rx_pending > RX_BD_NUM_MAX ||
ering->rx_mini_pending ||
ering->rx_jumbo_pending ||
- ering->rx_pending > TX_BD_NUM_MAX)
+ ering->tx_pending < TX_BD_NUM_MIN ||
+ ering->tx_pending > TX_BD_NUM_MAX)
return -EINVAL;
if (netif_running(ndev))
@@ -2027,6 +2055,11 @@ static int axienet_probe(struct platform_device *pdev)
lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
+ /* Reset core now that clocks are enabled, prior to accessing MDIO */
+ ret = __axienet_device_reset(lp);
+ if (ret)
+ goto cleanup_clk;
+
lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
if (lp->phy_node) {
ret = axienet_mdio_setup(lp);
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 6376b8485976..980f2be32f05 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -950,9 +950,7 @@ static int yam_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __
ym = memdup_user(data, sizeof(struct yamdrv_ioctl_mcs));
if (IS_ERR(ym))
return PTR_ERR(ym);
- if (ym->cmd != SIOCYAMSMCS)
- return -EINVAL;
- if (ym->bitrate > YAM_MAXBITRATE) {
+ if (ym->cmd != SIOCYAMSMCS || ym->bitrate > YAM_MAXBITRATE) {
kfree(ym);
return -EINVAL;
}
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 49d9a077d037..68291a3efd04 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1080,27 +1080,38 @@ static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, bool add_one)
{
struct gsi *gsi;
u32 backlog;
+ int delta;
- if (!endpoint->replenish_enabled) {
+ if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) {
if (add_one)
atomic_inc(&endpoint->replenish_saved);
return;
}
+ /* If already active, just update the backlog */
+ if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) {
+ if (add_one)
+ atomic_inc(&endpoint->replenish_backlog);
+ return;
+ }
+
while (atomic_dec_not_zero(&endpoint->replenish_backlog))
if (ipa_endpoint_replenish_one(endpoint))
goto try_again_later;
+
+ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
+
if (add_one)
atomic_inc(&endpoint->replenish_backlog);
return;
try_again_later:
- /* The last one didn't succeed, so fix the backlog */
- backlog = atomic_inc_return(&endpoint->replenish_backlog);
+ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
- if (add_one)
- atomic_inc(&endpoint->replenish_backlog);
+ /* The last one didn't succeed, so fix the backlog */
+ delta = add_one ? 2 : 1;
+ backlog = atomic_add_return(delta, &endpoint->replenish_backlog);
/* Whenever a receive buffer transaction completes we'll try to
* replenish again. It's unlikely, but if we fail to supply even
@@ -1120,7 +1131,7 @@ static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint)
u32 max_backlog;
u32 saved;
- endpoint->replenish_enabled = true;
+ set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
while ((saved = atomic_xchg(&endpoint->replenish_saved, 0)))
atomic_add(saved, &endpoint->replenish_backlog);
@@ -1134,7 +1145,7 @@ static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint)
{
u32 backlog;
- endpoint->replenish_enabled = false;
+ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0)))
atomic_add(backlog, &endpoint->replenish_saved);
}
@@ -1691,7 +1702,8 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint)
/* RX transactions require a single TRE, so the maximum
* backlog is the same as the maximum outstanding TREs.
*/
- endpoint->replenish_enabled = false;
+ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
atomic_set(&endpoint->replenish_saved,
gsi_channel_tre_max(gsi, endpoint->channel_id));
atomic_set(&endpoint->replenish_backlog, 0);
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index 0a859d10312d..0313cdc607de 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -41,6 +41,19 @@ enum ipa_endpoint_name {
#define IPA_ENDPOINT_MAX 32 /* Max supported by driver */
/**
+ * enum ipa_replenish_flag: RX buffer replenish flags
+ *
+ * @IPA_REPLENISH_ENABLED: Whether receive buffer replenishing is enabled
+ * @IPA_REPLENISH_ACTIVE: Whether replenishing is underway
+ * @IPA_REPLENISH_COUNT: Number of defined replenish flags
+ */
+enum ipa_replenish_flag {
+ IPA_REPLENISH_ENABLED,
+ IPA_REPLENISH_ACTIVE,
+ IPA_REPLENISH_COUNT, /* Number of flags (must be last) */
+};
+
+/**
* struct ipa_endpoint - IPA endpoint information
* @ipa: IPA pointer
* @ee_id: Execution environmnent endpoint is associated with
@@ -51,7 +64,7 @@ enum ipa_endpoint_name {
* @trans_tre_max: Maximum number of TRE descriptors per transaction
* @evt_ring_id: GSI event ring used by the endpoint
* @netdev: Network device pointer, if endpoint uses one
- * @replenish_enabled: Whether receive buffer replenishing is enabled
+ * @replenish_flags: Replenishing state flags
* @replenish_ready: Number of replenish transactions without doorbell
* @replenish_saved: Replenish requests held while disabled
* @replenish_backlog: Number of buffers needed to fill hardware queue
@@ -72,7 +85,7 @@ struct ipa_endpoint {
struct net_device *netdev;
/* Receive buffer replenishing for RX endpoints */
- bool replenish_enabled;
+ DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT);
u32 replenish_ready;
atomic_t replenish_saved;
atomic_t replenish_backlog;
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index dae95d9a07e8..5b6c0d120e09 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -421,7 +421,7 @@ static int at803x_set_wol(struct phy_device *phydev,
const u8 *mac;
int ret, irq_enabled;
unsigned int i;
- const unsigned int offsets[] = {
+ static const unsigned int offsets[] = {
AT803X_LOC_MAC_ADDR_32_47_OFFSET,
AT803X_LOC_MAC_ADDR_16_31_OFFSET,
AT803X_LOC_MAC_ADDR_0_15_OFFSET,
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index bb5104ae4610..3c683e0e40e9 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -854,6 +854,7 @@ static struct phy_driver broadcom_drivers[] = {
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM54616S",
/* PHY_GBIT_FEATURES */
+ .soft_reset = genphy_soft_reset,
.config_init = bcm54xx_config_init,
.config_aneg = bcm54616s_config_aneg,
.config_intr = bcm_phy_config_intr,
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 739859c0dfb1..fa71fb7a66b5 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -189,6 +189,8 @@
#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII 0x4
#define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */
+#define MII_88E1510_MSCR_2 0x15
+
#define MII_VCT5_TX_RX_MDI0_COUPLING 0x10
#define MII_VCT5_TX_RX_MDI1_COUPLING 0x11
#define MII_VCT5_TX_RX_MDI2_COUPLING 0x12
@@ -1932,6 +1934,58 @@ static void marvell_get_stats(struct phy_device *phydev,
data[i] = marvell_get_stat(phydev, i);
}
+static int m88e1510_loopback(struct phy_device *phydev, bool enable)
+{
+ int err;
+
+ if (enable) {
+ u16 bmcr_ctl = 0, mscr2_ctl = 0;
+
+ if (phydev->speed == SPEED_1000)
+ bmcr_ctl = BMCR_SPEED1000;
+ else if (phydev->speed == SPEED_100)
+ bmcr_ctl = BMCR_SPEED100;
+
+ if (phydev->duplex == DUPLEX_FULL)
+ bmcr_ctl |= BMCR_FULLDPLX;
+
+ err = phy_write(phydev, MII_BMCR, bmcr_ctl);
+ if (err < 0)
+ return err;
+
+ if (phydev->speed == SPEED_1000)
+ mscr2_ctl = BMCR_SPEED1000;
+ else if (phydev->speed == SPEED_100)
+ mscr2_ctl = BMCR_SPEED100;
+
+ err = phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE,
+ MII_88E1510_MSCR_2, BMCR_SPEED1000 |
+ BMCR_SPEED100, mscr2_ctl);
+ if (err < 0)
+ return err;
+
+ /* Need soft reset to have speed configuration takes effect */
+ err = genphy_soft_reset(phydev);
+ if (err < 0)
+ return err;
+
+ /* FIXME: Based on trial and error test, it seem 1G need to have
+ * delay between soft reset and loopback enablement.
+ */
+ if (phydev->speed == SPEED_1000)
+ msleep(1000);
+
+ return phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK,
+ BMCR_LOOPBACK);
+ } else {
+ err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0);
+ if (err < 0)
+ return err;
+
+ return phy_config_aneg(phydev);
+ }
+}
+
static int marvell_vct5_wait_complete(struct phy_device *phydev)
{
int i;
@@ -3078,7 +3132,7 @@ static struct phy_driver marvell_drivers[] = {
.get_sset_count = marvell_get_sset_count,
.get_strings = marvell_get_strings,
.get_stats = marvell_get_stats,
- .set_loopback = genphy_loopback,
+ .set_loopback = m88e1510_loopback,
.get_tunable = m88e1011_get_tunable,
.set_tunable = m88e1011_set_tunable,
.cable_test_start = marvell_vct7_cable_test_start,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 4570cb9535b7..a7ebcdab415b 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1726,8 +1726,8 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init,
.config_intr = kszphy_config_intr,
.handle_interrupt = kszphy_handle_interrupt,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8021,
.phy_id_mask = 0x00ffffff,
@@ -1741,8 +1741,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8031,
.phy_id_mask = 0x00ffffff,
@@ -1756,8 +1756,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8041,
.phy_id_mask = MICREL_PHY_ID_MASK,
@@ -1788,8 +1788,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.name = "Micrel KSZ8051",
/* PHY_BASIC_FEATURES */
@@ -1802,8 +1802,8 @@ static struct phy_driver ksphy_driver[] = {
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.match_phy_device = ksz8051_match_phy_device,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8001,
.name = "Micrel KSZ8001 or KS8721",
@@ -1817,8 +1817,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8081,
.name = "Micrel KSZ8081 or KSZ8091",
@@ -1848,8 +1848,8 @@ static struct phy_driver ksphy_driver[] = {
.config_init = ksz8061_config_init,
.config_intr = kszphy_config_intr,
.handle_interrupt = kszphy_handle_interrupt,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ9021,
.phy_id_mask = 0x000ffffe,
@@ -1864,8 +1864,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
.read_mmd = genphy_read_mmd_unsupported,
.write_mmd = genphy_write_mmd_unsupported,
}, {
@@ -1883,7 +1883,7 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
+ .suspend = kszphy_suspend,
.resume = kszphy_resume,
}, {
.phy_id = PHY_ID_LAN8814,
@@ -1928,7 +1928,7 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
+ .suspend = kszphy_suspend,
.resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8873MLL,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 74d8e1dc125f..ce0bb5951b81 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1746,6 +1746,9 @@ void phy_detach(struct phy_device *phydev)
phy_driver_is_genphy_10g(phydev))
device_release_driver(&phydev->mdio.dev);
+ /* Assert the reset signal */
+ phy_device_reset(phydev, 1);
+
/*
* The phydev might go away on the put_device() below, so avoid
* a use-after-free bug by reading the underlying bus first.
@@ -1757,9 +1760,6 @@ void phy_detach(struct phy_device *phydev)
ndev_owner = dev->dev.parent->driver->owner;
if (ndev_owner != bus->owner)
module_put(bus->owner);
-
- /* Assert the reset signal */
- phy_device_reset(phydev, 1);
}
EXPORT_SYMBOL(phy_detach);
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 0c6c0d1843bc..c1512c9925a6 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -651,6 +651,11 @@ struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode)
else if (ret < 0)
return ERR_PTR(ret);
+ if (!fwnode_device_is_available(ref.fwnode)) {
+ fwnode_handle_put(ref.fwnode);
+ return NULL;
+ }
+
bus = sfp_bus_get(ref.fwnode);
fwnode_handle_put(ref.fwnode);
if (!bus)
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index ab77a9f439ef..4720b24ca51b 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1641,17 +1641,20 @@ static int sfp_sm_probe_for_phy(struct sfp *sfp)
static int sfp_module_parse_power(struct sfp *sfp)
{
u32 power_mW = 1000;
+ bool supports_a2;
if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
power_mW = 1500;
if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
power_mW = 2000;
+ supports_a2 = sfp->id.ext.sff8472_compliance !=
+ SFP_SFF8472_COMPLIANCE_NONE ||
+ sfp->id.ext.diagmon & SFP_DIAGMON_DDM;
+
if (power_mW > sfp->max_power_mW) {
/* Module power specification exceeds the allowed maximum. */
- if (sfp->id.ext.sff8472_compliance ==
- SFP_SFF8472_COMPLIANCE_NONE &&
- !(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) {
+ if (!supports_a2) {
/* The module appears not to implement bus address
* 0xa2, so assume that the module powers up in the
* indicated mode.
@@ -1668,11 +1671,25 @@ static int sfp_module_parse_power(struct sfp *sfp)
}
}
+ if (power_mW <= 1000) {
+ /* Modules below 1W do not require a power change sequence */
+ sfp->module_power_mW = power_mW;
+ return 0;
+ }
+
+ if (!supports_a2) {
+ /* The module power level is below the host maximum and the
+ * module appears not to implement bus address 0xa2, so assume
+ * that the module powers up in the indicated mode.
+ */
+ return 0;
+ }
+
/* If the module requires a higher power mode, but also requires
* an address change sequence, warn the user that the module may
* not be functional.
*/
- if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE && power_mW > 1000) {
+ if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) {
dev_warn(sfp->dev,
"Address Change Sequence not supported but module requires %u.%uW, module may not be functional\n",
power_mW / 1000, (power_mW / 100) % 10);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index f510e8219470..37e5f3495362 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1316,6 +1316,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */
{QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */
{QMI_FIXED_INTF(0x19d2, 0x1432, 3)}, /* ZTE ME3620 */
+ {QMI_FIXED_INTF(0x19d2, 0x1485, 5)}, /* ZTE MF286D */
{QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */
{QMI_FIXED_INTF(0x2001, 0x7e16, 3)}, /* D-Link DWM-221 */
{QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */
@@ -1401,6 +1402,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/
{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */
+ {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */
{QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
{QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index abe0149ed917..bc1e3dd67c04 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1962,7 +1962,8 @@ static const struct driver_info smsc95xx_info = {
.bind = smsc95xx_bind,
.unbind = smsc95xx_unbind,
.link_reset = smsc95xx_link_reset,
- .reset = smsc95xx_start_phy,
+ .reset = smsc95xx_reset,
+ .check_connect = smsc95xx_start_phy,
.stop = smsc95xx_stop,
.rx_fixup = smsc95xx_rx_fixup,
.tx_fixup = smsc95xx_tx_fixup,
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3d97f158ec59..a801ea40908f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2101,7 +2101,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
stragglers = num_cpu >= vi->curr_queue_pairs ?
num_cpu % vi->curr_queue_pairs :
0;
- cpu = cpumask_next(-1, cpu_online_mask);
+ cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < vi->curr_queue_pairs; i++) {
group_size = stride + (i < stragglers ? 1 : 0);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
index 2f3c451148db..2f8908074303 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
@@ -4,6 +4,8 @@
*/
#include <asm/unaligned.h>
+
+#include <linux/math.h>
#include <linux/string.h>
#include <linux/bug.h>
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 45594f003ef7..452d08545d31 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -4672,7 +4672,7 @@ static ssize_t proc_write(struct file *file,
static int proc_status_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *apriv = dev->ml_priv;
CapabilityRid cap_rid;
StatusRid status_rid;
@@ -4756,7 +4756,7 @@ static int proc_stats_rid_open(struct inode *inode,
u16 rid)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *apriv = dev->ml_priv;
StatsRid stats;
int i, j;
@@ -4819,7 +4819,7 @@ static inline int sniffing_mode(struct airo_info *ai)
static void proc_config_on_close(struct inode *inode, struct file *file)
{
struct proc_data *data = file->private_data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
char *line;
@@ -5030,7 +5030,7 @@ static const char *get_rmode(__le16 mode)
static int proc_config_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
int i;
__le16 mode;
@@ -5120,7 +5120,7 @@ static int proc_config_open(struct inode *inode, struct file *file)
static void proc_SSID_on_close(struct inode *inode, struct file *file)
{
struct proc_data *data = file->private_data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
SsidRid SSID_rid;
int i;
@@ -5156,7 +5156,7 @@ static void proc_SSID_on_close(struct inode *inode, struct file *file)
static void proc_APList_on_close(struct inode *inode, struct file *file)
{
struct proc_data *data = file->private_data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
APListRid *APList_rid = &ai->APList;
int i;
@@ -5280,7 +5280,7 @@ static int set_wep_tx_idx(struct airo_info *ai, u16 index, int perm, int lock)
static void proc_wepkey_on_close(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
int i, rc;
char key[16];
@@ -5331,7 +5331,7 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file)
static int proc_wepkey_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
char *ptr;
WepKeyRid wkr;
@@ -5379,7 +5379,7 @@ static int proc_wepkey_open(struct inode *inode, struct file *file)
static int proc_SSID_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
int i;
char *ptr;
@@ -5423,7 +5423,7 @@ static int proc_SSID_open(struct inode *inode, struct file *file)
static int proc_APList_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
int i;
char *ptr;
@@ -5462,7 +5462,7 @@ static int proc_APList_open(struct inode *inode, struct file *file)
static int proc_BSSList_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
char *ptr;
BSSListRid BSSList_rid;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c
index 8bcc1cdcb75b..462ccc7d7d1a 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
@@ -69,7 +69,7 @@ static void prism2_send_mgmt(struct net_device *dev,
#if !defined(PRISM2_NO_PROCFS_DEBUG) && defined(CONFIG_PROC_FS)
static int ap_debug_proc_show(struct seq_file *m, void *v)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
seq_printf(m, "BridgedUnicastFrames=%u\n", ap->bridged_unicast);
seq_printf(m, "BridgedMulticastFrames=%u\n", ap->bridged_multicast);
@@ -320,7 +320,7 @@ void hostap_deauth_all_stas(struct net_device *dev, struct ap_data *ap,
static int ap_control_proc_show(struct seq_file *m, void *v)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
char *policy_txt;
struct mac_entry *entry;
@@ -352,20 +352,20 @@ static int ap_control_proc_show(struct seq_file *m, void *v)
static void *ap_control_proc_start(struct seq_file *m, loff_t *_pos)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
spin_lock_bh(&ap->mac_restrictions.lock);
return seq_list_start_head(&ap->mac_restrictions.mac_list, *_pos);
}
static void *ap_control_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
return seq_list_next(v, &ap->mac_restrictions.mac_list, _pos);
}
static void ap_control_proc_stop(struct seq_file *m, void *v)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
spin_unlock_bh(&ap->mac_restrictions.lock);
}
@@ -554,20 +554,20 @@ static int prism2_ap_proc_show(struct seq_file *m, void *v)
static void *prism2_ap_proc_start(struct seq_file *m, loff_t *_pos)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
spin_lock_bh(&ap->sta_table_lock);
return seq_list_start_head(&ap->sta_list, *_pos);
}
static void *prism2_ap_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
return seq_list_next(v, &ap->sta_list, _pos);
}
static void prism2_ap_proc_stop(struct seq_file *m, void *v)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
spin_unlock_bh(&ap->sta_table_lock);
}
diff --git a/drivers/net/wireless/intersil/hostap/hostap_download.c b/drivers/net/wireless/intersil/hostap/hostap_download.c
index 7c6a5a6d1d45..3672291ced5c 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_download.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_download.c
@@ -227,7 +227,7 @@ static int prism2_download_aux_dump_proc_open(struct inode *inode, struct file *
sizeof(struct prism2_download_aux_dump));
if (ret == 0) {
struct seq_file *m = file->private_data;
- m->private = PDE_DATA(inode);
+ m->private = pde_data(inode);
}
return ret;
}
diff --git a/drivers/net/wireless/intersil/hostap/hostap_proc.c b/drivers/net/wireless/intersil/hostap/hostap_proc.c
index 51c847d98755..61f68786056f 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_proc.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_proc.c
@@ -97,20 +97,20 @@ static int prism2_wds_proc_show(struct seq_file *m, void *v)
static void *prism2_wds_proc_start(struct seq_file *m, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
read_lock_bh(&local->iface_lock);
return seq_list_start(&local->hostap_interfaces, *_pos);
}
static void *prism2_wds_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
return seq_list_next(v, &local->hostap_interfaces, _pos);
}
static void prism2_wds_proc_stop(struct seq_file *m, void *v)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
read_unlock_bh(&local->iface_lock);
}
@@ -123,7 +123,7 @@ static const struct seq_operations prism2_wds_proc_seqops = {
static int prism2_bss_list_proc_show(struct seq_file *m, void *v)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
struct list_head *ptr = v;
struct hostap_bss_info *bss;
@@ -151,21 +151,21 @@ static int prism2_bss_list_proc_show(struct seq_file *m, void *v)
static void *prism2_bss_list_proc_start(struct seq_file *m, loff_t *_pos)
__acquires(&local->lock)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
spin_lock_bh(&local->lock);
return seq_list_start_head(&local->bss_list, *_pos);
}
static void *prism2_bss_list_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
return seq_list_next(v, &local->bss_list, _pos);
}
static void prism2_bss_list_proc_stop(struct seq_file *m, void *v)
__releases(&local->lock)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
spin_unlock_bh(&local->lock);
}
@@ -198,7 +198,7 @@ static int prism2_crypt_proc_show(struct seq_file *m, void *v)
static ssize_t prism2_pda_proc_read(struct file *file, char __user *buf,
size_t count, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(file));
+ local_info_t *local = pde_data(file_inode(file));
size_t off;
if (local->pda == NULL || *_pos >= PRISM2_PDA_SIZE)
@@ -272,7 +272,7 @@ static int prism2_io_debug_proc_read(char *page, char **start, off_t off,
#ifndef PRISM2_NO_STATION_MODES
static int prism2_scan_results_proc_show(struct seq_file *m, void *v)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
unsigned long entry;
int i, len;
struct hfa384x_hostscan_result *scanres;
@@ -322,7 +322,7 @@ static int prism2_scan_results_proc_show(struct seq_file *m, void *v)
static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
spin_lock_bh(&local->lock);
/* We have a header (pos 0) + N results to show (pos 1...N) */
@@ -333,7 +333,7 @@ static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos)
static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
++*_pos;
if (*_pos > local->last_scan_results_count)
@@ -343,7 +343,7 @@ static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t *
static void prism2_scan_results_proc_stop(struct seq_file *m, void *v)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
spin_unlock_bh(&local->lock);
}
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index e3a3dc3e45b4..2987ad9271f6 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2746,7 +2746,7 @@ static ssize_t int_proc_write(struct file *file, const char __user *buffer,
nr = nr * 10 + c;
p++;
} while (--len);
- *(int *)PDE_DATA(file_inode(file)) = nr;
+ *(int *)pde_data(file_inode(file)) = nr;
return count;
}
diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
index 71bf9b4f769f..6872782e8dd8 100644
--- a/drivers/net/wwan/mhi_wwan_mbim.c
+++ b/drivers/net/wwan/mhi_wwan_mbim.c
@@ -385,13 +385,13 @@ static void mhi_net_rx_refill_work(struct work_struct *work)
int err;
while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
- struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL);
+ struct sk_buff *skb = alloc_skb(mbim->mru, GFP_KERNEL);
if (unlikely(!skb))
break;
err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb,
- MHI_DEFAULT_MRU, MHI_EOT);
+ mbim->mru, MHI_EOT);
if (unlikely(err)) {
kfree_skb(skb);
break;
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index 37d26f01986b..62a0f1a010cb 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -188,7 +188,7 @@ do { \
static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy)
{
int polarity, retry, ret;
- char rset_cmd[] = { 0x05, 0xF9, 0x04, 0x00, 0xC3, 0xE5 };
+ static const char rset_cmd[] = { 0x05, 0xF9, 0x04, 0x00, 0xC3, 0xE5 };
int count = sizeof(rset_cmd);
nfc_info(&phy->i2c_dev->dev, "Detecting nfc_en polarity\n");
diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c
index a43fc4117fa5..c922f10d0d7b 100644
--- a/drivers/nfc/st21nfca/se.c
+++ b/drivers/nfc/st21nfca/se.c
@@ -316,6 +316,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host,
return -ENOMEM;
transaction->aid_len = skb->data[1];
+
+ /* Checking if the length of the AID is valid */
+ if (transaction->aid_len > sizeof(transaction->aid))
+ return -EINVAL;
+
memcpy(transaction->aid, &skb->data[2],
transaction->aid_len);
@@ -325,6 +330,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host,
return -EPROTO;
transaction->params_len = skb->data[transaction->aid_len + 3];
+
+ /* Total size is allocated (skb->len - 2) minus fixed array members */
+ if (transaction->params_len > ((skb->len - 2) - sizeof(struct nfc_evt_transaction)))
+ return -EINVAL;
+
memcpy(transaction->params, skb->data +
transaction->aid_len + 4, transaction->params_len);
diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c
index 88e1f9a0faaf..1fd667852271 100644
--- a/drivers/nubus/proc.c
+++ b/drivers/nubus/proc.c
@@ -93,30 +93,30 @@ struct nubus_proc_pde_data {
static struct nubus_proc_pde_data *
nubus_proc_alloc_pde_data(unsigned char *ptr, unsigned int size)
{
- struct nubus_proc_pde_data *pde_data;
+ struct nubus_proc_pde_data *pded;
- pde_data = kmalloc(sizeof(*pde_data), GFP_KERNEL);
- if (!pde_data)
+ pded = kmalloc(sizeof(*pded), GFP_KERNEL);
+ if (!pded)
return NULL;
- pde_data->res_ptr = ptr;
- pde_data->res_size = size;
- return pde_data;
+ pded->res_ptr = ptr;
+ pded->res_size = size;
+ return pded;
}
static int nubus_proc_rsrc_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct nubus_proc_pde_data *pde_data;
+ struct nubus_proc_pde_data *pded;
- pde_data = PDE_DATA(inode);
- if (!pde_data)
+ pded = pde_data(inode);
+ if (!pded)
return 0;
- if (pde_data->res_size > m->size)
+ if (pded->res_size > m->size)
return -EFBIG;
- if (pde_data->res_size) {
+ if (pded->res_size) {
int lanes = (int)proc_get_parent_data(inode);
struct nubus_dirent ent;
@@ -124,11 +124,11 @@ static int nubus_proc_rsrc_show(struct seq_file *m, void *v)
return 0;
ent.mask = lanes;
- ent.base = pde_data->res_ptr;
+ ent.base = pded->res_ptr;
ent.data = 0;
- nubus_seq_write_rsrc_mem(m, &ent, pde_data->res_size);
+ nubus_seq_write_rsrc_mem(m, &ent, pded->res_size);
} else {
- unsigned int data = (unsigned int)pde_data->res_ptr;
+ unsigned int data = (unsigned int)pded->res_ptr;
seq_putc(m, data >> 16);
seq_putc(m, data >> 8);
@@ -142,18 +142,18 @@ void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir,
unsigned int size)
{
char name[9];
- struct nubus_proc_pde_data *pde_data;
+ struct nubus_proc_pde_data *pded;
if (!procdir)
return;
snprintf(name, sizeof(name), "%x", ent->type);
if (size)
- pde_data = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size);
+ pded = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size);
else
- pde_data = NULL;
+ pded = NULL;
proc_create_single_data(name, S_IFREG | 0444, procdir,
- nubus_proc_rsrc_show, pde_data);
+ nubus_proc_rsrc_show, pded);
}
void nubus_proc_add_rsrc(struct proc_dir_entry *procdir,
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 7ae041e2b3fb..f79a66d4e22c 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -1092,7 +1092,6 @@ static void __nvmf_concat_opt_tokens(struct seq_file *seq_file)
static int nvmf_dev_show(struct seq_file *seq_file, void *private)
{
struct nvme_ctrl *ctrl;
- int ret = 0;
mutex_lock(&nvmf_dev_mutex);
ctrl = seq_file->private;
@@ -1106,7 +1105,7 @@ static int nvmf_dev_show(struct seq_file *seq_file, void *private)
out_unlock:
mutex_unlock(&nvmf_dev_mutex);
- return ret;
+ return 0;
}
static int nvmf_dev_open(struct inode *inode, struct file *file)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d8585df2c2fd..6a99ed680915 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3391,7 +3391,8 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */
.driver_data = NVME_QUIRK_STRIPE_SIZE |
- NVME_QUIRK_DEALLOCATE_ZEROES, },
+ NVME_QUIRK_DEALLOCATE_ZEROES |
+ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */
.driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, },
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 8a24d37153b4..e7d92b67cb8a 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1420,15 +1420,18 @@ int of_phandle_iterator_args(struct of_phandle_iterator *it,
return count;
}
-static int __of_parse_phandle_with_args(const struct device_node *np,
- const char *list_name,
- const char *cells_name,
- int cell_count, int index,
- struct of_phandle_args *out_args)
+int __of_parse_phandle_with_args(const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int cell_count, int index,
+ struct of_phandle_args *out_args)
{
struct of_phandle_iterator it;
int rc, cur_index = 0;
+ if (index < 0)
+ return -EINVAL;
+
/* Loop over the phandles until all the requested entry is found */
of_for_each_phandle(&it, rc, np, list_name, cells_name, cell_count) {
/*
@@ -1471,82 +1474,7 @@ static int __of_parse_phandle_with_args(const struct device_node *np,
of_node_put(it.node);
return rc;
}
-
-/**
- * of_parse_phandle - Resolve a phandle property to a device_node pointer
- * @np: Pointer to device node holding phandle property
- * @phandle_name: Name of property holding a phandle value
- * @index: For properties holding a table of phandles, this is the index into
- * the table
- *
- * Return: The device_node pointer with refcount incremented. Use
- * of_node_put() on it when done.
- */
-struct device_node *of_parse_phandle(const struct device_node *np,
- const char *phandle_name, int index)
-{
- struct of_phandle_args args;
-
- if (index < 0)
- return NULL;
-
- if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0,
- index, &args))
- return NULL;
-
- return args.np;
-}
-EXPORT_SYMBOL(of_parse_phandle);
-
-/**
- * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
- * @np: pointer to a device tree node containing a list
- * @list_name: property name that contains a list
- * @cells_name: property name that specifies phandles' arguments count
- * @index: index of a phandle to parse out
- * @out_args: optional pointer to output arguments structure (will be filled)
- *
- * This function is useful to parse lists of phandles and their arguments.
- * Returns 0 on success and fills out_args, on error returns appropriate
- * errno value.
- *
- * Caller is responsible to call of_node_put() on the returned out_args->np
- * pointer.
- *
- * Example::
- *
- * phandle1: node1 {
- * #list-cells = <2>;
- * };
- *
- * phandle2: node2 {
- * #list-cells = <1>;
- * };
- *
- * node3 {
- * list = <&phandle1 1 2 &phandle2 3>;
- * };
- *
- * To get a device_node of the ``node2`` node you may call this:
- * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
- */
-int of_parse_phandle_with_args(const struct device_node *np, const char *list_name,
- const char *cells_name, int index,
- struct of_phandle_args *out_args)
-{
- int cell_count = -1;
-
- if (index < 0)
- return -EINVAL;
-
- /* If cells_name is NULL we assume a cell count of 0 */
- if (!cells_name)
- cell_count = 0;
-
- return __of_parse_phandle_with_args(np, list_name, cells_name,
- cell_count, index, out_args);
-}
-EXPORT_SYMBOL(of_parse_phandle_with_args);
+EXPORT_SYMBOL(__of_parse_phandle_with_args);
/**
* of_parse_phandle_with_args_map() - Find a node pointed by phandle in a list and remap it
@@ -1733,47 +1661,6 @@ free:
EXPORT_SYMBOL(of_parse_phandle_with_args_map);
/**
- * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list
- * @np: pointer to a device tree node containing a list
- * @list_name: property name that contains a list
- * @cell_count: number of argument cells following the phandle
- * @index: index of a phandle to parse out
- * @out_args: optional pointer to output arguments structure (will be filled)
- *
- * This function is useful to parse lists of phandles and their arguments.
- * Returns 0 on success and fills out_args, on error returns appropriate
- * errno value.
- *
- * Caller is responsible to call of_node_put() on the returned out_args->np
- * pointer.
- *
- * Example::
- *
- * phandle1: node1 {
- * };
- *
- * phandle2: node2 {
- * };
- *
- * node3 {
- * list = <&phandle1 0 2 &phandle2 2 3>;
- * };
- *
- * To get a device_node of the ``node2`` node you may call this:
- * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args);
- */
-int of_parse_phandle_with_fixed_args(const struct device_node *np,
- const char *list_name, int cell_count,
- int index, struct of_phandle_args *out_args)
-{
- if (index < 0)
- return -EINVAL;
- return __of_parse_phandle_with_args(np, list_name, NULL, cell_count,
- index, out_args);
-}
-EXPORT_SYMBOL(of_parse_phandle_with_fixed_args);
-
-/**
* of_count_phandle_with_args() - Find the number of phandles references in a property
* @np: pointer to a device tree node containing a list
* @list_name: property name that contains a list
diff --git a/drivers/of/device.c b/drivers/of/device.c
index b0800c260f64..874f031442dc 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -28,7 +28,7 @@
const struct of_device_id *of_match_device(const struct of_device_id *matches,
const struct device *dev)
{
- if ((!matches) || (!dev->of_node))
+ if (!matches || !dev->of_node || dev->of_node_reused)
return NULL;
return of_match_node(matches, dev->of_node);
}
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index cf91cb024be3..1e4a5663d011 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -168,14 +168,14 @@ static int led_proc_show(struct seq_file *m, void *v)
static int led_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, led_proc_show, PDE_DATA(inode));
+ return single_open(file, led_proc_show, pde_data(inode));
}
static ssize_t led_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- void *data = PDE_DATA(file_inode(file));
+ void *data = pde_data(file_inode(file));
char *cur, lbuf[32];
int d;
diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c
index 9513c39719d1..d9e51036a4fa 100644
--- a/drivers/parisc/pdc_stable.c
+++ b/drivers/parisc/pdc_stable.c
@@ -980,8 +980,10 @@ pdcs_register_pathentries(void)
entry->kobj.kset = paths_kset;
err = kobject_init_and_add(&entry->kobj, &ktype_pdcspath, NULL,
"%s", entry->name);
- if (err)
+ if (err) {
+ kobject_put(&entry->kobj);
return err;
+ }
/* kobject is now registered */
write_lock(&entry->rw_lock);
diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
index 12d19183e746..dfcdeb432dc8 100644
--- a/drivers/pci/controller/dwc/pci-dra7xx.c
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -213,7 +213,7 @@ static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index)
if (!val)
return 0;
- pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, 0);
+ pos = find_first_bit(&val, MAX_MSI_IRQS_PER_CTRL);
while (pos != MAX_MSI_IRQS_PER_CTRL) {
generic_handle_domain_irq(pp->irq_domain,
(index * MAX_MSI_IRQS_PER_CTRL) + pos);
diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c
index 3824862ea144..33eb37a2225c 100644
--- a/drivers/pci/controller/pcie-mt7621.c
+++ b/drivers/pci/controller/pcie-mt7621.c
@@ -109,15 +109,6 @@ static inline void pcie_write(struct mt7621_pcie *pcie, u32 val, u32 reg)
writel_relaxed(val, pcie->base + reg);
}
-static inline void pcie_rmw(struct mt7621_pcie *pcie, u32 reg, u32 clr, u32 set)
-{
- u32 val = readl_relaxed(pcie->base + reg);
-
- val &= ~clr;
- val |= set;
- writel_relaxed(val, pcie->base + reg);
-}
-
static inline u32 pcie_port_read(struct mt7621_pcie_port *port, u32 reg)
{
return readl_relaxed(port->base + reg);
@@ -557,7 +548,7 @@ static struct platform_driver mt7621_pcie_driver = {
.remove = mt7621_pcie_remove,
.driver = {
.name = "mt7621-pci",
- .of_match_table = of_match_ptr(mt7621_pcie_ids),
+ .of_match_table = mt7621_pcie_ids,
},
};
builtin_platform_driver(mt7621_pcie_driver);
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index cb18f8a13ab6..9c7edec64f7e 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -21,14 +21,14 @@ static int proc_initialized; /* = 0 */
static loff_t proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
{
- struct pci_dev *dev = PDE_DATA(file_inode(file));
+ struct pci_dev *dev = pde_data(file_inode(file));
return fixed_size_llseek(file, off, whence, dev->cfg_size);
}
static ssize_t proc_bus_pci_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
- struct pci_dev *dev = PDE_DATA(file_inode(file));
+ struct pci_dev *dev = pde_data(file_inode(file));
unsigned int pos = *ppos;
unsigned int cnt, size;
@@ -114,7 +114,7 @@ static ssize_t proc_bus_pci_write(struct file *file, const char __user *buf,
size_t nbytes, loff_t *ppos)
{
struct inode *ino = file_inode(file);
- struct pci_dev *dev = PDE_DATA(ino);
+ struct pci_dev *dev = pde_data(ino);
int pos = *ppos;
int size = dev->cfg_size;
int cnt, ret;
@@ -196,7 +196,7 @@ struct pci_filp_private {
static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- struct pci_dev *dev = PDE_DATA(file_inode(file));
+ struct pci_dev *dev = pde_data(file_inode(file));
#ifdef HAVE_PCI_MMAP
struct pci_filp_private *fpriv = file->private_data;
#endif /* HAVE_PCI_MMAP */
@@ -244,7 +244,7 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
#ifdef HAVE_PCI_MMAP
static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
{
- struct pci_dev *dev = PDE_DATA(file_inode(file));
+ struct pci_dev *dev = pde_data(file_inode(file));
struct pci_filp_private *fpriv = file->private_data;
int i, ret, write_combine = 0, res_bit = IORESOURCE_MEM;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 82fa6148216c..098180fb1cfc 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -880,14 +880,14 @@ static int dispatch_proc_show(struct seq_file *m, void *v)
static int dispatch_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, dispatch_proc_show, PDE_DATA(inode));
+ return single_open(file, dispatch_proc_show, pde_data(inode));
}
static ssize_t dispatch_proc_write(struct file *file,
const char __user *userbuf,
size_t count, loff_t *pos)
{
- struct ibm_struct *ibm = PDE_DATA(file_inode(file));
+ struct ibm_struct *ibm = pde_data(file_inode(file));
char *kernbuf;
int ret;
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 352508d30467..f113dec98e21 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -1368,7 +1368,7 @@ static int lcd_proc_show(struct seq_file *m, void *v)
static int lcd_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, lcd_proc_show, PDE_DATA(inode));
+ return single_open(file, lcd_proc_show, pde_data(inode));
}
static int set_lcd_brightness(struct toshiba_acpi_dev *dev, int value)
@@ -1404,7 +1404,7 @@ static int set_lcd_status(struct backlight_device *bd)
static ssize_t lcd_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file));
+ struct toshiba_acpi_dev *dev = pde_data(file_inode(file));
char cmd[42];
size_t len;
int levels;
@@ -1469,13 +1469,13 @@ static int video_proc_show(struct seq_file *m, void *v)
static int video_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, video_proc_show, PDE_DATA(inode));
+ return single_open(file, video_proc_show, pde_data(inode));
}
static ssize_t video_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file));
+ struct toshiba_acpi_dev *dev = pde_data(file_inode(file));
char *buffer;
char *cmd;
int lcd_out = -1, crt_out = -1, tv_out = -1;
@@ -1580,13 +1580,13 @@ static int fan_proc_show(struct seq_file *m, void *v)
static int fan_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, fan_proc_show, PDE_DATA(inode));
+ return single_open(file, fan_proc_show, pde_data(inode));
}
static ssize_t fan_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file));
+ struct toshiba_acpi_dev *dev = pde_data(file_inode(file));
char cmd[42];
size_t len;
int value;
@@ -1628,13 +1628,13 @@ static int keys_proc_show(struct seq_file *m, void *v)
static int keys_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, keys_proc_show, PDE_DATA(inode));
+ return single_open(file, keys_proc_show, pde_data(inode));
}
static ssize_t keys_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file));
+ struct toshiba_acpi_dev *dev = pde_data(file_inode(file));
char cmd[42];
size_t len;
int value;
diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c
index 1ae458c02656..55ae72a2818b 100644
--- a/drivers/pnp/isapnp/proc.c
+++ b/drivers/pnp/isapnp/proc.c
@@ -22,7 +22,7 @@ static loff_t isapnp_proc_bus_lseek(struct file *file, loff_t off, int whence)
static ssize_t isapnp_proc_bus_read(struct file *file, char __user * buf,
size_t nbytes, loff_t * ppos)
{
- struct pnp_dev *dev = PDE_DATA(file_inode(file));
+ struct pnp_dev *dev = pde_data(file_inode(file));
int pos = *ppos;
int cnt, size = 256;
diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c
index a806830e3a40..0f0d819b157f 100644
--- a/drivers/pnp/pnpbios/proc.c
+++ b/drivers/pnp/pnpbios/proc.c
@@ -173,13 +173,13 @@ static int pnpbios_proc_show(struct seq_file *m, void *v)
static int pnpbios_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, pnpbios_proc_show, PDE_DATA(inode));
+ return single_open(file, pnpbios_proc_show, pde_data(inode));
}
static ssize_t pnpbios_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- void *data = PDE_DATA(file_inode(file));
+ void *data = pde_data(file_inode(file));
struct pnp_bios_node *node;
int boot = (long)data >> 8;
u8 nodenum = (long)data;
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 93772ab8d7e3..c7552df32082 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -548,6 +548,73 @@ static void pwm_apply_state_debug(struct pwm_device *pwm,
}
}
+static int pwm_apply_legacy(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+ struct pwm_state initial_state = pwm->state;
+
+ if (state->polarity != pwm->state.polarity) {
+ if (!chip->ops->set_polarity)
+ return -EINVAL;
+
+ /*
+ * Changing the polarity of a running PWM is only allowed when
+ * the PWM driver implements ->apply().
+ */
+ if (pwm->state.enabled) {
+ chip->ops->disable(chip, pwm);
+
+ /*
+ * Update pwm->state already here in case
+ * .set_polarity() or another callback depend on that.
+ */
+ pwm->state.enabled = false;
+ }
+
+ err = chip->ops->set_polarity(chip, pwm, state->polarity);
+ if (err)
+ goto rollback;
+
+ pwm->state.polarity = state->polarity;
+ }
+
+ if (!state->enabled) {
+ if (pwm->state.enabled)
+ chip->ops->disable(chip, pwm);
+
+ return 0;
+ }
+
+ /*
+ * We cannot skip calling ->config even if state->period ==
+ * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle
+ * because we might have exited early in the last call to
+ * pwm_apply_state because of !state->enabled and so the two values in
+ * pwm->state might not be configured in hardware.
+ */
+ err = chip->ops->config(pwm->chip, pwm,
+ state->duty_cycle,
+ state->period);
+ if (err)
+ goto rollback;
+
+ pwm->state.period = state->period;
+ pwm->state.duty_cycle = state->duty_cycle;
+
+ if (!pwm->state.enabled) {
+ err = chip->ops->enable(chip, pwm);
+ if (err)
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ pwm->state = initial_state;
+ return err;
+}
+
/**
* pwm_apply_state() - atomically apply a new state to a PWM device
* @pwm: PWM device
@@ -580,70 +647,22 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state)
state->usage_power == pwm->state.usage_power)
return 0;
- if (chip->ops->apply) {
+ if (chip->ops->apply)
err = chip->ops->apply(chip, pwm, state);
- if (err)
- return err;
-
- trace_pwm_apply(pwm, state);
-
- pwm->state = *state;
-
- /*
- * only do this after pwm->state was applied as some
- * implementations of .get_state depend on this
- */
- pwm_apply_state_debug(pwm, state);
- } else {
- /*
- * FIXME: restore the initial state in case of error.
- */
- if (state->polarity != pwm->state.polarity) {
- if (!chip->ops->set_polarity)
- return -EINVAL;
+ else
+ err = pwm_apply_legacy(chip, pwm, state);
+ if (err)
+ return err;
- /*
- * Changing the polarity of a running PWM is
- * only allowed when the PWM driver implements
- * ->apply().
- */
- if (pwm->state.enabled) {
- chip->ops->disable(chip, pwm);
- pwm->state.enabled = false;
- }
+ trace_pwm_apply(pwm, state);
- err = chip->ops->set_polarity(chip, pwm,
- state->polarity);
- if (err)
- return err;
+ pwm->state = *state;
- pwm->state.polarity = state->polarity;
- }
-
- if (state->period != pwm->state.period ||
- state->duty_cycle != pwm->state.duty_cycle) {
- err = chip->ops->config(pwm->chip, pwm,
- state->duty_cycle,
- state->period);
- if (err)
- return err;
-
- pwm->state.duty_cycle = state->duty_cycle;
- pwm->state.period = state->period;
- }
-
- if (state->enabled != pwm->state.enabled) {
- if (state->enabled) {
- err = chip->ops->enable(chip, pwm);
- if (err)
- return err;
- } else {
- chip->ops->disable(chip, pwm);
- }
-
- pwm->state.enabled = state->enabled;
- }
- }
+ /*
+ * only do this after pwm->state was applied as some
+ * implementations of .get_state depend on this
+ */
+ pwm_apply_state_debug(pwm, state);
return 0;
}
diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c
index f97f82548293..5996049f66ec 100644
--- a/drivers/pwm/pwm-img.c
+++ b/drivers/pwm/pwm-img.c
@@ -128,11 +128,9 @@ static int img_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
duty = DIV_ROUND_UP(timebase * duty_ns, period_ns);
- ret = pm_runtime_get_sync(chip->dev);
- if (ret < 0) {
- pm_runtime_put_autosuspend(chip->dev);
+ ret = pm_runtime_resume_and_get(chip->dev);
+ if (ret < 0)
return ret;
- }
val = img_pwm_readl(pwm_chip, PWM_CTRL_CFG);
val &= ~(PWM_CTRL_CFG_DIV_MASK << PWM_CTRL_CFG_DIV_SHIFT(pwm->hwpwm));
@@ -184,10 +182,33 @@ static void img_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
pm_runtime_put_autosuspend(chip->dev);
}
+static int img_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+ if (!state->enabled) {
+ if (pwm->state.enabled)
+ img_pwm_disable(chip, pwm);
+
+ return 0;
+ }
+
+ err = img_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+ if (err)
+ return err;
+
+ if (!pwm->state.enabled)
+ err = img_pwm_enable(chip, pwm);
+
+ return err;
+}
+
static const struct pwm_ops img_pwm_ops = {
- .config = img_pwm_config,
- .enable = img_pwm_enable,
- .disable = img_pwm_disable,
+ .apply = img_pwm_apply,
.owner = THIS_MODULE,
};
diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c
index 203194f2c92e..86567add79db 100644
--- a/drivers/pwm/pwm-twl.c
+++ b/drivers/pwm/pwm-twl.c
@@ -58,9 +58,9 @@ static inline struct twl_pwm_chip *to_twl(struct pwm_chip *chip)
}
static int twl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
- int duty_ns, int period_ns)
+ u64 duty_ns, u64 period_ns)
{
- int duty_cycle = DIV_ROUND_UP(duty_ns * TWL_PWM_MAX, period_ns) + 1;
+ int duty_cycle = DIV64_U64_ROUND_UP(duty_ns * TWL_PWM_MAX, period_ns) + 1;
u8 pwm_config[2] = { 1, 0 };
int base, ret;
@@ -279,19 +279,65 @@ out:
mutex_unlock(&twl->mutex);
}
+static int twl4030_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+ if (!state->enabled) {
+ if (pwm->state.enabled)
+ twl4030_pwm_disable(chip, pwm);
+
+ return 0;
+ }
+
+ err = twl_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+ if (err)
+ return err;
+
+ if (!pwm->state.enabled)
+ err = twl4030_pwm_enable(chip, pwm);
+
+ return err;
+}
+
+static int twl6030_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+ if (!state->enabled) {
+ if (pwm->state.enabled)
+ twl6030_pwm_disable(chip, pwm);
+
+ return 0;
+ }
+
+ err = twl_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+ if (err)
+ return err;
+
+ if (!pwm->state.enabled)
+ err = twl6030_pwm_enable(chip, pwm);
+
+ return err;
+}
+
static const struct pwm_ops twl4030_pwm_ops = {
- .config = twl_pwm_config,
- .enable = twl4030_pwm_enable,
- .disable = twl4030_pwm_disable,
+ .apply = twl4030_pwm_apply,
.request = twl4030_pwm_request,
.free = twl4030_pwm_free,
.owner = THIS_MODULE,
};
static const struct pwm_ops twl6030_pwm_ops = {
- .config = twl_pwm_config,
- .enable = twl6030_pwm_enable,
- .disable = twl6030_pwm_disable,
+ .apply = twl6030_pwm_apply,
.owner = THIS_MODULE,
};
diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c
index 480bfc29782f..7170a315535b 100644
--- a/drivers/pwm/pwm-vt8500.c
+++ b/drivers/pwm/pwm-vt8500.c
@@ -70,7 +70,7 @@ static inline void vt8500_pwm_busy_wait(struct vt8500_chip *vt8500, int nr, u8 b
}
static int vt8500_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
- int duty_ns, int period_ns)
+ u64 duty_ns, u64 period_ns)
{
struct vt8500_chip *vt8500 = to_vt8500_chip(chip);
unsigned long long c;
@@ -102,8 +102,8 @@ static int vt8500_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
}
c = (unsigned long long)pv * duty_ns;
- do_div(c, period_ns);
- dc = c;
+
+ dc = div64_u64(c, period_ns);
writel(prescale, vt8500->base + REG_SCALAR(pwm->hwpwm));
vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_SCALAR_UPDATE);
@@ -176,11 +176,54 @@ static int vt8500_pwm_set_polarity(struct pwm_chip *chip,
return 0;
}
+static int vt8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+ bool enabled = pwm->state.enabled;
+
+ if (state->polarity != pwm->state.polarity) {
+ /*
+ * Changing the polarity of a running PWM is only allowed when
+ * the PWM driver implements ->apply().
+ */
+ if (enabled) {
+ vt8500_pwm_disable(chip, pwm);
+
+ enabled = false;
+ }
+
+ err = vt8500_pwm_set_polarity(chip, pwm, state->polarity);
+ if (err)
+ return err;
+ }
+
+ if (!state->enabled) {
+ if (enabled)
+ vt8500_pwm_disable(chip, pwm);
+
+ return 0;
+ }
+
+ /*
+ * We cannot skip calling ->config even if state->period ==
+ * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle
+ * because we might have exited early in the last call to
+ * pwm_apply_state because of !state->enabled and so the two values in
+ * pwm->state might not be configured in hardware.
+ */
+ err = vt8500_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+ if (err)
+ return err;
+
+ if (!enabled)
+ err = vt8500_pwm_enable(chip, pwm);
+
+ return err;
+}
+
static const struct pwm_ops vt8500_pwm_ops = {
- .enable = vt8500_pwm_enable,
- .disable = vt8500_pwm_disable,
- .config = vt8500_pwm_config,
- .set_polarity = vt8500_pwm_set_polarity,
+ .apply = vt8500_pwm_apply,
.owner = THIS_MODULE,
};
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 3ddd426fc969..166019786653 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -180,6 +180,7 @@ config QCOM_Q6V5_ADSP
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
+ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
select MFD_SYSCON
select QCOM_PIL_INFO
select QCOM_MDT_LOADER
@@ -199,6 +200,7 @@ config QCOM_Q6V5_MSS
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
+ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
select MFD_SYSCON
select QCOM_MDT_LOADER
select QCOM_PIL_INFO
@@ -218,6 +220,7 @@ config QCOM_Q6V5_PAS
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
+ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
select MFD_SYSCON
select QCOM_PIL_INFO
select QCOM_MDT_LOADER
@@ -239,6 +242,7 @@ config QCOM_Q6V5_WCSS
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
+ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
select MFD_SYSCON
select QCOM_MDT_LOADER
select QCOM_PIL_INFO
diff --git a/drivers/remoteproc/qcom_q6v5.c b/drivers/remoteproc/qcom_q6v5.c
index eada7e34f3af..442a388f8102 100644
--- a/drivers/remoteproc/qcom_q6v5.c
+++ b/drivers/remoteproc/qcom_q6v5.c
@@ -10,6 +10,7 @@
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/soc/qcom/qcom_aoss.h>
#include <linux/soc/qcom/smem.h>
#include <linux/soc/qcom/smem_state.h>
#include <linux/remoteproc.h>
diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index d6214cb66026..5663cf799c95 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -93,7 +93,7 @@ static int rpmsg_eptdev_destroy(struct device *dev, void *data)
/* wake up any blocked readers */
wake_up_interruptible(&eptdev->readq);
- device_del(&eptdev->dev);
+ cdev_device_del(&eptdev->cdev, &eptdev->dev);
put_device(&eptdev->dev);
return 0;
@@ -336,7 +336,6 @@ static void rpmsg_eptdev_release_device(struct device *dev)
ida_simple_remove(&rpmsg_ept_ida, dev->id);
ida_simple_remove(&rpmsg_minor_ida, MINOR(eptdev->dev.devt));
- cdev_del(&eptdev->cdev);
kfree(eptdev);
}
@@ -381,19 +380,13 @@ static int rpmsg_eptdev_create(struct rpmsg_ctrldev *ctrldev,
dev->id = ret;
dev_set_name(dev, "rpmsg%d", ret);
- ret = cdev_add(&eptdev->cdev, dev->devt, 1);
+ ret = cdev_device_add(&eptdev->cdev, &eptdev->dev);
if (ret)
goto free_ept_ida;
/* We can now rely on the release function for cleanup */
dev->release = rpmsg_eptdev_release_device;
- ret = device_add(dev);
- if (ret) {
- dev_err(dev, "device_add failed: %d\n", ret);
- put_device(dev);
- }
-
return ret;
free_ept_ida:
@@ -462,7 +455,6 @@ static void rpmsg_ctrldev_release_device(struct device *dev)
ida_simple_remove(&rpmsg_ctrl_ida, dev->id);
ida_simple_remove(&rpmsg_minor_ida, MINOR(dev->devt));
- cdev_del(&ctrldev->cdev);
kfree(ctrldev);
}
@@ -497,19 +489,13 @@ static int rpmsg_chrdev_probe(struct rpmsg_device *rpdev)
dev->id = ret;
dev_set_name(&ctrldev->dev, "rpmsg_ctrl%d", ret);
- ret = cdev_add(&ctrldev->cdev, dev->devt, 1);
+ ret = cdev_device_add(&ctrldev->cdev, &ctrldev->dev);
if (ret)
goto free_ctrl_ida;
/* We can now rely on the release function for cleanup */
dev->release = rpmsg_ctrldev_release_device;
- ret = device_add(dev);
- if (ret) {
- dev_err(&rpdev->dev, "device_add failed: %d\n", ret);
- put_device(dev);
- }
-
dev_set_drvdata(&rpdev->dev, ctrldev);
return ret;
@@ -535,7 +521,7 @@ static void rpmsg_chrdev_remove(struct rpmsg_device *rpdev)
if (ret)
dev_warn(&rpdev->dev, "failed to nuke endpoints: %d\n", ret);
- device_del(&ctrldev->dev);
+ cdev_device_del(&ctrldev->cdev, &ctrldev->dev);
put_device(&ctrldev->dev);
}
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 058e56a10ab8..d85a3c31347c 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1216,6 +1216,17 @@ config RTC_DRV_V3020
This driver can also be built as a module. If so, the module
will be called rtc-v3020.
+config RTC_DRV_GAMECUBE
+ tristate "Nintendo GameCube, Wii and Wii U RTC"
+ depends on GAMECUBE || WII || COMPILE_TEST
+ select REGMAP
+ help
+ If you say yes here you will get support for the RTC subsystem
+ of the Nintendo GameCube, Wii and Wii U.
+
+ This driver can also be built as a module. If so, the module
+ will be called "rtc-gamecube".
+
config RTC_DRV_WM831X
tristate "Wolfson Microelectronics WM831x RTC"
depends on MFD_WM831X
@@ -1444,6 +1455,19 @@ config RTC_DRV_SH
To compile this driver as a module, choose M here: the
module will be called rtc-sh.
+config RTC_DRV_SUNPLUS
+ tristate "Sunplus SP7021 RTC"
+ depends on SOC_SP7021
+ help
+ Say 'yes' to get support for the real-time clock present in
+ Sunplus SP7021 - a SoC for industrial applications. It provides
+ RTC status check, timer/alarm functionalities, user data
+ reservation with the battery over 2.5V, RTC power status check
+ and battery charge.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-sunplus.
+
config RTC_DRV_VR41XX
tristate "NEC VR41XX"
depends on CPU_VR41XX || COMPILE_TEST
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 678a8ef4abae..e92f3e943245 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_RTC_DRV_MT7622) += rtc-mt7622.o
obj-$(CONFIG_RTC_DRV_MV) += rtc-mv.o
obj-$(CONFIG_RTC_DRV_MXC) += rtc-mxc.o
obj-$(CONFIG_RTC_DRV_MXC_V2) += rtc-mxc_v2.o
+obj-$(CONFIG_RTC_DRV_GAMECUBE) += rtc-gamecube.o
obj-$(CONFIG_RTC_DRV_NTXEC) += rtc-ntxec.o
obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o
obj-$(CONFIG_RTC_DRV_OPAL) += rtc-opal.o
@@ -165,6 +166,7 @@ obj-$(CONFIG_RTC_DRV_STM32) += rtc-stm32.o
obj-$(CONFIG_RTC_DRV_STMP) += rtc-stmp3xxx.o
obj-$(CONFIG_RTC_DRV_SUN4V) += rtc-sun4v.o
obj-$(CONFIG_RTC_DRV_SUN6I) += rtc-sun6i.o
+obj-$(CONFIG_RTC_DRV_SUNPLUS) += rtc-sunplus.o
obj-$(CONFIG_RTC_DRV_SUNXI) += rtc-sunxi.o
obj-$(CONFIG_RTC_DRV_TEGRA) += rtc-tegra.o
obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o
diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c
index e104972a28fd..69325aeede1a 100644
--- a/drivers/rtc/dev.c
+++ b/drivers/rtc/dev.c
@@ -391,14 +391,14 @@ static long rtc_dev_ioctl(struct file *file,
}
switch(param.param) {
- long offset;
case RTC_PARAM_FEATURES:
if (param.index != 0)
err = -EINVAL;
param.uvalue = rtc->features[0];
break;
- case RTC_PARAM_CORRECTION:
+ case RTC_PARAM_CORRECTION: {
+ long offset;
mutex_unlock(&rtc->ops_lock);
if (param.index != 0)
return -EINVAL;
@@ -407,7 +407,7 @@ static long rtc_dev_ioctl(struct file *file,
if (err == 0)
param.svalue = offset;
break;
-
+ }
default:
if (rtc->ops->param_get)
err = rtc->ops->param_get(rtc->dev.parent, &param);
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 4eb53412b808..7c006c2b125f 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -222,6 +222,8 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
static int cmos_read_time(struct device *dev, struct rtc_time *t)
{
+ int ret;
+
/*
* If pm_trace abused the RTC for storage, set the timespec to 0,
* which tells the caller that this RTC value is unusable.
@@ -229,7 +231,12 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t)
if (!pm_trace_rtc_valid())
return -EIO;
- mc146818_get_time(t);
+ ret = mc146818_get_time(t);
+ if (ret < 0) {
+ dev_err_ratelimited(dev, "unable to read current time\n");
+ return ret;
+ }
+
return 0;
}
@@ -242,10 +249,46 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t)
return mc146818_set_time(t);
}
+struct cmos_read_alarm_callback_param {
+ struct cmos_rtc *cmos;
+ struct rtc_time *time;
+ unsigned char rtc_control;
+};
+
+static void cmos_read_alarm_callback(unsigned char __always_unused seconds,
+ void *param_in)
+{
+ struct cmos_read_alarm_callback_param *p =
+ (struct cmos_read_alarm_callback_param *)param_in;
+ struct rtc_time *time = p->time;
+
+ time->tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
+ time->tm_min = CMOS_READ(RTC_MINUTES_ALARM);
+ time->tm_hour = CMOS_READ(RTC_HOURS_ALARM);
+
+ if (p->cmos->day_alrm) {
+ /* ignore upper bits on readback per ACPI spec */
+ time->tm_mday = CMOS_READ(p->cmos->day_alrm) & 0x3f;
+ if (!time->tm_mday)
+ time->tm_mday = -1;
+
+ if (p->cmos->mon_alrm) {
+ time->tm_mon = CMOS_READ(p->cmos->mon_alrm);
+ if (!time->tm_mon)
+ time->tm_mon = -1;
+ }
+ }
+
+ p->rtc_control = CMOS_READ(RTC_CONTROL);
+}
+
static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
{
struct cmos_rtc *cmos = dev_get_drvdata(dev);
- unsigned char rtc_control;
+ struct cmos_read_alarm_callback_param p = {
+ .cmos = cmos,
+ .time = &t->time,
+ };
/* This not only a rtc_op, but also called directly */
if (!is_valid_irq(cmos->irq))
@@ -256,28 +299,18 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
* the future.
*/
- spin_lock_irq(&rtc_lock);
- t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
- t->time.tm_min = CMOS_READ(RTC_MINUTES_ALARM);
- t->time.tm_hour = CMOS_READ(RTC_HOURS_ALARM);
-
- if (cmos->day_alrm) {
- /* ignore upper bits on readback per ACPI spec */
- t->time.tm_mday = CMOS_READ(cmos->day_alrm) & 0x3f;
- if (!t->time.tm_mday)
- t->time.tm_mday = -1;
-
- if (cmos->mon_alrm) {
- t->time.tm_mon = CMOS_READ(cmos->mon_alrm);
- if (!t->time.tm_mon)
- t->time.tm_mon = -1;
- }
- }
-
- rtc_control = CMOS_READ(RTC_CONTROL);
- spin_unlock_irq(&rtc_lock);
+ /* Some Intel chipsets disconnect the alarm registers when the clock
+ * update is in progress - during this time reads return bogus values
+ * and writes may fail silently. See for example "7th Generation Intel®
+ * Processor Family I/O for U/Y Platforms [...] Datasheet", section
+ * 27.7.1
+ *
+ * Use the mc146818_avoid_UIP() function to avoid this.
+ */
+ if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p))
+ return -EIO;
- if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
if (((unsigned)t->time.tm_sec) < 0x60)
t->time.tm_sec = bcd2bin(t->time.tm_sec);
else
@@ -306,7 +339,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
}
}
- t->enabled = !!(rtc_control & RTC_AIE);
+ t->enabled = !!(p.rtc_control & RTC_AIE);
t->pending = 0;
return 0;
@@ -437,10 +470,57 @@ static int cmos_validate_alarm(struct device *dev, struct rtc_wkalrm *t)
return 0;
}
+struct cmos_set_alarm_callback_param {
+ struct cmos_rtc *cmos;
+ unsigned char mon, mday, hrs, min, sec;
+ struct rtc_wkalrm *t;
+};
+
+/* Note: this function may be executed by mc146818_avoid_UIP() more than
+ * once
+ */
+static void cmos_set_alarm_callback(unsigned char __always_unused seconds,
+ void *param_in)
+{
+ struct cmos_set_alarm_callback_param *p =
+ (struct cmos_set_alarm_callback_param *)param_in;
+
+ /* next rtc irq must not be from previous alarm setting */
+ cmos_irq_disable(p->cmos, RTC_AIE);
+
+ /* update alarm */
+ CMOS_WRITE(p->hrs, RTC_HOURS_ALARM);
+ CMOS_WRITE(p->min, RTC_MINUTES_ALARM);
+ CMOS_WRITE(p->sec, RTC_SECONDS_ALARM);
+
+ /* the system may support an "enhanced" alarm */
+ if (p->cmos->day_alrm) {
+ CMOS_WRITE(p->mday, p->cmos->day_alrm);
+ if (p->cmos->mon_alrm)
+ CMOS_WRITE(p->mon, p->cmos->mon_alrm);
+ }
+
+ if (use_hpet_alarm()) {
+ /*
+ * FIXME the HPET alarm glue currently ignores day_alrm
+ * and mon_alrm ...
+ */
+ hpet_set_alarm_time(p->t->time.tm_hour, p->t->time.tm_min,
+ p->t->time.tm_sec);
+ }
+
+ if (p->t->enabled)
+ cmos_irq_enable(p->cmos, RTC_AIE);
+}
+
static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
{
struct cmos_rtc *cmos = dev_get_drvdata(dev);
- unsigned char mon, mday, hrs, min, sec, rtc_control;
+ struct cmos_set_alarm_callback_param p = {
+ .cmos = cmos,
+ .t = t
+ };
+ unsigned char rtc_control;
int ret;
/* This not only a rtc_op, but also called directly */
@@ -451,52 +531,33 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
if (ret < 0)
return ret;
- mon = t->time.tm_mon + 1;
- mday = t->time.tm_mday;
- hrs = t->time.tm_hour;
- min = t->time.tm_min;
- sec = t->time.tm_sec;
+ p.mon = t->time.tm_mon + 1;
+ p.mday = t->time.tm_mday;
+ p.hrs = t->time.tm_hour;
+ p.min = t->time.tm_min;
+ p.sec = t->time.tm_sec;
+ spin_lock_irq(&rtc_lock);
rtc_control = CMOS_READ(RTC_CONTROL);
+ spin_unlock_irq(&rtc_lock);
+
if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
/* Writing 0xff means "don't care" or "match all". */
- mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
- mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
- hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
- min = (min < 60) ? bin2bcd(min) : 0xff;
- sec = (sec < 60) ? bin2bcd(sec) : 0xff;
- }
-
- spin_lock_irq(&rtc_lock);
-
- /* next rtc irq must not be from previous alarm setting */
- cmos_irq_disable(cmos, RTC_AIE);
-
- /* update alarm */
- CMOS_WRITE(hrs, RTC_HOURS_ALARM);
- CMOS_WRITE(min, RTC_MINUTES_ALARM);
- CMOS_WRITE(sec, RTC_SECONDS_ALARM);
-
- /* the system may support an "enhanced" alarm */
- if (cmos->day_alrm) {
- CMOS_WRITE(mday, cmos->day_alrm);
- if (cmos->mon_alrm)
- CMOS_WRITE(mon, cmos->mon_alrm);
+ p.mon = (p.mon <= 12) ? bin2bcd(p.mon) : 0xff;
+ p.mday = (p.mday >= 1 && p.mday <= 31) ? bin2bcd(p.mday) : 0xff;
+ p.hrs = (p.hrs < 24) ? bin2bcd(p.hrs) : 0xff;
+ p.min = (p.min < 60) ? bin2bcd(p.min) : 0xff;
+ p.sec = (p.sec < 60) ? bin2bcd(p.sec) : 0xff;
}
- if (use_hpet_alarm()) {
- /*
- * FIXME the HPET alarm glue currently ignores day_alrm
- * and mon_alrm ...
- */
- hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min,
- t->time.tm_sec);
- }
-
- if (t->enabled)
- cmos_irq_enable(cmos, RTC_AIE);
-
- spin_unlock_irq(&rtc_lock);
+ /*
+ * Some Intel chipsets disconnect the alarm registers when the clock
+ * update is in progress - during this time writes fail silently.
+ *
+ * Use mc146818_avoid_UIP() to avoid this.
+ */
+ if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p))
+ return -EIO;
cmos->alarm_expires = rtc_tm_to_time64(&t->time);
@@ -790,16 +851,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
- spin_lock_irq(&rtc_lock);
-
- /* Ensure that the RTC is accessible. Bit 6 must be 0! */
- if ((CMOS_READ(RTC_VALID) & 0x40) != 0) {
- spin_unlock_irq(&rtc_lock);
- dev_warn(dev, "not accessible\n");
+ if (!mc146818_does_rtc_work()) {
+ dev_warn(dev, "broken or not accessible\n");
retval = -ENXIO;
goto cleanup1;
}
+ spin_lock_irq(&rtc_lock);
+
if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
/* force periodic irq to CMOS reset default of 1024Hz;
*
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
index d4b72a9fa2ba..ee2efb496174 100644
--- a/drivers/rtc/rtc-da9063.c
+++ b/drivers/rtc/rtc-da9063.c
@@ -475,12 +475,14 @@ static int da9063_rtc_probe(struct platform_device *pdev)
da9063_data_to_tm(data, &rtc->alarm_time, rtc);
rtc->rtc_sync = false;
- /*
- * TODO: some models have alarms on a minute boundary but still support
- * real hardware interrupts. Add this once the core supports it.
- */
- if (config->rtc_data_start != RTC_SEC)
- rtc->rtc_dev->uie_unsupported = 1;
+ if (config->rtc_data_start != RTC_SEC) {
+ set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtc_dev->features);
+ /*
+ * TODO: some models have alarms on a minute boundary but still
+ * support real hardware interrupts.
+ */
+ clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtc_dev->features);
+ }
irq_alarm = platform_get_irq_byname(pdev, "ALARM");
if (irq_alarm < 0)
@@ -494,6 +496,8 @@ static int da9063_rtc_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n",
irq_alarm, ret);
+ device_init_wakeup(&pdev->dev, true);
+
return devm_rtc_register_device(rtc->rtc_dev);
}
diff --git a/drivers/rtc/rtc-ftrtc010.c b/drivers/rtc/rtc-ftrtc010.c
index ad3add5db4c8..53bb08fe1cd4 100644
--- a/drivers/rtc/rtc-ftrtc010.c
+++ b/drivers/rtc/rtc-ftrtc010.c
@@ -141,11 +141,9 @@ static int ftrtc010_rtc_probe(struct platform_device *pdev)
}
}
- res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (!res)
- return -ENODEV;
-
- rtc->rtc_irq = res->start;
+ rtc->rtc_irq = platform_get_irq(pdev, 0);
+ if (rtc->rtc_irq < 0)
+ return rtc->rtc_irq;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
diff --git a/drivers/rtc/rtc-gamecube.c b/drivers/rtc/rtc-gamecube.c
new file mode 100644
index 000000000000..f717b36f4738
--- /dev/null
+++ b/drivers/rtc/rtc-gamecube.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Nintendo GameCube, Wii and Wii U RTC driver
+ *
+ * This driver is for the MX23L4005, more specifically its real-time clock and
+ * SRAM storage. The value returned by the RTC counter must be added with the
+ * offset stored in a bias register in SRAM (on the GameCube and Wii) or in
+ * /config/rtc.xml (on the Wii U). The latter being very impractical to access
+ * from Linux, this driver assumes the bootloader has read it and stored it in
+ * SRAM like for the other two consoles.
+ *
+ * This device sits on a bus named EXI (which is similar to SPI), channel 0,
+ * device 1. This driver assumes no other user of the EXI bus, which is
+ * currently the case but would have to be reworked to add support for other
+ * GameCube hardware exposed on this bus.
+ *
+ * References:
+ * - https://wiiubrew.org/wiki/Hardware/RTC
+ * - https://wiibrew.org/wiki/MX23L4005
+ *
+ * Copyright (C) 2018 rw-r-r-0644
+ * Copyright (C) 2021 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
+ *
+ * Based on rtc-gcn.c
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2005,2008,2009 Albert Herranz
+ * Based on gamecube_time.c from Torben Nielsen.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+#include <linux/time.h>
+
+/* EXI registers */
+#define EXICSR 0
+#define EXICR 12
+#define EXIDATA 16
+
+/* EXI register values */
+#define EXICSR_DEV 0x380
+ #define EXICSR_DEV1 0x100
+#define EXICSR_CLK 0x070
+ #define EXICSR_CLK_1MHZ 0x000
+ #define EXICSR_CLK_2MHZ 0x010
+ #define EXICSR_CLK_4MHZ 0x020
+ #define EXICSR_CLK_8MHZ 0x030
+ #define EXICSR_CLK_16MHZ 0x040
+ #define EXICSR_CLK_32MHZ 0x050
+#define EXICSR_INT 0x008
+ #define EXICSR_INTSET 0x008
+
+#define EXICR_TSTART 0x001
+#define EXICR_TRSMODE 0x002
+ #define EXICR_TRSMODE_IMM 0x000
+#define EXICR_TRSTYPE 0x00C
+ #define EXICR_TRSTYPE_R 0x000
+ #define EXICR_TRSTYPE_W 0x004
+#define EXICR_TLEN 0x030
+ #define EXICR_TLEN32 0x030
+
+/* EXI registers values to access the RTC */
+#define RTC_EXICSR (EXICSR_DEV1 | EXICSR_CLK_8MHZ | EXICSR_INTSET)
+#define RTC_EXICR_W (EXICR_TSTART | EXICR_TRSMODE_IMM | EXICR_TRSTYPE_W | EXICR_TLEN32)
+#define RTC_EXICR_R (EXICR_TSTART | EXICR_TRSMODE_IMM | EXICR_TRSTYPE_R | EXICR_TLEN32)
+#define RTC_EXIDATA_W 0x80000000
+
+/* RTC registers */
+#define RTC_COUNTER 0x200000
+#define RTC_SRAM 0x200001
+#define RTC_SRAM_BIAS 0x200004
+#define RTC_SNAPSHOT 0x204000
+#define RTC_ONTMR 0x210000
+#define RTC_OFFTMR 0x210001
+#define RTC_TEST0 0x210004
+#define RTC_TEST1 0x210005
+#define RTC_TEST2 0x210006
+#define RTC_TEST3 0x210007
+#define RTC_CONTROL0 0x21000c
+#define RTC_CONTROL1 0x21000d
+
+/* RTC flags */
+#define RTC_CONTROL0_UNSTABLE_POWER 0x00000800
+#define RTC_CONTROL0_LOW_BATTERY 0x00000200
+
+/* Per-device driver state, allocated in gamecube_rtc_probe(). */
+struct priv {
+ struct regmap *regmap; /* regmap created on top of exi_bus in probe */
+ void __iomem *iob; /* mapped EXI registers (EXICSR, EXICR, EXIDATA) */
+ u32 rtc_bias; /* bias read from RTC_SRAM_BIAS, added to the counter */
+};
+
+/*
+ * regmap .reg_read callback: read one 32-bit RTC register over EXI.
+ *
+ * @context: the struct priv handed to devm_regmap_init()
+ * @reg: RTC register address, sent shifted left by 8 bits
+ * @data: receives the 32-bit value read back from EXIDATA
+ *
+ * Always returns 0. Both transfers are polled to completion without a
+ * timeout.
+ */
+static int exi_read(void *context, u32 reg, u32 *data)
+{
+ struct priv *d = (struct priv *)context;
+ void __iomem *iob = d->iob;
+
+ /* The spin loops here loop about 15~16 times each, so there is no need
+ * to use a more expensive sleep method.
+ */
+
+ /* Send the register address with an immediate 32-bit write transfer */
+ iowrite32be(RTC_EXICSR, iob + EXICSR);
+ iowrite32be(reg << 8, iob + EXIDATA);
+ iowrite32be(RTC_EXICR_W, iob + EXICR);
+ while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+ cpu_relax();
+
+ /* Start an immediate 32-bit read transfer and fetch the result */
+ iowrite32be(RTC_EXICSR, iob + EXICSR);
+ iowrite32be(RTC_EXICR_R, iob + EXICR);
+ while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+ cpu_relax();
+ *data = ioread32be(iob + EXIDATA);
+
+ /* Clear channel parameters */
+ iowrite32be(0, iob + EXICSR);
+
+ return 0;
+}
+
+/*
+ * regmap .reg_write callback: write one 32-bit RTC register over EXI.
+ *
+ * @context: the struct priv handed to devm_regmap_init()
+ * @reg: RTC register address, sent shifted left by 8 bits
+ * @data: 32-bit value to store in the register
+ *
+ * Always returns 0. Both transfers are polled to completion without a
+ * timeout.
+ */
+static int exi_write(void *context, u32 reg, u32 data)
+{
+ struct priv *d = (struct priv *)context;
+ void __iomem *iob = d->iob;
+
+ /* The spin loops here loop about 15~16 times each, so there is no need
+ * to use a more expensive sleep method.
+ */
+
+ /* Send the register address with RTC_EXIDATA_W set (presumably the
+ * "write access" flag of the RTC protocol - TODO confirm)
+ */
+ iowrite32be(RTC_EXICSR, iob + EXICSR);
+ iowrite32be(RTC_EXIDATA_W | (reg << 8), iob + EXIDATA);
+ iowrite32be(RTC_EXICR_W, iob + EXICR);
+ while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+ cpu_relax();
+
+ /* Send the value with a second immediate 32-bit write transfer */
+ iowrite32be(RTC_EXICSR, iob + EXICSR);
+ iowrite32be(data, iob + EXIDATA);
+ iowrite32be(RTC_EXICR_W, iob + EXICR);
+ while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+ cpu_relax();
+
+ /* Clear channel parameters */
+ iowrite32be(0, iob + EXICSR);
+
+ return 0;
+}
+
+/* regmap bus that routes single-register accesses to exi_read()/exi_write(). */
+static const struct regmap_bus exi_bus = {
+ /* TODO: is that true? Not that it matters here, but still. */
+ .fast_io = true,
+ .reg_read = exi_read,
+ .reg_write = exi_write,
+};
+
+/*
+ * rtc_class_ops .read_time: convert the hardware counter to broken-down time.
+ *
+ * Returns 0 on success, or the error from regmap_read() if the counter could
+ * not be read.
+ */
+static int gamecube_rtc_read_time(struct device *dev, struct rtc_time *t)
+{
+ struct priv *d = dev_get_drvdata(dev);
+ int ret;
+ u32 counter;
+ time64_t timestamp;
+
+ ret = regmap_read(d->regmap, RTC_COUNTER, &counter);
+ if (ret)
+ return ret;
+
+ /* Add the counter and the bias to obtain the timestamp; the cast makes
+ * the sum 64-bit so it cannot wrap at 32 bits.
+ */
+ timestamp = (time64_t)d->rtc_bias + counter;
+ rtc_time64_to_tm(timestamp, t);
+
+ return 0;
+}
+
+/*
+ * rtc_class_ops .set_time: program the hardware counter from broken-down time.
+ *
+ * Only the counter is written; the stored bias is left untouched. Returns the
+ * result of regmap_write().
+ */
+static int gamecube_rtc_set_time(struct device *dev, struct rtc_time *t)
+{
+ struct priv *d = dev_get_drvdata(dev);
+ time64_t timestamp;
+
+ /* Subtract the bias from the timestamp to obtain the counter value */
+ timestamp = rtc_tm_to_time64(t);
+ return regmap_write(d->regmap, RTC_COUNTER, timestamp - d->rtc_bias);
+}
+
+/*
+ * rtc_class_ops .ioctl: driver-specific ioctls.
+ *
+ * Only RTC_VL_READ is handled. It reads RTC_CONTROL0 and reports
+ * RTC_VL_DATA_INVALID when the unstable-power flag is set and
+ * RTC_VL_BACKUP_LOW when the low-battery flag is set.
+ *
+ * Returns 0 on success, the regmap_read() or put_user() error on failure,
+ * or -ENOIOCTLCMD for any other command.
+ */
+static int gamecube_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+	struct priv *d = dev_get_drvdata(dev);
+	/* Unsigned to match regmap_read() and the user pointer type below. */
+	unsigned int control0;
+	unsigned int value;
+	int ret;
+
+	switch (cmd) {
+	case RTC_VL_READ:
+		ret = regmap_read(d->regmap, RTC_CONTROL0, &control0);
+		if (ret)
+			return ret;
+
+		value = 0;
+		if (control0 & RTC_CONTROL0_UNSTABLE_POWER)
+			value |= RTC_VL_DATA_INVALID;
+		if (control0 & RTC_CONTROL0_LOW_BATTERY)
+			value |= RTC_VL_BACKUP_LOW;
+		return put_user(value, (unsigned int __user *)arg);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
+/* RTC class operations: time get/set and the voltage-low ioctl; no alarm ops. */
+static const struct rtc_class_ops gamecube_rtc_ops = {
+ .read_time = gamecube_rtc_read_time,
+ .set_time = gamecube_rtc_set_time,
+ .ioctl = gamecube_rtc_ioctl,
+};
+
+/*
+ * Read the RTC bias from RTC SRAM into d->rtc_bias.
+ *
+ * When a "nintendo,latte-srnprot" or "nintendo,hollywood-srnprot" node
+ * exists, the HW_SRNPROT register is temporarily set to 0x7bf around the SRAM
+ * read and then restored. Without such a node the SRAM is read directly.
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int gamecube_rtc_read_offset_from_sram(struct priv *d)
+{
+	struct device_node *np;
+	int ret;
+	struct resource res;
+	void __iomem *hw_srnprot;
+	u32 old;
+
+	np = of_find_compatible_node(NULL, NULL, "nintendo,latte-srnprot");
+	if (!np)
+		np = of_find_compatible_node(NULL, NULL,
+					     "nintendo,hollywood-srnprot");
+	if (!np) {
+		pr_info("HW_SRNPROT not found, assuming a GameCube\n");
+		return regmap_read(d->regmap, RTC_SRAM_BIAS, &d->rtc_bias);
+	}
+
+	ret = of_address_to_resource(np, 0, &res);
+	/* of_find_compatible_node() took a reference; the node is not needed
+	 * once its address has been translated.
+	 */
+	of_node_put(np);
+	if (ret) {
+		pr_err("no io memory range found\n");
+		return ret;
+	}
+
+	hw_srnprot = ioremap(res.start, resource_size(&res));
+	if (!hw_srnprot) {
+		pr_err("failed to map HW_SRNPROT\n");
+		return -ENOMEM;
+	}
+	old = ioread32be(hw_srnprot);
+
+	/* TODO: figure out why we use this magic constant. I obtained it by
+	 * reading the leftover value after boot, after IOSU already ran.
+	 *
+	 * On my Wii U, setting this register to 1 prevents the console from
+	 * rebooting properly, so wiiubrew.org must be missing something.
+	 *
+	 * See https://wiiubrew.org/wiki/Hardware/Latte_registers
+	 */
+	if (old != 0x7bf)
+		iowrite32be(0x7bf, hw_srnprot);
+
+	/* Get the offset from RTC SRAM.
+	 *
+	 * Its default location on the GameCube and on the Wii is in the SRAM,
+	 * while on the Wii U the bootloader needs to fill it with the contents
+	 * of /config/rtc.xml on the SLC (the eMMC). We don’t do that from
+	 * Linux since it requires implementing a proprietary filesystem and do
+	 * file decryption, instead we require the bootloader to fill the same
+	 * SRAM address as on previous consoles.
+	 */
+	ret = regmap_read(d->regmap, RTC_SRAM_BIAS, &d->rtc_bias);
+
+	/* Reset SRAM access to how it was before and drop the mapping, even
+	 * when the read failed, so an error leaves no hardware state or
+	 * mapping behind.
+	 */
+	if (old != 0x7bf)
+		iowrite32be(old, hw_srnprot);
+	iounmap(hw_srnprot);
+
+	if (ret)
+		pr_err("failed to get the RTC bias\n");
+
+	return ret;
+}
+
+/*
+ * Readable registers: counter and SRAM (0x200000-0x200010), RTC_SNAPSHOT,
+ * RTC_ONTMR/RTC_OFFTMR, RTC_TEST0-3 and RTC_CONTROL0/1.
+ */
+static const struct regmap_range rtc_rd_ranges[] = {
+ regmap_reg_range(0x200000, 0x200010),
+ regmap_reg_range(0x204000, 0x204000),
+ regmap_reg_range(0x210000, 0x210001),
+ regmap_reg_range(0x210004, 0x210007),
+ regmap_reg_range(0x21000c, 0x21000d),
+};
+
+static const struct regmap_access_table rtc_rd_regs = {
+ .yes_ranges = rtc_rd_ranges,
+ .n_yes_ranges = ARRAY_SIZE(rtc_rd_ranges),
+};
+
+/*
+ * Writable registers: same as the readable set except that RTC_TEST0-3 and
+ * RTC_CONTROL0 (0x21000c) are left out.
+ */
+static const struct regmap_range rtc_wr_ranges[] = {
+ regmap_reg_range(0x200000, 0x200010),
+ regmap_reg_range(0x204000, 0x204000),
+ regmap_reg_range(0x210000, 0x210001),
+ regmap_reg_range(0x21000d, 0x21000d),
+};
+
+static const struct regmap_access_table rtc_wr_regs = {
+ .yes_ranges = rtc_wr_ranges,
+ .n_yes_ranges = ARRAY_SIZE(rtc_wr_ranges),
+};
+
+/* 24-bit register addresses, 32-bit values; max_register is RTC_CONTROL1. */
+static const struct regmap_config gamecube_rtc_regmap_config = {
+ .reg_bits = 24,
+ .val_bits = 32,
+ .rd_table = &rtc_rd_regs,
+ .wr_table = &rtc_wr_regs,
+ .max_register = 0x21000d,
+ .name = "gamecube-rtc",
+};
+
+/*
+ * Platform probe: map the EXI registers, build the regmap, read the RTC bias
+ * and register the RTC device.
+ *
+ * All resources are device-managed, so no remove callback is needed.
+ * Returns 0 on success or a negative errno from the first failing step,
+ * including a failed RTC registration.
+ */
+static int gamecube_rtc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rtc_device *rtc;
+	struct priv *d;
+	int ret;
+
+	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->iob = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(d->iob))
+		return PTR_ERR(d->iob);
+
+	d->regmap = devm_regmap_init(dev, &exi_bus, d,
+				     &gamecube_rtc_regmap_config);
+	if (IS_ERR(d->regmap))
+		return PTR_ERR(d->regmap);
+
+	ret = gamecube_rtc_read_offset_from_sram(d);
+	if (ret)
+		return ret;
+	dev_dbg(dev, "SRAM bias: 0x%x\n", d->rtc_bias);
+
+	dev_set_drvdata(dev, d);
+
+	rtc = devm_rtc_allocate_device(dev);
+	if (IS_ERR(rtc))
+		return PTR_ERR(rtc);
+
+	/* We can represent further than that, but it depends on the stored
+	 * bias and we can’t modify it persistently on all supported consoles,
+	 * so here we pretend to be limited to 2106.
+	 */
+	rtc->range_min = 0;
+	rtc->range_max = U32_MAX;
+	rtc->ops = &gamecube_rtc_ops;
+
+	/* Propagate registration failures instead of reporting success. */
+	return devm_rtc_register_device(rtc);
+}
+
+/*
+ * The driver binds to the EXI bus node itself rather than to a child RTC
+ * node. The three compatibles presumably correspond to the Wii U (latte),
+ * Wii (hollywood) and GameCube (flipper) - TODO confirm against the bindings.
+ */
+static const struct of_device_id gamecube_rtc_of_match[] = {
+ {.compatible = "nintendo,latte-exi" },
+ {.compatible = "nintendo,hollywood-exi" },
+ {.compatible = "nintendo,flipper-exi" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, gamecube_rtc_of_match);
+
+/* No .remove callback: probe only uses device-managed (devm_*) resources. */
+static struct platform_driver gamecube_rtc_driver = {
+ .probe = gamecube_rtc_probe,
+ .driver = {
+ .name = "rtc-gamecube",
+ .of_match_table = gamecube_rtc_of_match,
+ },
+};
+module_platform_driver(gamecube_rtc_driver);
+
+MODULE_AUTHOR("Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>");
+MODULE_DESCRIPTION("Nintendo GameCube, Wii and Wii U RTC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
index dcfaf09946ee..ae9f131b43c0 100644
--- a/drivers/rtc/rtc-mc146818-lib.c
+++ b/drivers/rtc/rtc-mc146818-lib.c
@@ -8,48 +8,100 @@
#include <linux/acpi.h>
#endif
-unsigned int mc146818_get_time(struct rtc_time *time)
+/*
+ * Execute a function while the UIP (Update-in-progress) bit of the RTC is
+ * unset.
+ *
+ * Warning: callback may be executed more than once.
+ */
+bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
+ void *param)
{
- unsigned char ctrl;
+ int i;
unsigned long flags;
- unsigned char century = 0;
- bool retry;
+ unsigned char seconds;
-#ifdef CONFIG_MACH_DECSTATION
- unsigned int real_year;
-#endif
+ for (i = 0; i < 10; i++) {
+ spin_lock_irqsave(&rtc_lock, flags);
-again:
- spin_lock_irqsave(&rtc_lock, flags);
- /* Ensure that the RTC is accessible. Bit 6 must be 0! */
- if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
- spin_unlock_irqrestore(&rtc_lock, flags);
- memset(time, 0xff, sizeof(*time));
- return 0;
- }
+ /*
+ * Check whether there is an update in progress during which the
+ * readout is unspecified. The maximum update time is ~2ms. Poll
+ * every msec for completion.
+ *
+ * Store the second value before checking UIP so a long lasting
+ * NMI which happens to hit after the UIP check cannot make
+ * an update cycle invisible.
+ */
+ seconds = CMOS_READ(RTC_SECONDS);
- /*
- * Check whether there is an update in progress during which the
- * readout is unspecified. The maximum update time is ~2ms. Poll
- * every msec for completion.
- *
- * Store the second value before checking UIP so a long lasting NMI
- * which happens to hit after the UIP check cannot make an update
- * cycle invisible.
- */
- time->tm_sec = CMOS_READ(RTC_SECONDS);
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ mdelay(1);
+ continue;
+ }
- if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
- spin_unlock_irqrestore(&rtc_lock, flags);
- mdelay(1);
- goto again;
- }
+ /* Revalidate the above readout */
+ if (seconds != CMOS_READ(RTC_SECONDS)) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ continue;
+ }
- /* Revalidate the above readout */
- if (time->tm_sec != CMOS_READ(RTC_SECONDS)) {
+ if (callback)
+ callback(seconds, param);
+
+ /*
+ * Check for the UIP bit again. If it is set now then
+ * the above values may contain garbage.
+ */
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ mdelay(1);
+ continue;
+ }
+
+ /*
+ * A NMI might have interrupted the above sequence so check
+ * whether the seconds value has changed which indicates that
+ * the NMI took longer than the UIP bit was set. Unlikely, but
+ * possible and there is also virt...
+ */
+ if (seconds != CMOS_READ(RTC_SECONDS)) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ continue;
+ }
spin_unlock_irqrestore(&rtc_lock, flags);
- goto again;
+
+ return true;
}
+ return false;
+}
+EXPORT_SYMBOL_GPL(mc146818_avoid_UIP);
+
+/*
+ * If the UIP (Update-in-progress) bit of the RTC is set for more than
+ * 10ms, the RTC is apparently broken or not present.
+ *
+ * Runs mc146818_avoid_UIP() without a callback and returns its result:
+ * true if a stable readout window was found, false otherwise.
+ */
+bool mc146818_does_rtc_work(void)
+{
+ return mc146818_avoid_UIP(NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(mc146818_does_rtc_work);
+
+/* Values exchanged between mc146818_get_time() and its UIP-safe callback. */
+struct mc146818_get_time_callback_param {
+ struct rtc_time *time; /* destination filled with the raw register values */
+ unsigned char ctrl; /* RTC_CONTROL as read by the callback */
+#ifdef CONFIG_ACPI
+ unsigned char century; /* century register named by the FADT, or 0 */
+#endif
+#ifdef CONFIG_MACH_DECSTATION
+ unsigned int real_year; /* RTC_DEC_YEAR as read by the callback */
+#endif
+};
+
+static void mc146818_get_time_callback(unsigned char seconds, void *param_in)
+{
+ struct mc146818_get_time_callback_param *p = param_in;
/*
* Only the values that we read from the RTC are set. We leave
@@ -57,39 +109,39 @@ again:
* RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
* by the RTC when initially set to a non-zero value.
*/
- time->tm_min = CMOS_READ(RTC_MINUTES);
- time->tm_hour = CMOS_READ(RTC_HOURS);
- time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
- time->tm_mon = CMOS_READ(RTC_MONTH);
- time->tm_year = CMOS_READ(RTC_YEAR);
+ p->time->tm_sec = seconds;
+ p->time->tm_min = CMOS_READ(RTC_MINUTES);
+ p->time->tm_hour = CMOS_READ(RTC_HOURS);
+ p->time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
+ p->time->tm_mon = CMOS_READ(RTC_MONTH);
+ p->time->tm_year = CMOS_READ(RTC_YEAR);
#ifdef CONFIG_MACH_DECSTATION
- real_year = CMOS_READ(RTC_DEC_YEAR);
+ p->real_year = CMOS_READ(RTC_DEC_YEAR);
#endif
#ifdef CONFIG_ACPI
if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
- acpi_gbl_FADT.century)
- century = CMOS_READ(acpi_gbl_FADT.century);
+ acpi_gbl_FADT.century) {
+ p->century = CMOS_READ(acpi_gbl_FADT.century);
+ } else {
+ p->century = 0;
+ }
#endif
- ctrl = CMOS_READ(RTC_CONTROL);
- /*
- * Check for the UIP bit again. If it is set now then
- * the above values may contain garbage.
- */
- retry = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
- /*
- * A NMI might have interrupted the above sequence so check whether
- * the seconds value has changed which indicates that the NMI took
- * longer than the UIP bit was set. Unlikely, but possible and
- * there is also virt...
- */
- retry |= time->tm_sec != CMOS_READ(RTC_SECONDS);
- spin_unlock_irqrestore(&rtc_lock, flags);
+ p->ctrl = CMOS_READ(RTC_CONTROL);
+}
- if (retry)
- goto again;
+int mc146818_get_time(struct rtc_time *time)
+{
+ struct mc146818_get_time_callback_param p = {
+ .time = time
+ };
+
+ if (!mc146818_avoid_UIP(mc146818_get_time_callback, &p)) {
+ memset(time, 0, sizeof(*time));
+ return -EIO;
+ }
- if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ if (!(p.ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
{
time->tm_sec = bcd2bin(time->tm_sec);
time->tm_min = bcd2bin(time->tm_min);
@@ -97,15 +149,19 @@ again:
time->tm_mday = bcd2bin(time->tm_mday);
time->tm_mon = bcd2bin(time->tm_mon);
time->tm_year = bcd2bin(time->tm_year);
- century = bcd2bin(century);
+#ifdef CONFIG_ACPI
+ p.century = bcd2bin(p.century);
+#endif
}
#ifdef CONFIG_MACH_DECSTATION
- time->tm_year += real_year - 72;
+ time->tm_year += p.real_year - 72;
#endif
- if (century > 20)
- time->tm_year += (century - 19) * 100;
+#ifdef CONFIG_ACPI
+ if (p.century > 19)
+ time->tm_year += (p.century - 19) * 100;
+#endif
/*
* Account for differences between how the RTC uses the values
@@ -116,7 +172,7 @@ again:
time->tm_mon--;
- return RTC_24H;
+ return 0;
}
EXPORT_SYMBOL_GPL(mc146818_get_time);
diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
index 56c58b055dff..81a5b1f2e68c 100644
--- a/drivers/rtc/rtc-pcf2127.c
+++ b/drivers/rtc/rtc-pcf2127.c
@@ -748,7 +748,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
/*
* Enable timestamp function and store timestamp of first trigger
- * event until TSF1 and TFS2 interrupt flags are cleared.
+ * event until TSF1 and TSF2 interrupt flags are cleared.
*/
ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_TS_CTRL,
PCF2127_BIT_TS_CTRL_TSOFF |
diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
index 15e50bb10cf0..df2b072c394d 100644
--- a/drivers/rtc/rtc-pcf85063.c
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -514,21 +514,56 @@ static struct clk *pcf85063_clkout_register_clk(struct pcf85063 *pcf85063)
}
#endif
-static const struct pcf85063_config pcf85063tp_config = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x0a,
+/*
+ * Supported chip variants. The values index pcf85063_cfg[] and are stored as
+ * driver_data in pcf85063_ids[].
+ */
+enum pcf85063_type {
+ PCF85063,
+ PCF85063TP,
+ PCF85063A,
+ RV8263,
+ PCF85063_LAST_ID /* number of variants; used as a bound check in probe */
+};
+
+static struct pcf85063_config pcf85063_cfg[] = {
+ [PCF85063] = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x0a,
+ },
+ },
+ [PCF85063TP] = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x0a,
+ },
+ },
+ [PCF85063A] = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x11,
+ },
+ .has_alarms = 1,
+ },
+ [RV8263] = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x11,
+ },
+ .has_alarms = 1,
+ .force_cap_7000 = 1,
},
};
+static const struct i2c_device_id pcf85063_ids[];
+
static int pcf85063_probe(struct i2c_client *client)
{
struct pcf85063 *pcf85063;
unsigned int tmp;
int err;
- const struct pcf85063_config *config = &pcf85063tp_config;
- const void *data = of_device_get_match_data(&client->dev);
+ const struct pcf85063_config *config;
struct nvmem_config nvmem_cfg = {
.name = "pcf85063_nvram",
.reg_read = pcf85063_nvmem_read,
@@ -544,8 +579,17 @@ static int pcf85063_probe(struct i2c_client *client)
if (!pcf85063)
return -ENOMEM;
- if (data)
- config = data;
+ if (client->dev.of_node) {
+ config = of_device_get_match_data(&client->dev);
+ if (!config)
+ return -ENODEV;
+ } else {
+ enum pcf85063_type type =
+ i2c_match_id(pcf85063_ids, client)->driver_data;
+ if (type >= PCF85063_LAST_ID)
+ return -ENODEV;
+ config = &pcf85063_cfg[type];
+ }
pcf85063->regmap = devm_regmap_init_i2c(client, &config->regmap);
if (IS_ERR(pcf85063->regmap))
@@ -604,31 +648,21 @@ static int pcf85063_probe(struct i2c_client *client)
return devm_rtc_register_device(pcf85063->rtc);
}
-#ifdef CONFIG_OF
-static const struct pcf85063_config pcf85063a_config = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x11,
- },
- .has_alarms = 1,
-};
-
-static const struct pcf85063_config rv8263_config = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x11,
- },
- .has_alarms = 1,
- .force_cap_7000 = 1,
+static const struct i2c_device_id pcf85063_ids[] = {
+ { "pcf85063", PCF85063 },
+ { "pcf85063tp", PCF85063TP },
+ { "pcf85063a", PCF85063A },
+ { "rv8263", RV8263 },
+ {}
};
+MODULE_DEVICE_TABLE(i2c, pcf85063_ids);
+#ifdef CONFIG_OF
static const struct of_device_id pcf85063_of_match[] = {
- { .compatible = "nxp,pcf85063", .data = &pcf85063tp_config },
- { .compatible = "nxp,pcf85063tp", .data = &pcf85063tp_config },
- { .compatible = "nxp,pcf85063a", .data = &pcf85063a_config },
- { .compatible = "microcrystal,rv8263", .data = &rv8263_config },
+ { .compatible = "nxp,pcf85063", .data = &pcf85063_cfg[PCF85063] },
+ { .compatible = "nxp,pcf85063tp", .data = &pcf85063_cfg[PCF85063TP] },
+ { .compatible = "nxp,pcf85063a", .data = &pcf85063_cfg[PCF85063A] },
+ { .compatible = "microcrystal,rv8263", .data = &pcf85063_cfg[RV8263] },
{}
};
MODULE_DEVICE_TABLE(of, pcf85063_of_match);
@@ -640,6 +674,7 @@ static struct i2c_driver pcf85063_driver = {
.of_match_table = of_match_ptr(pcf85063_of_match),
},
.probe_new = pcf85063_probe,
+ .id_table = pcf85063_ids,
};
module_i2c_driver(pcf85063_driver);
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index d2f1d8f754bf..cf8119b6d320 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -330,6 +330,10 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
if (sa1100_rtc->irq_alarm < 0)
return -ENXIO;
+ sa1100_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
+ if (IS_ERR(sa1100_rtc->rtc))
+ return PTR_ERR(sa1100_rtc->rtc);
+
pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start,
resource_size(pxa_rtc->ress));
if (!pxa_rtc->base) {
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 80980414890c..cb15983383f5 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -28,8 +28,10 @@
#define RS5C372_REG_MONTH 5
#define RS5C372_REG_YEAR 6
#define RS5C372_REG_TRIM 7
-# define RS5C372_TRIM_XSL 0x80
+# define RS5C372_TRIM_XSL 0x80 /* only if RS5C372[a|b] */
# define RS5C372_TRIM_MASK 0x7F
+# define R2221TL_TRIM_DEV (1 << 7) /* only if R2221TL */
+# define RS5C372_TRIM_DECR (1 << 6)
#define RS5C_REG_ALARM_A_MIN 8 /* or ALARM_W */
#define RS5C_REG_ALARM_A_HOURS 9
@@ -324,8 +326,12 @@ static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim)
struct rs5c372 *rs5c372 = i2c_get_clientdata(client);
u8 tmp = rs5c372->regs[RS5C372_REG_TRIM];
- if (osc)
- *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768;
+ if (osc) {
+ if (rs5c372->type == rtc_rs5c372a || rs5c372->type == rtc_rs5c372b)
+ *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768;
+ else
+ *osc = 32768;
+ }
if (trim) {
dev_dbg(&client->dev, "%s: raw trim=%x\n", __func__, tmp);
@@ -485,6 +491,176 @@ static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
#define rs5c372_rtc_proc NULL
#endif
+#ifdef CONFIG_RTC_INTF_DEV
+/*
+ * rtc_class_ops .ioctl: voltage-low reporting.
+ *
+ * RTC_VL_READ reports RTC_VL_DATA_INVALID from the XSTP bit (cleared means
+ * invalid on the r2025sd, set means invalid on the other chips) and, on
+ * r2025sd/r2221tl only, RTC_VL_BACKUP_LOW from the VDET bit.
+ * RTC_VL_CLR clears VDET on r2025sd/r2221tl and is a no-op on other chips.
+ *
+ * Returns 0 on success, -ENOIOCTLCMD for any other command, the negative
+ * errno from the CTRL2 read, -EIO if the CTRL2 write fails, or the
+ * put_user() result.
+ */
+static int rs5c372_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+	struct rs5c372 *rs5c = i2c_get_clientdata(to_i2c_client(dev));
+	unsigned char ctrl2;
+	int addr;
+	int ret;
+	unsigned int flags;
+
+	dev_dbg(dev, "%s: cmd=%x\n", __func__, cmd);
+
+	/* Reject unsupported commands before generating any bus traffic. */
+	if (cmd != RTC_VL_READ && cmd != RTC_VL_CLR)
+		return -ENOIOCTLCMD;
+
+	addr = RS5C_ADDR(RS5C_REG_CTRL2);
+	/* Keep the result in an int first: a negative errno must not be
+	 * truncated into a byte and then interpreted as register contents.
+	 */
+	ret = i2c_smbus_read_byte_data(rs5c->client, addr);
+	if (ret < 0)
+		return ret;
+	ctrl2 = ret;
+
+	switch (cmd) {
+	case RTC_VL_READ:
+		flags = 0;
+
+		switch (rs5c->type) {
+		case rtc_r2025sd:
+		case rtc_r2221tl:
+			if ((rs5c->type == rtc_r2025sd && !(ctrl2 & R2x2x_CTRL2_XSTP)) ||
+			    (rs5c->type == rtc_r2221tl && (ctrl2 & R2x2x_CTRL2_XSTP))) {
+				flags |= RTC_VL_DATA_INVALID;
+			}
+			if (ctrl2 & R2x2x_CTRL2_VDET)
+				flags |= RTC_VL_BACKUP_LOW;
+			break;
+		default:
+			if (ctrl2 & RS5C_CTRL2_XSTP)
+				flags |= RTC_VL_DATA_INVALID;
+			break;
+		}
+
+		return put_user(flags, (unsigned int __user *)arg);
+	case RTC_VL_CLR:
+		/* clear VDET bit */
+		if (rs5c->type == rtc_r2025sd || rs5c->type == rtc_r2221tl) {
+			ctrl2 &= ~R2x2x_CTRL2_VDET;
+			if (i2c_smbus_write_byte_data(rs5c->client, addr, ctrl2) < 0) {
+				dev_dbg(&rs5c->client->dev, "%s: write error in line %i\n",
+					__func__, __LINE__);
+				return -EIO;
+			}
+		}
+		return 0;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+#else
+#define rs5c372_ioctl NULL
+#endif
+
+/*
+ * rtc_class_ops .read_offset: decode the cached trim register into an offset.
+ *
+ * Uses the cached copy in rs5c->regs[] rather than reading the chip. The step
+ * size depends on the chip type and, where applicable, on the DEV/XSL bit.
+ * The result is stored in *offset, in the same unit as ppb_per_step (parts
+ * per billion). Always returns 0.
+ */
+static int rs5c372_read_offset(struct device *dev, long *offset)
+{
+ struct rs5c372 *rs5c = i2c_get_clientdata(to_i2c_client(dev));
+ u8 val = rs5c->regs[RS5C372_REG_TRIM];
+ long ppb_per_step = 0;
+ bool decr = val & RS5C372_TRIM_DECR;
+
+ switch (rs5c->type) {
+ case rtc_r2221tl:
+ ppb_per_step = val & R2221TL_TRIM_DEV ? 1017 : 3051;
+ break;
+ case rtc_rs5c372a:
+ case rtc_rs5c372b:
+ ppb_per_step = val & RS5C372_TRIM_XSL ? 3125 : 3051;
+ break;
+ default:
+ ppb_per_step = 3051;
+ break;
+ }
+
+ /* Only bits[0:5] represent the time counts */
+ val &= 0x3F;
+
+ /* If bits[1:5] are all 0, it means no increment or decrement */
+ if (!(val & 0x3E)) {
+ *offset = 0;
+ } else {
+ if (decr)
+ *offset = -(((~val) & 0x3F) + 1) * ppb_per_step;
+ else
+ *offset = (val - 1) * ppb_per_step;
+ }
+
+ return 0;
+}
+
+/*
+ * rtc_class_ops .set_offset: encode an offset into the trim register.
+ *
+ * The offset is rounded to the nearest step. On the r2221tl the 1017 ppb
+ * high resolution mode is preferred and the 3051 ppb mode is the fallback.
+ * On rs5c372a/b the current XSL bit selects the step size and is preserved.
+ *
+ * Returns 0 on success (the cached register copy is updated), -ERANGE if the
+ * offset needs more than 0x3E steps in either direction, or -EIO if the
+ * register write fails.
+ */
+static int rs5c372_set_offset(struct device *dev, long offset)
+{
+ struct rs5c372 *rs5c = i2c_get_clientdata(to_i2c_client(dev));
+ int addr = RS5C_ADDR(RS5C372_REG_TRIM);
+ u8 val = 0;
+ u8 tmp = 0;
+ long ppb_per_step = 3051;
+ long steps = LONG_MIN;
+
+ switch (rs5c->type) {
+ case rtc_rs5c372a:
+ case rtc_rs5c372b:
+ tmp = rs5c->regs[RS5C372_REG_TRIM];
+ if (tmp & RS5C372_TRIM_XSL) {
+ ppb_per_step = 3125;
+ val |= RS5C372_TRIM_XSL;
+ }
+ break;
+ case rtc_r2221tl:
+ /*
+ * Check if it is possible to use high resolution mode (DEV=1).
+ * In this mode, the minimum resolution is 2 / (32768 * 20 * 3),
+ * which is about 1017 ppb.
+ */
+ steps = DIV_ROUND_CLOSEST(offset, 1017);
+ if (steps >= -0x3E && steps <= 0x3E) {
+ ppb_per_step = 1017;
+ val |= R2221TL_TRIM_DEV;
+ } else {
+ /*
+ * offset is out of the range of high resolution mode.
+ * Try to use low resolution mode (DEV=0). In this mode,
+ * the minimum resolution is 2 / (32768 * 20), which is
+ * about 3051 ppb.
+ */
+ steps = LONG_MIN;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (steps == LONG_MIN) {
+ steps = DIV_ROUND_CLOSEST(offset, ppb_per_step);
+ if (steps > 0x3E || steps < -0x3E)
+ return -ERANGE;
+ }
+
+ if (steps > 0) {
+ val |= steps + 1;
+ } else {
+ val |= RS5C372_TRIM_DECR;
+ val |= (~(-steps - 1)) & 0x3F;
+ }
+
+ if (!steps || !(val & 0x3E)) {
+ /*
+ * if offset is too small, set oscillation adjustment register
+ * or time trimming register with its default value which means
+ * no increment or decrement. But for rs5c372[a|b], the XSL bit
+ * should be kept unchanged.
+ */
+ if (rs5c->type == rtc_rs5c372a || rs5c->type == rtc_rs5c372b)
+ val &= RS5C372_TRIM_XSL;
+ else
+ val = 0;
+ }
+
+ dev_dbg(&rs5c->client->dev, "write 0x%x for offset %ld\n", val, offset);
+
+ if (i2c_smbus_write_byte_data(rs5c->client, addr, val) < 0) {
+ dev_err(&rs5c->client->dev, "failed to write 0x%x to reg %d\n", val, addr);
+ return -EIO;
+ }
+
+ rs5c->regs[RS5C372_REG_TRIM] = val;
+
+ return 0;
+}
+
static const struct rtc_class_ops rs5c372_rtc_ops = {
.proc = rs5c372_rtc_proc,
.read_time = rs5c372_rtc_read_time,
@@ -492,6 +668,9 @@ static const struct rtc_class_ops rs5c372_rtc_ops = {
.read_alarm = rs5c_read_alarm,
.set_alarm = rs5c_set_alarm,
.alarm_irq_enable = rs5c_rtc_alarm_irq_enable,
+ .ioctl = rs5c372_ioctl,
+ .read_offset = rs5c372_read_offset,
+ .set_offset = rs5c372_set_offset,
};
#if IS_ENABLED(CONFIG_RTC_INTF_SYSFS)
diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 0d5ed38bf60c..f69e0b1137cd 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -55,6 +55,7 @@
enum rv8803_type {
rv_8803,
+ rx_8804,
rx_8900
};
@@ -601,6 +602,7 @@ static int rv8803_probe(struct i2c_client *client,
static const struct i2c_device_id rv8803_id[] = {
{ "rv8803", rv_8803 },
+ { "rv8804", rx_8804 },
{ "rx8803", rv_8803 },
{ "rx8900", rx_8900 },
{ }
@@ -617,6 +619,10 @@ static const __maybe_unused struct of_device_id rv8803_of_match[] = {
.data = (void *)rv_8803
},
{
+ .compatible = "epson,rx8804",
+ .data = (void *)rx_8804
+ },
+ {
.compatible = "epson,rx8900",
.data = (void *)rx_8900
},
diff --git a/drivers/rtc/rtc-sunplus.c b/drivers/rtc/rtc-sunplus.c
new file mode 100644
index 000000000000..e8e2ab1103fc
--- /dev/null
+++ b/drivers/rtc/rtc-sunplus.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * The RTC driver for Sunplus SP7021
+ *
+ * Copyright (C) 2019 Sunplus Technology Inc., All rights reserved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/rtc.h>
+
+#define RTC_REG_NAME "rtc" /* name of the MEM resource looked up in sp_rtc_probe() */
+
+#define RTC_CTRL 0x40 /* control register, offset from reg_base */
+#define TIMER_FREEZE_MASK_BIT BIT(5 + 16) /* NOTE(review): every *_MASK_BIT sits 16 bits above its value bit; presumably a per-bit write enable - confirm against the SP7021 datasheet */
+#define TIMER_FREEZE BIT(5)
+#define DIS_SYS_RST_RTC_MASK_BIT BIT(4 + 16)
+#define DIS_SYS_RST_RTC BIT(4) /* set in probe to keep the RTC from system reset */
+#define RTC32K_MODE_RESET_MASK_BIT BIT(3 + 16)
+#define RTC32K_MODE_RESET BIT(3)
+#define ALARM_EN_OVERDUE_MASK_BIT BIT(2 + 16)
+#define ALARM_EN_OVERDUE BIT(2) /* set together with ALARM_EN_PMC and ALARM_EN when the alarm irq is enabled */
+#define ALARM_EN_PMC_MASK_BIT BIT(1 + 16)
+#define ALARM_EN_PMC BIT(1)
+#define ALARM_EN_MASK_BIT BIT(0 + 16)
+#define ALARM_EN BIT(0)
+#define RTC_TIMER_OUT 0x44 /* read by sp_get_seconds() for the current time in seconds */
+#define RTC_DIVIDER 0x48 /* not referenced by this driver */
+#define RTC_TIMER_SET 0x4c /* written by sp_set_seconds() to set the time */
+#define RTC_ALARM_SET 0x50 /* alarm time in seconds; written by set_alarm, read back by read_alarm */
+#define RTC_USER_DATA 0x54 /* not referenced by this driver */
+#define RTC_RESET_RECORD 0x58 /* not referenced by this driver */
+#define RTC_BATT_CHARGE_CTRL 0x5c /* trickle charger control, offset from reg_base */
+#define BAT_CHARGE_RSEL_MASK_BIT GENMASK(3 + 16, 2 + 16)
+#define BAT_CHARGE_RSEL_MASK GENMASK(3, 2) /* charge resistor selection field */
+#define BAT_CHARGE_RSEL_2K_OHM FIELD_PREP(BAT_CHARGE_RSEL_MASK, 0)
+#define BAT_CHARGE_RSEL_250_OHM FIELD_PREP(BAT_CHARGE_RSEL_MASK, 1)
+#define BAT_CHARGE_RSEL_50_OHM FIELD_PREP(BAT_CHARGE_RSEL_MASK, 2)
+#define BAT_CHARGE_RSEL_0_OHM FIELD_PREP(BAT_CHARGE_RSEL_MASK, 3)
+#define BAT_CHARGE_DSEL_MASK_BIT BIT(1 + 16)
+#define BAT_CHARGE_DSEL_MASK GENMASK(1, 1) /* diode selection field (single bit) */
+#define BAT_CHARGE_DSEL_ON FIELD_PREP(BAT_CHARGE_DSEL_MASK, 0) /* field value 0: charge path with diode, per the table above sp_rtc_set_trickle_charger() */
+#define BAT_CHARGE_DSEL_OFF FIELD_PREP(BAT_CHARGE_DSEL_MASK, 1) /* field value 1: charge path without diode */
+#define BAT_CHARGE_EN_MASK_BIT BIT(0 + 16)
+#define BAT_CHARGE_EN BIT(0) /* charger enable */
+#define RTC_TRIM_CTRL 0x60 /* not referenced by this driver */
+
+struct sunplus_rtc { /* per-device driver state, stored as drvdata of the platform device */
+ struct rtc_device *rtc; /* from devm_rtc_allocate_device() */
+ struct resource *res; /* MEM resource named RTC_REG_NAME */
+ struct clk *rtcclk; /* from devm_clk_get(); enabled in probe, disabled in remove */
+ struct reset_control *rstc; /* exclusive reset; deasserted in probe, asserted in remove */
+ void __iomem *reg_base; /* mapping of res; the register macros are offsets from it */
+ int irq; /* from platform_get_irq(); also used as the wake irq across suspend */
+};
+
+/* Read the RTC_TIMER_OUT register and hand its value back through @secs. */
+static void sp_get_seconds(struct device *dev, unsigned long *secs)
+{
+	struct sunplus_rtc *priv = dev_get_drvdata(dev);
+	void __iomem *timer_out = priv->reg_base + RTC_TIMER_OUT;
+
+	*secs = (unsigned long)readl(timer_out);
+}
+
+/* Write @secs, truncated to 32 bits, to the RTC_TIMER_SET register. */
+static void sp_set_seconds(struct device *dev, unsigned long secs)
+{
+	struct sunplus_rtc *priv = dev_get_drvdata(dev);
+	void __iomem *timer_set = priv->reg_base + RTC_TIMER_SET;
+
+	writel((u32)secs, timer_set);
+}
+
+/*
+ * rtc_class_ops.read_time: convert the seconds value read from the hardware
+ * into @tm. Always returns 0.
+ */
+static int sp_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned long now;
+
+	sp_get_seconds(dev, &now);
+
+	rtc_time64_to_tm(now, tm);
+	return 0;
+}
+
+/*
+ * rtc_class_ops.set_time: convert @tm to seconds and program the hardware
+ * with it. Always returns 0.
+ */
+static int sp_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned long when = rtc_tm_to_time64(tm);
+
+	dev_dbg(dev, "%s, secs = %lu\n", __func__, when);
+
+	sp_set_seconds(dev, when);
+	return 0;
+}
+
+/*
+ * rtc_class_ops.set_alarm: write the alarm time, as 32-bit seconds, to
+ * RTC_ALARM_SET. Only alrm->time is used here. Always returns 0.
+ */
+static int sp_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct sunplus_rtc *priv = dev_get_drvdata(dev);
+	unsigned long when = rtc_tm_to_time64(&alrm->time);
+	u32 raw = (u32)when;
+
+	dev_dbg(dev, "%s, alarm_time: %u\n", __func__, raw);
+	writel(raw, priv->reg_base + RTC_ALARM_SET);
+
+	return 0;
+}
+
+/*
+ * rtc_class_ops.read_alarm: read RTC_ALARM_SET back into alrm->time.
+ * The alarm is reported as enabled whenever the register value is non-zero.
+ * Always returns 0.
+ */
+static int sp_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct sunplus_rtc *priv = dev_get_drvdata(dev);
+	unsigned int raw = readl(priv->reg_base + RTC_ALARM_SET);
+
+	dev_dbg(dev, "%s, alarm_time: %u\n", __func__, raw);
+
+	alrm->enabled = raw ? 1 : 0;
+	rtc_time64_to_tm((unsigned long)raw, &alrm->time);
+
+	return 0;
+}
+
+/*
+ * rtc_class_ops.alarm_irq_enable: program RTC_CTRL for the alarm.
+ *
+ * Enabling writes all six mask bits together with DIS_SYS_RST_RTC and the
+ * three alarm enable bits, so TIMER_FREEZE and RTC32K_MODE_RESET are
+ * written as 0. Disabling writes only the three alarm mask bits with all
+ * value bits clear. Always returns 0.
+ */
+static int sp_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct sunplus_rtc *priv = dev_get_drvdata(dev);
+	u32 ctrl;
+
+	if (!enabled) {
+		ctrl = (ALARM_EN_OVERDUE_MASK_BIT | ALARM_EN_PMC_MASK_BIT | ALARM_EN_MASK_BIT) |
+		       0x0;
+	} else {
+		ctrl = (TIMER_FREEZE_MASK_BIT | DIS_SYS_RST_RTC_MASK_BIT |
+			RTC32K_MODE_RESET_MASK_BIT | ALARM_EN_OVERDUE_MASK_BIT |
+			ALARM_EN_PMC_MASK_BIT | ALARM_EN_MASK_BIT) |
+		       (DIS_SYS_RST_RTC | ALARM_EN_OVERDUE | ALARM_EN_PMC | ALARM_EN);
+	}
+
+	writel(ctrl, priv->reg_base + RTC_CTRL);
+
+	return 0;
+}
+
+/* RTC class callbacks: time and alarm accessors plus the alarm irq switch. */
+static const struct rtc_class_ops sp_rtc_ops = {
+	.read_time		= sp_rtc_read_time,
+	.set_time		= sp_rtc_set_time,
+	.read_alarm		= sp_rtc_read_alarm,
+	.set_alarm		= sp_rtc_set_alarm,
+	.alarm_irq_enable	= sp_rtc_alarm_irq_enable,
+};
+
+/*
+ * Alarm interrupt handler. @dev_id is the platform device passed to
+ * devm_request_irq(); one alarm event is reported to the RTC core.
+ */
+static irqreturn_t sp_rtc_irq_handler(int irq, void *dev_id)
+{
+	struct platform_device *pdev = dev_id;
+	struct device *dev = &pdev->dev;
+	struct sunplus_rtc *priv = dev_get_drvdata(dev);
+
+	rtc_update_irq(priv->rtc, 1, RTC_IRQF | RTC_AF);
+	dev_dbg(dev, "[RTC] ALARM INT\n");
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Configure the battery trickle charger from the device tree.
+ *
+ * Both "trickle-resistor-ohms" and "aux-voltage-chargeable" must be present;
+ * if either is missing, or holds an unsupported value, the charger enable
+ * bit is not written.
+ *
+ * NOTE(review): "aux-voltage-chargeable" only selects the diode setting here
+ * (0 -> BAT_CHARGE_DSEL_OFF, 1 -> BAT_CHARGE_DSEL_ON) and charging is enabled
+ * for both values - confirm this matches the binding's intent.
+ *
+ * -------------------------------------------------------------------------------------
+ * bat_charge_rsel   bat_charge_dsel   bat_charge_en   Remarks
+ *       x                 x                0          Disable
+ *       0                 0                1          0.86mA (2K Ohm with diode)
+ *       1                 0                1          1.81mA (250 Ohm with diode)
+ *       2                 0                1          2.07mA (50 Ohm with diode)
+ *       3                 0                1          16.0mA (0 Ohm with diode)
+ *       0                 1                1          1.36mA (2K Ohm without diode)
+ *       1                 1                1          3.99mA (250 Ohm without diode)
+ *       2                 1                1          4.41mA (50 Ohm without diode)
+ *       3                 1                1          16.0mA (0 Ohm without diode)
+ * -------------------------------------------------------------------------------------
+ */
+static void sp_rtc_set_trickle_charger(struct device *dev)
+{
+	struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+	u32 ohms, rsel;
+	u32 chargeable;
+
+	if (of_property_read_u32(dev->of_node, "trickle-resistor-ohms", &ohms) ||
+	    of_property_read_u32(dev->of_node, "aux-voltage-chargeable", &chargeable)) {
+		dev_warn(dev, "battery charger disabled\n");
+		return;
+	}
+
+	switch (ohms) {
+	case 2000:
+		rsel = BAT_CHARGE_RSEL_2K_OHM;
+		break;
+	case 250:
+		rsel = BAT_CHARGE_RSEL_250_OHM;
+		break;
+	case 50:
+		rsel = BAT_CHARGE_RSEL_50_OHM;
+		break;
+	case 0:
+		rsel = BAT_CHARGE_RSEL_0_OHM;
+		break;
+	default:
+		dev_err(dev, "invalid charger resistor value (%u)\n", ohms);
+		return;
+	}
+
+	writel(BAT_CHARGE_RSEL_MASK_BIT | rsel, sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+
+	switch (chargeable) {
+	case 0:
+		writel(BAT_CHARGE_DSEL_MASK_BIT | BAT_CHARGE_DSEL_OFF,
+		       sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+		break;
+	case 1:
+		writel(BAT_CHARGE_DSEL_MASK_BIT | BAT_CHARGE_DSEL_ON,
+		       sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+		break;
+	default:
+		dev_err(dev, "invalid aux-voltage-chargeable value (%u)\n", chargeable);
+		return;
+	}
+
+	writel(BAT_CHARGE_EN_MASK_BIT | BAT_CHARGE_EN, sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+}
+
+/*
+ * Probe: map the registers, acquire irq/clock/reset, enable the hardware,
+ * then allocate and register the RTC device.
+ *
+ * Returns 0 on success or a negative errno. On failure everything that was
+ * enabled by hand (clock, reset deassert, wakeup) is undone in reverse
+ * order; devm-managed resources are released by the driver core.
+ */
+static int sp_rtc_probe(struct platform_device *plat_dev)
+{
+	struct device *dev = &plat_dev->dev;
+	struct sunplus_rtc *sp_rtc;
+	int ret;
+
+	sp_rtc = devm_kzalloc(dev, sizeof(*sp_rtc), GFP_KERNEL);
+	if (!sp_rtc)
+		return -ENOMEM;
+
+	sp_rtc->res = platform_get_resource_byname(plat_dev, IORESOURCE_MEM, RTC_REG_NAME);
+	sp_rtc->reg_base = devm_ioremap_resource(dev, sp_rtc->res);
+	if (IS_ERR(sp_rtc->reg_base))
+		return dev_err_probe(dev, PTR_ERR(sp_rtc->reg_base),
+				     "%s devm_ioremap_resource fail\n", RTC_REG_NAME);
+	/* %pR/%p: res->start is resource_size_t, which "%x" does not match */
+	dev_dbg(dev, "res = %pR, reg_base = %p\n", sp_rtc->res, sp_rtc->reg_base);
+
+	sp_rtc->irq = platform_get_irq(plat_dev, 0);
+	if (sp_rtc->irq < 0)
+		return dev_err_probe(dev, sp_rtc->irq, "platform_get_irq failed\n");
+
+	sp_rtc->rtcclk = devm_clk_get(dev, NULL);
+	if (IS_ERR(sp_rtc->rtcclk))
+		return dev_err_probe(dev, PTR_ERR(sp_rtc->rtcclk),
+				     "devm_clk_get fail\n");
+
+	sp_rtc->rstc = devm_reset_control_get_exclusive(dev, NULL);
+	if (IS_ERR(sp_rtc->rstc))
+		return dev_err_probe(dev, PTR_ERR(sp_rtc->rstc),
+				     "failed to retrieve reset controller\n");
+
+	/* Nothing to undo yet: the clock was not enabled if this fails. */
+	ret = clk_prepare_enable(sp_rtc->rtcclk);
+	if (ret)
+		return ret;
+
+	ret = reset_control_deassert(sp_rtc->rstc);
+	if (ret)
+		goto free_clk;
+
+	device_init_wakeup(dev, 1);
+	dev_set_drvdata(dev, sp_rtc);
+
+	sp_rtc->rtc = devm_rtc_allocate_device(dev);
+	if (IS_ERR(sp_rtc->rtc)) {
+		ret = PTR_ERR(sp_rtc->rtc);
+		goto free_wakeup;
+	}
+
+	sp_rtc->rtc->range_max = U32_MAX;
+	sp_rtc->rtc->range_min = 0;
+	sp_rtc->rtc->ops = &sp_rtc_ops;
+
+	/*
+	 * Request the interrupt only now: the handler dereferences drvdata
+	 * and sp_rtc->rtc, both of which must exist before it can run.
+	 */
+	ret = devm_request_irq(dev, sp_rtc->irq, sp_rtc_irq_handler,
+			       IRQF_TRIGGER_RISING, "rtc irq", plat_dev);
+	if (ret) {
+		ret = dev_err_probe(dev, ret, "devm_request_irq failed:\n");
+		goto free_wakeup;
+	}
+
+	ret = devm_rtc_register_device(sp_rtc->rtc);
+	if (ret)
+		goto free_wakeup;
+
+	/* Setup trickle charger */
+	if (dev->of_node)
+		sp_rtc_set_trickle_charger(dev);
+
+	/* Keep RTC from system reset */
+	writel(DIS_SYS_RST_RTC_MASK_BIT | DIS_SYS_RST_RTC, sp_rtc->reg_base + RTC_CTRL);
+
+	return 0;
+
+free_wakeup:
+	device_init_wakeup(dev, 0);
+	reset_control_assert(sp_rtc->rstc);
+free_clk:
+	clk_disable_unprepare(sp_rtc->rtcclk);
+
+	return ret;
+}
+
+/*
+ * Remove: drop wakeup capability, put the block back into reset and turn
+ * its clock off. Always returns 0.
+ */
+static int sp_rtc_remove(struct platform_device *plat_dev)
+{
+	struct device *dev = &plat_dev->dev;
+	struct sunplus_rtc *priv = dev_get_drvdata(dev);
+
+	device_init_wakeup(dev, 0);
+	reset_control_assert(priv->rstc);
+	clk_disable_unprepare(priv->rtcclk);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+/* System suspend: arm the RTC irq as a wake source if wakeup is allowed. */
+static int sp_rtc_suspend(struct device *dev)
+{
+	struct sunplus_rtc *priv = dev_get_drvdata(dev);
+
+	if (!device_may_wakeup(dev))
+		return 0;
+
+	enable_irq_wake(priv->irq);
+	return 0;
+}
+
+/* System resume: disarm the wake irq that sp_rtc_suspend() armed. */
+static int sp_rtc_resume(struct device *dev)
+{
+	struct sunplus_rtc *priv = dev_get_drvdata(dev);
+
+	if (!device_may_wakeup(dev))
+		return 0;
+
+	disable_irq_wake(priv->irq);
+	return 0;
+}
+#endif
+
+static const struct of_device_id sp_rtc_of_match[] = { /* device tree match table, referenced by sp_rtc_driver */
+ { .compatible = "sunplus,sp7021-rtc" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sp_rtc_of_match);
+
+static SIMPLE_DEV_PM_OPS(sp_rtc_pm_ops, sp_rtc_suspend, sp_rtc_resume); /* suspend/resume only toggle the wake irq */
+
+static struct platform_driver sp_rtc_driver = { /* platform driver glue; registered by module_platform_driver() below */
+ .probe = sp_rtc_probe,
+ .remove = sp_rtc_remove,
+ .driver = {
+ .name = "sp7021-rtc",
+ .of_match_table = sp_rtc_of_match,
+ .pm = &sp_rtc_pm_ops,
+ },
+};
+module_platform_driver(sp_rtc_driver);
+
+MODULE_AUTHOR("Vincent Shih <vincent.sunplus@gmail.com>");
+MODULE_DESCRIPTION("Sunplus RTC driver");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index d24cafe02708..511bf8e0a436 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void *data)
goto out;
}
+ /* re-init to undo drop from zfcp_fc_adisc() */
+ port->d_id = ntoh24(adisc_resp->adisc_port_id);
/* port is good, unblock rport without going through erp */
zfcp_scsi_schedule_rport_register(port);
out:
@@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_port *port)
struct zfcp_fc_req *fc_req;
struct zfcp_adapter *adapter = port->adapter;
struct Scsi_Host *shost = adapter->scsi_host;
+ u32 d_id;
int ret;
fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC);
@@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_port *port)
fc_req->u.adisc.req.adisc_cmd = ELS_ADISC;
hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost));
- ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els,
+ d_id = port->d_id; /* remember as destination for send els below */
+ /*
+ * Force fresh GID_PN lookup on next port recovery.
+ * Must happen after request setup and before sending request,
+ * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler().
+ */
+ port->d_id = 0;
+
+ ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els,
ZFCP_FC_CTELS_TMO);
if (ret)
kmem_cache_free(zfcp_fc_req_cache, fc_req);
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index b9482da79512..3ebe66151dcb 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -1567,8 +1567,6 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
pci_try_set_mwi(pdev);
retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (retval)
- retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (retval) {
TW_PRINTK(host, TW_DRIVER, 0x18, "Failed to set dma mask");
retval = -ENODEV;
@@ -1786,8 +1784,6 @@ static int __maybe_unused twl_resume(struct device *dev)
pci_try_set_mwi(pdev);
retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (retval)
- retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (retval) {
TW_PRINTK(host, TW_DRIVER, 0x25, "Failed to set dma mask during resume");
retval = -ENODEV;
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index 3ad3ebaca8e9..ad4972c0fc53 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1507,7 +1507,6 @@ NCR_700_intr(int irq, void *dev_id)
struct scsi_cmnd *SCp = hostdata->cmd;
handled = 1;
- SCp = hostdata->cmd;
if(istat & SCSI_INT_PENDING) {
udelay(10);
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 59f6b7b2a70a..b04d039da276 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -271,7 +271,7 @@ MODULE_PARM_DESC(msi, "IRQ handling."
" 0=PIC(default), 1=MSI, 2=MSI-X)");
module_param(startup_timeout, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(startup_timeout, "The duration of time in seconds to wait for"
- " adapter to have it's kernel up and\n"
+ " adapter to have its kernel up and\n"
"running. This is typically adjusted for large systems that do not"
" have a BIOS.");
module_param(aif_timeout, int, S_IRUGO|S_IWUSR);
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 5d566d2b2997..928099163f0f 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -755,11 +755,7 @@ ahd_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev,
static int
ahd_linux_abort(struct scsi_cmnd *cmd)
{
- int error;
-
- error = ahd_linux_queue_abort_cmd(cmd);
-
- return error;
+ return ahd_linux_queue_abort_cmd(cmd);
}
/*
diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index 440ef32be048..e5aa982ffedc 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -732,9 +732,6 @@ bfad_pci_init(struct pci_dev *pdev, struct bfad_s *bfad)
pci_set_master(pdev);
rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (rc)
- rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-
if (rc) {
rc = -ENODEV;
printk(KERN_ERR "dma_set_mask_and_coherent fail %p\n", pdev);
@@ -1560,9 +1557,6 @@ bfad_pci_slot_reset(struct pci_dev *pdev)
rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(64));
if (rc)
- rc = dma_set_mask_and_coherent(&bfad->pcidev->dev,
- DMA_BIT_MASK(32));
- if (rc)
goto out_disable_device;
if (restart_bfa(bfad) == -1)
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 71fa62bd3083..9be273c320e2 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -82,7 +82,7 @@ static int bnx2fc_bind_pcidev(struct bnx2fc_hba *hba);
static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba);
static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface,
struct device *parent, int npiv);
-static void bnx2fc_destroy_work(struct work_struct *work);
+static void bnx2fc_port_destroy(struct fcoe_port *port);
static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev);
static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device
@@ -907,9 +907,6 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event,
__bnx2fc_destroy(interface);
}
mutex_unlock(&bnx2fc_dev_lock);
-
- /* Ensure ALL destroy work has been completed before return */
- flush_workqueue(bnx2fc_wq);
return;
default:
@@ -1215,8 +1212,8 @@ static int bnx2fc_vport_destroy(struct fc_vport *vport)
mutex_unlock(&n_port->lp_mutex);
bnx2fc_free_vport(interface->hba, port->lport);
bnx2fc_port_shutdown(port->lport);
+ bnx2fc_port_destroy(port);
bnx2fc_interface_put(interface);
- queue_work(bnx2fc_wq, &port->destroy_work);
return 0;
}
@@ -1525,7 +1522,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface,
port->lport = lport;
port->priv = interface;
port->get_netdev = bnx2fc_netdev;
- INIT_WORK(&port->destroy_work, bnx2fc_destroy_work);
/* Configure fcoe_port */
rc = bnx2fc_lport_config(lport);
@@ -1653,8 +1649,8 @@ static void __bnx2fc_destroy(struct bnx2fc_interface *interface)
bnx2fc_interface_cleanup(interface);
bnx2fc_stop(interface);
list_del(&interface->list);
+ bnx2fc_port_destroy(port);
bnx2fc_interface_put(interface);
- queue_work(bnx2fc_wq, &port->destroy_work);
}
/**
@@ -1694,15 +1690,12 @@ netdev_err:
return rc;
}
-static void bnx2fc_destroy_work(struct work_struct *work)
+static void bnx2fc_port_destroy(struct fcoe_port *port)
{
- struct fcoe_port *port;
struct fc_lport *lport;
- port = container_of(work, struct fcoe_port, destroy_work);
lport = port->lport;
-
- BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n");
+ BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport);
bnx2fc_if_destroy(lport);
}
@@ -2556,9 +2549,6 @@ static void bnx2fc_ulp_exit(struct cnic_dev *dev)
__bnx2fc_destroy(interface);
mutex_unlock(&bnx2fc_dev_lock);
- /* Ensure ALL destroy work has been completed before return */
- flush_workqueue(bnx2fc_wq);
-
bnx2fc_ulp_stop(hba);
/* unregister cnic device */
if (test_and_clear_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic))
diff --git a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c
index ae62fc3c9ee3..b08fc8839808 100644
--- a/drivers/scsi/elx/efct/efct_driver.c
+++ b/drivers/scsi/elx/efct/efct_driver.c
@@ -541,13 +541,10 @@ efct_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, efct);
- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0) {
- dev_warn(&pdev->dev, "trying DMA_BIT_MASK(32)\n");
- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) {
- dev_err(&pdev->dev, "setting DMA_BIT_MASK failed\n");
- rc = -1;
- goto dma_mask_out;
- }
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (rc) {
+ dev_err(&pdev->dev, "setting DMA_BIT_MASK failed\n");
+ goto dma_mask_out;
}
num_interrupts = efct_device_interrupts_required(efct);
diff --git a/drivers/scsi/elx/libefc/efc_els.c b/drivers/scsi/elx/libefc/efc_els.c
index 7bb4f9aad2c8..84bc81d7ce76 100644
--- a/drivers/scsi/elx/libefc/efc_els.c
+++ b/drivers/scsi/elx/libefc/efc_els.c
@@ -46,18 +46,14 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen)
efc = node->efc;
- spin_lock_irqsave(&node->els_ios_lock, flags);
-
if (!node->els_io_enabled) {
efc_log_err(efc, "els io alloc disabled\n");
- spin_unlock_irqrestore(&node->els_ios_lock, flags);
return NULL;
}
els = mempool_alloc(efc->els_io_pool, GFP_ATOMIC);
if (!els) {
atomic_add_return(1, &efc->els_io_alloc_failed_count);
- spin_unlock_irqrestore(&node->els_ios_lock, flags);
return NULL;
}
@@ -74,7 +70,6 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen)
&els->io.req.phys, GFP_KERNEL);
if (!els->io.req.virt) {
mempool_free(els, efc->els_io_pool);
- spin_unlock_irqrestore(&node->els_ios_lock, flags);
return NULL;
}
@@ -94,10 +89,11 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen)
/* add els structure to ELS IO list */
INIT_LIST_HEAD(&els->list_entry);
+ spin_lock_irqsave(&node->els_ios_lock, flags);
list_add_tail(&els->list_entry, &node->els_ios_list);
+ spin_unlock_irqrestore(&node->els_ios_lock, flags);
}
- spin_unlock_irqrestore(&node->els_ios_lock, flags);
return els;
}
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index f46f679fe825..2f53a2ee024a 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1525,16 +1525,11 @@ static void hisi_sas_send_ata_reset_each_phy(struct hisi_hba *hisi_hba,
struct device *dev = hisi_hba->dev;
int s = sizeof(struct host_to_dev_fis);
int rc = TMF_RESP_FUNC_FAILED;
- struct asd_sas_phy *sas_phy;
struct ata_link *link;
u8 fis[20] = {0};
- u32 state;
int i;
- state = hisi_hba->hw->get_phys_state(hisi_hba);
for (i = 0; i < hisi_hba->n_phy; i++) {
- if (!(state & BIT(sas_phy->id)))
- continue;
if (!(sas_port->phy_mask & BIT(i)))
continue;
@@ -2671,9 +2666,6 @@ static struct Scsi_Host *hisi_sas_shost_alloc(struct platform_device *pdev,
goto err_out;
error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
- if (error)
- error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
-
if (error) {
dev_err(dev, "No usable DMA addressing method\n");
goto err_out;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index a45ef9a5e12e..a01a3a7b706b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -4695,8 +4695,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_out;
rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (rc)
- rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (rc) {
dev_err(dev, "No usable DMA addressing method\n");
rc = -ENODEV;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index cd26c0f8c281..1bc0db572d9e 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -17982,8 +17982,8 @@ lpfc_sli4_alloc_xri(struct lpfc_hba *phba)
* the driver starts at 0 each time.
*/
spin_lock_irq(&phba->hbalock);
- xri = find_next_zero_bit(phba->sli4_hba.xri_bmask,
- phba->sli4_hba.max_cfg_param.max_xri, 0);
+ xri = find_first_zero_bit(phba->sli4_hba.xri_bmask,
+ phba->sli4_hba.max_cfg_param.max_xri);
if (xri >= phba->sli4_hba.max_cfg_param.max_xri) {
spin_unlock_irq(&phba->hbalock);
return NO_XRI;
@@ -19660,7 +19660,7 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba)
max_rpi = phba->sli4_hba.max_cfg_param.max_rpi;
rpi_limit = phba->sli4_hba.next_rpi;
- rpi = find_next_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit, 0);
+ rpi = find_first_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit);
if (rpi >= rpi_limit)
rpi = LPFC_RPI_ALLOC_ERROR;
else {
@@ -20303,8 +20303,8 @@ next_priority:
* have been tested so that we can detect when we should
* change the priority level.
*/
- next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask,
- LPFC_SLI4_FCF_TBL_INDX_MAX, 0);
+ next_fcf_index = find_first_bit(phba->fcf.fcf_rr_bmask,
+ LPFC_SLI4_FCF_TBL_INDX_MAX);
}
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 0d31d7a5e335..bf987f3a7f3f 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -192,23 +192,21 @@ mega_query_adapter(adapter_t *adapter)
{
dma_addr_t prod_info_dma_handle;
mega_inquiry3 *inquiry3;
- u8 raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ u8 *raw_mbox = (u8 *)&mbox;
int retval;
/* Initialize adapter inquiry mailbox */
- mbox = (mbox_t *)raw_mbox;
-
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
/*
* Try to issue Inquiry3 command
* if not succeeded, then issue MEGA_MBOXCMD_ADAPTERINQ command and
* update enquiry3 structure
*/
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
inquiry3 = (mega_inquiry3 *)adapter->mega_buffer;
@@ -232,10 +230,10 @@ mega_query_adapter(adapter_t *adapter)
inq = &ext_inq->raid_inq;
- mbox->m_out.xferaddr = (u32)dma_handle;
+ mbox.xferaddr = (u32)dma_handle;
/*issue old 0x04 command to adapter */
- mbox->m_out.cmd = MEGA_MBOXCMD_ADPEXTINQ;
+ mbox.cmd = MEGA_MBOXCMD_ADPEXTINQ;
issue_scb_block(adapter, raw_mbox);
@@ -262,7 +260,7 @@ mega_query_adapter(adapter_t *adapter)
sizeof(mega_product_info),
DMA_FROM_DEVICE);
- mbox->m_out.xferaddr = prod_info_dma_handle;
+ mbox.xferaddr = prod_info_dma_handle;
raw_mbox[0] = FC_NEW_CONFIG; /* i.e. mbox->cmd=0xA1 */
raw_mbox[2] = NC_SUBOP_PRODUCT_INFO; /* i.e. 0x0E */
@@ -3569,16 +3567,14 @@ mega_n_to_m(void __user *arg, megacmd_t *mc)
static int
mega_is_bios_enabled(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
-
- mbox = (mbox_t *)raw_mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
raw_mbox[0] = IS_BIOS_ENABLED;
raw_mbox[2] = GET_BIOS;
@@ -3600,13 +3596,11 @@ mega_is_bios_enabled(adapter_t *adapter)
static void
mega_enum_raid_scsi(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
int i;
- mbox = (mbox_t *)raw_mbox;
-
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
/*
* issue command to find out what channels are raid/scsi
@@ -3616,7 +3610,7 @@ mega_enum_raid_scsi(adapter_t *adapter)
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
/*
* Non-ROMB firmware fail this command, so all channels
@@ -3655,23 +3649,21 @@ static void
mega_get_boot_drv(adapter_t *adapter)
{
struct private_bios_data *prv_bios_data;
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
u16 cksum = 0;
u8 *cksum_p;
u8 boot_pdrv;
int i;
- mbox = (mbox_t *)raw_mbox;
-
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
raw_mbox[0] = BIOS_PVT_DATA;
raw_mbox[2] = GET_BIOS_PVT_DATA;
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
adapter->boot_ldrv_enabled = 0;
adapter->boot_ldrv = 0;
@@ -3721,13 +3713,11 @@ mega_get_boot_drv(adapter_t *adapter)
static int
mega_support_random_del(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
int rval;
- mbox = (mbox_t *)raw_mbox;
-
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
/*
* issue command
@@ -3750,13 +3740,11 @@ mega_support_random_del(adapter_t *adapter)
static int
mega_support_ext_cdb(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
int rval;
- mbox = (mbox_t *)raw_mbox;
-
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
/*
* issue command to find out if controller supports extended CDBs.
*/
@@ -3865,16 +3853,14 @@ mega_do_del_logdrv(adapter_t *adapter, int logdrv)
static void
mega_get_max_sgl(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
- mbox = (mbox_t *)raw_mbox;
-
- memset(mbox, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
raw_mbox[0] = MAIN_MISC_OPCODE;
raw_mbox[2] = GET_MAX_SG_SUPPORT;
@@ -3888,7 +3874,7 @@ mega_get_max_sgl(adapter_t *adapter)
}
else {
adapter->sglen = *((char *)adapter->mega_buffer);
-
+
/*
* Make sure this is not more than the resources we are
* planning to allocate
@@ -3910,16 +3896,14 @@ mega_get_max_sgl(adapter_t *adapter)
static int
mega_support_cluster(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
-
- mbox = (mbox_t *)raw_mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
- memset(mbox, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
/*
* Try to get the initiator id. This command will succeed iff the
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index c39dd4978c9d..15bdc21ead66 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -901,7 +901,7 @@ static const struct {
},
{ MPI3MR_RESET_FROM_SYSFS, "sysfs invocation" },
{ MPI3MR_RESET_FROM_SYSFS_TIMEOUT, "sysfs TM timeout" },
- { MPI3MR_RESET_FROM_FIRMWARE, "firmware asynchronus reset" },
+ { MPI3MR_RESET_FROM_FIRMWARE, "firmware asynchronous reset" },
};
/**
@@ -1242,7 +1242,7 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc)
ioc_state = mpi3mr_get_iocstate(mrioc);
if (ioc_state == MRIOC_STATE_READY) {
ioc_info(mrioc,
- "successfully transistioned to %s state\n",
+ "successfully transitioned to %s state\n",
mpi3mr_iocstate_name(ioc_state));
return 0;
}
@@ -2174,7 +2174,7 @@ out:
* mpi3mr_check_rh_fault_ioc - check reset history and fault
* controller
* @mrioc: Adapter instance reference
- * @reason_code, reason code for the fault.
+ * @reason_code: reason code for the fault.
*
* This routine will save snapdump and fault the controller with
* the given reason code if it is not already in the fault or
@@ -3633,7 +3633,6 @@ static int mpi3mr_enable_events(struct mpi3mr_ioc *mrioc)
/**
* mpi3mr_init_ioc - Initialize the controller
* @mrioc: Adapter instance reference
- * @init_type: Flag to indicate is the init_type
*
* This the controller initialization routine, executed either
* after soft reset or from pci probe callback.
@@ -3844,7 +3843,7 @@ retry_init:
if (mrioc->shost->nr_hw_queues > mrioc->num_op_reply_q) {
ioc_err(mrioc,
- "cannot create minimum number of operatioanl queues expected:%d created:%d\n",
+ "cannot create minimum number of operational queues expected:%d created:%d\n",
mrioc->shost->nr_hw_queues, mrioc->num_op_reply_q);
goto out_failed_noretry;
}
@@ -4174,7 +4173,7 @@ static void mpi3mr_issue_ioc_shutdown(struct mpi3mr_ioc *mrioc)
/**
* mpi3mr_cleanup_ioc - Cleanup controller
* @mrioc: Adapter instance reference
-
+ *
* controller cleanup handler, Message unit reset or soft reset
* and shutdown notification is issued to the controller.
*
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index a0af986633d2..949e98d523e2 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -77,8 +77,8 @@
#define MPT3SAS_DRIVER_NAME "mpt3sas"
#define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
#define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION "39.100.00.00"
-#define MPT3SAS_MAJOR_VERSION 39
+#define MPT3SAS_DRIVER_VERSION "40.100.00.00"
+#define MPT3SAS_MAJOR_VERSION 40
#define MPT3SAS_MINOR_VERSION 100
#define MPT3SAS_BUILD_VERSION 0
#define MPT3SAS_RELEASE_VERSION 00
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index 05b6c6a073c3..d92ca140d298 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -3533,11 +3533,31 @@ diag_trigger_master_store(struct device *cdev,
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+ struct SL_WH_MASTER_TRIGGER_T *master_tg;
unsigned long flags;
ssize_t rc;
+ bool set = 1;
- spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
rc = min(sizeof(struct SL_WH_MASTER_TRIGGER_T), count);
+
+ if (ioc->supports_trigger_pages) {
+ master_tg = kzalloc(sizeof(struct SL_WH_MASTER_TRIGGER_T),
+ GFP_KERNEL);
+ if (!master_tg)
+ return -ENOMEM;
+
+ memcpy(master_tg, buf, rc);
+ if (!master_tg->MasterData)
+ set = 0;
+ if (mpt3sas_config_update_driver_trigger_pg1(ioc, master_tg,
+ set)) {
+ kfree(master_tg);
+ return -EFAULT;
+ }
+ kfree(master_tg);
+ }
+
+ spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
memset(&ioc->diag_trigger_master, 0,
sizeof(struct SL_WH_MASTER_TRIGGER_T));
memcpy(&ioc->diag_trigger_master, buf, rc);
@@ -3589,11 +3609,31 @@ diag_trigger_event_store(struct device *cdev,
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+ struct SL_WH_EVENT_TRIGGERS_T *event_tg;
unsigned long flags;
ssize_t sz;
+ bool set = 1;
- spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
sz = min(sizeof(struct SL_WH_EVENT_TRIGGERS_T), count);
+ if (ioc->supports_trigger_pages) {
+ event_tg = kzalloc(sizeof(struct SL_WH_EVENT_TRIGGERS_T),
+ GFP_KERNEL);
+ if (!event_tg)
+ return -ENOMEM;
+
+ memcpy(event_tg, buf, sz);
+ if (!event_tg->ValidEntries)
+ set = 0;
+ if (mpt3sas_config_update_driver_trigger_pg2(ioc, event_tg,
+ set)) {
+ kfree(event_tg);
+ return -EFAULT;
+ }
+ kfree(event_tg);
+ }
+
+ spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
+
memset(&ioc->diag_trigger_event, 0,
sizeof(struct SL_WH_EVENT_TRIGGERS_T));
memcpy(&ioc->diag_trigger_event, buf, sz);
@@ -3644,11 +3684,31 @@ diag_trigger_scsi_store(struct device *cdev,
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+ struct SL_WH_SCSI_TRIGGERS_T *scsi_tg;
unsigned long flags;
ssize_t sz;
+ bool set = 1;
+
+ sz = min(sizeof(struct SL_WH_SCSI_TRIGGERS_T), count);
+ if (ioc->supports_trigger_pages) {
+ scsi_tg = kzalloc(sizeof(struct SL_WH_SCSI_TRIGGERS_T),
+ GFP_KERNEL);
+ if (!scsi_tg)
+ return -ENOMEM;
+
+ memcpy(scsi_tg, buf, sz);
+ if (!scsi_tg->ValidEntries)
+ set = 0;
+ if (mpt3sas_config_update_driver_trigger_pg3(ioc, scsi_tg,
+ set)) {
+ kfree(scsi_tg);
+ return -EFAULT;
+ }
+ kfree(scsi_tg);
+ }
spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
- sz = min(sizeof(ioc->diag_trigger_scsi), count);
+
memset(&ioc->diag_trigger_scsi, 0, sizeof(ioc->diag_trigger_scsi));
memcpy(&ioc->diag_trigger_scsi, buf, sz);
if (ioc->diag_trigger_scsi.ValidEntries > NUM_VALID_ENTRIES)
@@ -3698,11 +3758,30 @@ diag_trigger_mpi_store(struct device *cdev,
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+ struct SL_WH_MPI_TRIGGERS_T *mpi_tg;
unsigned long flags;
ssize_t sz;
+ bool set = 1;
- spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count);
+ if (ioc->supports_trigger_pages) {
+ mpi_tg = kzalloc(sizeof(struct SL_WH_MPI_TRIGGERS_T),
+ GFP_KERNEL);
+ if (!mpi_tg)
+ return -ENOMEM;
+
+ memcpy(mpi_tg, buf, sz);
+ if (!mpi_tg->ValidEntries)
+ set = 0;
+ if (mpt3sas_config_update_driver_trigger_pg4(ioc, mpi_tg,
+ set)) {
+ kfree(mpi_tg);
+ return -EFAULT;
+ }
+ kfree(mpi_tg);
+ }
+
+ spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
memset(&ioc->diag_trigger_mpi, 0,
sizeof(ioc->diag_trigger_mpi));
memcpy(&ioc->diag_trigger_mpi, buf, sz);
diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
index 253ceca54a84..7eb8c39da366 100644
--- a/drivers/scsi/myrs.c
+++ b/drivers/scsi/myrs.c
@@ -2267,7 +2267,8 @@ static void myrs_cleanup(struct myrs_hba *cs)
myrs_unmap(cs);
if (cs->mmio_base) {
- cs->disable_intr(cs);
+ if (cs->disable_intr)
+ cs->disable_intr(cs);
iounmap(cs->mmio_base);
cs->mmio_base = NULL;
}
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index 8b9e889bc306..92c818a8a84a 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -1557,6 +1557,9 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev, void *priv_data)
data->MmioAddress = (unsigned long)
ioremap(p_dev->resource[2]->start,
resource_size(p_dev->resource[2]));
+ if (!data->MmioAddress)
+ goto next_entry;
+
data->MmioLength = resource_size(p_dev->resource[2]);
}
/* If we got this far, we're cool! */
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index c9a16eef38c1..160ee8b228c9 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -1199,7 +1199,7 @@ int pm8001_abort_task(struct sas_task *task)
struct pm8001_device *pm8001_dev;
struct pm8001_tmf_task tmf_task;
int rc = TMF_RESP_FUNC_FAILED, ret;
- u32 phy_id;
+ u32 phy_id, port_id;
struct sas_task_slow slow_task;
if (unlikely(!task || !task->lldd_task || !task->dev))
@@ -1246,6 +1246,7 @@ int pm8001_abort_task(struct sas_task *task)
DECLARE_COMPLETION_ONSTACK(completion_reset);
DECLARE_COMPLETION_ONSTACK(completion);
struct pm8001_phy *phy = pm8001_ha->phy + phy_id;
+ port_id = phy->port->port_id;
/* 1. Set Device state as Recovery */
pm8001_dev->setds_completion = &completion;
@@ -1297,6 +1298,10 @@ int pm8001_abort_task(struct sas_task *task)
PORT_RESET_TMO);
if (phy->port_reset_status == PORT_RESET_TMO) {
pm8001_dev_gone_notify(dev);
+ PM8001_CHIP_DISP->hw_event_ack_req(
+ pm8001_ha, 0,
+ 0x07, /*HW_EVENT_PHY_DOWN ack*/
+ port_id, phy_id, 0, 0);
goto out;
}
}
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
index 83eec16d021d..a17da1cebce1 100644
--- a/drivers/scsi/pm8001/pm8001_sas.h
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -216,6 +216,9 @@ struct pm8001_dispatch {
u32 state);
int (*sas_re_init_req)(struct pm8001_hba_info *pm8001_ha);
int (*fatal_errors)(struct pm8001_hba_info *pm8001_ha);
+ void (*hw_event_ack_req)(struct pm8001_hba_info *pm8001_ha,
+ u32 Qnum, u32 SEA, u32 port_id, u32 phyId, u32 param0,
+ u32 param1);
};
struct pm8001_chip_info {
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index ad3c6da12715..2530d1365556 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -3712,8 +3712,10 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
break;
case HW_EVENT_PORT_RESET_TIMER_TMO:
pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_RESET_TIMER_TMO\n");
- pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
- port_id, phy_id, 0, 0);
+ if (!pm8001_ha->phy[phy_id].reset_completion) {
+ pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
+ port_id, phy_id, 0, 0);
+ }
sas_phy_disconnected(sas_phy);
phy->phy_attached = 0;
sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR,
@@ -4149,10 +4151,22 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec)
u32 ret = MPI_IO_STATUS_FAIL;
u32 regval;
+ /*
+ * Fatal errors are programmed to be signalled in irq vector
+ * pm8001_ha->max_q_num - 1 through pm8001_ha->main_cfg_tbl.pm80xx_tbl.
+ * fatal_err_interrupt
+ */
if (vec == (pm8001_ha->max_q_num - 1)) {
+ u32 mipsall_ready;
+
+ if (pm8001_ha->chip_id == chip_8008 ||
+ pm8001_ha->chip_id == chip_8009)
+ mipsall_ready = SCRATCH_PAD_MIPSALL_READY_8PORT;
+ else
+ mipsall_ready = SCRATCH_PAD_MIPSALL_READY_16PORT;
+
regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1);
- if ((regval & SCRATCH_PAD_MIPSALL_READY) !=
- SCRATCH_PAD_MIPSALL_READY) {
+ if ((regval & mipsall_ready) != mipsall_ready) {
pm8001_ha->controller_fatal_error = true;
pm8001_dbg(pm8001_ha, FAIL,
"Firmware Fatal error! Regval:0x%x\n",
@@ -5055,4 +5069,5 @@ const struct pm8001_dispatch pm8001_80xx_dispatch = {
.fw_flash_update_req = pm8001_chip_fw_flash_update_req,
.set_dev_state_req = pm8001_chip_set_dev_state_req,
.fatal_errors = pm80xx_fatal_errors,
+ .hw_event_ack_req = pm80xx_hw_event_ack_req,
};
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h
index c7e5d93bea92..c41ed039c92a 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.h
+++ b/drivers/scsi/pm8001/pm80xx_hwi.h
@@ -1405,8 +1405,12 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t;
#define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0
#define SCRATCH_PAD_IOP0_READY 0xC00
#define SCRATCH_PAD_IOP1_READY 0x3000
-#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \
+#define SCRATCH_PAD_MIPSALL_READY_16PORT (SCRATCH_PAD_IOP1_READY | \
SCRATCH_PAD_IOP0_READY | \
+ SCRATCH_PAD_ILA_READY | \
+ SCRATCH_PAD_RAAE_READY)
+#define SCRATCH_PAD_MIPSALL_READY_8PORT (SCRATCH_PAD_IOP0_READY | \
+ SCRATCH_PAD_ILA_READY | \
SCRATCH_PAD_RAAE_READY)
/* boot loader state */
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c
index 99a56ca1fb16..fab43dabe5b3 100644
--- a/drivers/scsi/qedf/qedf_io.c
+++ b/drivers/scsi/qedf/qedf_io.c
@@ -2250,6 +2250,7 @@ process_els:
io_req->tm_flags == FCP_TMF_TGT_RESET) {
clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags);
io_req->sc_cmd = NULL;
+ kref_put(&io_req->refcount, qedf_release_cmd);
complete(&io_req->tm_done);
}
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 1bf7a22d4948..6ad28bc8e948 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -911,7 +911,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport)
struct qed_link_output if_link;
if (lport->vport) {
- QEDF_ERR(NULL, "Cannot issue host reset on NPIV port.\n");
+ printk_ratelimited("Cannot issue host reset on NPIV port.\n");
return;
}
@@ -1415,6 +1415,8 @@ static void qedf_upload_connection(struct qedf_ctx *qedf,
*/
term_params = dma_alloc_coherent(&qedf->pdev->dev, QEDF_TERM_BUFF_SIZE,
&term_params_dma, GFP_KERNEL);
+ if (!term_params)
+ return;
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Uploading connection "
"port_id=%06x.\n", fcport->rdata->ids.port_id);
@@ -1862,6 +1864,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled)
vport_qedf->cmd_mgr = base_qedf->cmd_mgr;
init_completion(&vport_qedf->flogi_compl);
INIT_LIST_HEAD(&vport_qedf->fcports);
+ INIT_DELAYED_WORK(&vport_qedf->stag_work, qedf_stag_change_work);
rc = qedf_vport_libfc_config(vport, vn_port);
if (rc) {
@@ -3978,7 +3981,9 @@ void qedf_stag_change_work(struct work_struct *work)
struct qedf_ctx *qedf =
container_of(work, struct qedf_ctx, stag_work.work);
- QEDF_ERR(&qedf->dbg_ctx, "Performing software context reset.\n");
+ printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.",
+ dev_name(&qedf->pdev->dev), __func__, __LINE__,
+ qedf->dbg_ctx.host_no);
qedf_ctx_soft_reset(qedf->lport);
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 35e381f6d371..0a70aa763a96 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2067,7 +2067,6 @@ void scsi_exit_queue(void)
* @sdev: SCSI device to be queried
* @pf: Page format bit (1 == standard, 0 == vendor specific)
* @sp: Save page bit (0 == don't save, 1 == save)
- * @modepage: mode page being requested
* @buffer: request buffer (may not be smaller than eight bytes)
* @len: length of request buffer.
* @timeout: command timeout
@@ -2080,10 +2079,9 @@ void scsi_exit_queue(void)
* status on error
*
*/
-int
-scsi_mode_select(struct scsi_device *sdev, int pf, int sp, int modepage,
- unsigned char *buffer, int len, int timeout, int retries,
- struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
+int scsi_mode_select(struct scsi_device *sdev, int pf, int sp,
+ unsigned char *buffer, int len, int timeout, int retries,
+ struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
{
unsigned char cmd[10];
unsigned char *real_buffer;
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index d6982d355739..95aee1ad1383 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -49,7 +49,7 @@ static DEFINE_MUTEX(global_host_template_mutex);
static ssize_t proc_scsi_host_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct Scsi_Host *shost = PDE_DATA(file_inode(file));
+ struct Scsi_Host *shost = pde_data(file_inode(file));
ssize_t ret = -ENOMEM;
char *page;
@@ -79,7 +79,7 @@ static int proc_scsi_show(struct seq_file *m, void *v)
static int proc_scsi_host_open(struct inode *inode, struct file *file)
{
- return single_open_size(file, proc_scsi_show, PDE_DATA(inode),
+ return single_open_size(file, proc_scsi_show, pde_data(inode),
4 * PAGE_SIZE);
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 0e73c3f2f381..62eb9921cc94 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -209,7 +209,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
*/
data.device_specific = 0;
- if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT,
+ if (scsi_mode_select(sdp, 1, sp, buffer_data, len, SD_TIMEOUT,
sdkp->max_retries, &data, &sshdr)) {
if (scsi_sense_valid(&sshdr))
sd_print_sense_hdr(sdkp, &sshdr);
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index ad12b3261845..6b43e97bd417 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -77,7 +77,7 @@ static int sg_proc_init(void);
#define SG_DEFAULT_TIMEOUT mult_frac(SG_DEFAULT_TIMEOUT_USER, HZ, USER_HZ)
-int sg_big_buff = SG_DEF_RESERVED_SIZE;
+static int sg_big_buff = SG_DEF_RESERVED_SIZE;
/* N.B. This variable is readable and writeable via
/proc/scsi/sg/def_reserved_size . Each time sg_open() is called a buffer
of this size (or less if there is not enough memory) will be reserved
@@ -1634,6 +1634,37 @@ MODULE_PARM_DESC(scatter_elem_sz, "scatter gather element "
MODULE_PARM_DESC(def_reserved_size, "size of buffer reserved for each fd");
MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))");
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+
+static struct ctl_table sg_sysctls[] = {
+ {
+ .procname = "sg-big-buff",
+ .data = &sg_big_buff,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {}
+};
+
+static struct ctl_table_header *hdr;
+static void register_sg_sysctls(void)
+{
+ if (!hdr)
+ hdr = register_sysctl("kernel", sg_sysctls);
+}
+
+static void unregister_sg_sysctls(void)
+{
+ if (hdr)
+ unregister_sysctl_table(hdr);
+}
+#else
+#define register_sg_sysctls() do { } while (0)
+#define unregister_sg_sysctls() do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
static int __init
init_sg(void)
{
@@ -1666,6 +1697,7 @@ init_sg(void)
return 0;
}
class_destroy(sg_sysfs_class);
+ register_sg_sysctls();
err_out:
unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS);
return rc;
@@ -1674,6 +1706,7 @@ err_out:
static void __exit
exit_sg(void)
{
+ unregister_sg_sysctls();
#ifdef CONFIG_SCSI_PROC_FS
remove_proc_subtree("scsi/sg", NULL);
#endif /* CONFIG_SCSI_PROC_FS */
diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c
index 5393b5c9dd9c..86a938075f30 100644
--- a/drivers/scsi/ufs/ufs-mediatek.c
+++ b/drivers/scsi/ufs/ufs-mediatek.c
@@ -557,7 +557,7 @@ static void ufs_mtk_init_va09_pwr_ctrl(struct ufs_hba *hba)
struct ufs_mtk_host *host = ufshcd_get_variant(hba);
host->reg_va09 = regulator_get(hba->dev, "va09");
- if (!host->reg_va09)
+ if (IS_ERR(host->reg_va09))
dev_info(hba->dev, "failed to get va09");
else
host->caps |= UFS_MTK_CAP_VA09_PWR_CTRL;
diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
index 8b16bbbcb806..87975d1a21c8 100644
--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
@@ -92,6 +92,11 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba)
clki->min_freq = clkfreq[i];
clki->max_freq = clkfreq[i+1];
clki->name = devm_kstrdup(dev, name, GFP_KERNEL);
+ if (!clki->name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
if (!strcmp(name, "ref_clk"))
clki->keep_link_active = true;
dev_dbg(dev, "%s: min %u max %u name %s\n", "freq-table-hz",
@@ -127,6 +132,8 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
return -ENOMEM;
vreg->name = devm_kstrdup(dev, name, GFP_KERNEL);
+ if (!vreg->name)
+ return -ENOMEM;
snprintf(prop_name, MAX_PROP_SIZE, "%s-max-microamp", name);
if (of_property_read_u32(np, prop_name, &vreg->max_uA)) {
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1049e41abd5b..50b12d60dc1b 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7815,7 +7815,7 @@ static int ufshcd_quirk_tune_host_pa_tactivate(struct ufs_hba *hba)
peer_pa_tactivate_us = peer_pa_tactivate *
gran_to_us_table[peer_granularity - 1];
- if (pa_tactivate_us > peer_pa_tactivate_us) {
+ if (pa_tactivate_us >= peer_pa_tactivate_us) {
u32 new_peer_pa_tactivate;
new_peer_pa_tactivate = pa_tactivate_us /
@@ -8613,7 +8613,7 @@ static void ufshcd_hba_exit(struct ufs_hba *hba)
* @pwr_mode: device power mode to set
*
* Returns 0 if requested power mode is set successfully
- * Returns non-zero if failed to set the requested power mode
+ * Returns < 0 if failed to set the requested power mode
*/
static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
enum ufs_dev_pwr_mode pwr_mode)
@@ -8667,8 +8667,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
sdev_printk(KERN_WARNING, sdp,
"START_STOP failed for power mode: %d, result %x\n",
pwr_mode, ret);
- if (ret > 0 && scsi_sense_valid(&sshdr))
- scsi_print_sense_hdr(sdp, NULL, &sshdr);
+ if (ret > 0) {
+ if (scsi_sense_valid(&sshdr))
+ scsi_print_sense_hdr(sdp, NULL, &sshdr);
+ ret = -EIO;
+ }
}
if (!ret)
diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index 6a295c88d850..a7ff0e5b5494 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h
@@ -142,7 +142,8 @@ static inline u32 ufshci_version(u32 major, u32 minor)
#define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\
CONTROLLER_FATAL_ERROR |\
SYSTEM_BUS_FATAL_ERROR |\
- CRYPTO_ENGINE_FATAL_ERROR)
+ CRYPTO_ENGINE_FATAL_ERROR |\
+ UIC_LINK_LOST)
/* HCS - Host Controller Status 30h */
#define DEVICE_PRESENT 0x1
diff --git a/drivers/soc/canaan/Kconfig b/drivers/soc/canaan/Kconfig
index 853096b7e84c..2527cf5757ec 100644
--- a/drivers/soc/canaan/Kconfig
+++ b/drivers/soc/canaan/Kconfig
@@ -5,7 +5,6 @@ config SOC_K210_SYSCTL
depends on RISCV && SOC_CANAAN && OF
default SOC_CANAAN
select PM
- select SYSCON
select MFD_SYSCON
help
Canaan Kendryte K210 SoC system controller driver.
diff --git a/drivers/soc/fsl/qbman/bman_portal.c b/drivers/soc/fsl/qbman/bman_portal.c
index acda8a5637c5..4d7b9caee1c4 100644
--- a/drivers/soc/fsl/qbman/bman_portal.c
+++ b/drivers/soc/fsl/qbman/bman_portal.c
@@ -155,7 +155,7 @@ static int bman_portal_probe(struct platform_device *pdev)
}
spin_lock(&bman_lock);
- cpu = cpumask_next_zero(-1, &portal_cpus);
+ cpu = cpumask_first_zero(&portal_cpus);
if (cpu >= nr_cpu_ids) {
__bman_portals_probed = 1;
/* unassigned portal, skip init */
diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c
index 96f74a1dc603..e23b60618c1a 100644
--- a/drivers/soc/fsl/qbman/qman_portal.c
+++ b/drivers/soc/fsl/qbman/qman_portal.c
@@ -248,7 +248,7 @@ static int qman_portal_probe(struct platform_device *pdev)
pcfg->pools = qm_get_pools_sdqcr();
spin_lock(&qman_lock);
- cpu = cpumask_next_zero(-1, &portal_cpus);
+ cpu = cpumask_first_zero(&portal_cpus);
if (cpu >= nr_cpu_ids) {
__qman_portals_probed = 1;
/* unassigned portal, skip init */
diff --git a/drivers/soc/ti/k3-ringacc.c b/drivers/soc/ti/k3-ringacc.c
index 56be39161489..31ab6c657fec 100644
--- a/drivers/soc/ti/k3-ringacc.c
+++ b/drivers/soc/ti/k3-ringacc.c
@@ -358,8 +358,8 @@ struct k3_ring *k3_ringacc_request_ring(struct k3_ringacc *ringacc,
goto out;
if (flags & K3_RINGACC_RING_USE_PROXY) {
- proxy_id = find_next_zero_bit(ringacc->proxy_inuse,
- ringacc->num_proxies, 0);
+ proxy_id = find_first_zero_bit(ringacc->proxy_inuse,
+ ringacc->num_proxies);
if (proxy_id == ringacc->num_proxies)
goto error;
}
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index 8075f60fd02c..2d5cf1714ae0 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -443,6 +443,9 @@ static bool iscsit_tpg_check_network_portal(
break;
}
spin_unlock(&tpg->tpg_np_lock);
+
+ if (match)
+ break;
}
spin_unlock(&tiqn->tiqn_tpg_lock);
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 8502b7d8df89..72acb1f61849 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -596,6 +596,7 @@ static const struct acpi_device_id int3400_thermal_match[] = {
{"INT3400", 0},
{"INTC1040", 0},
{"INTC1041", 0},
+ {"INTC10A0", 0},
{}
};
diff --git a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
index c3c4c4d34542..07e25321dfe3 100644
--- a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
@@ -285,6 +285,7 @@ static const struct acpi_device_id int3403_device_ids[] = {
{"INT3403", 0},
{"INTC1043", 0},
{"INTC1046", 0},
+ {"INTC10A1", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, int3403_device_ids);
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
index 9b2a64ef55d0..49932a68abac 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
@@ -24,6 +24,7 @@
#define PCI_DEVICE_ID_INTEL_HSB_THERMAL 0x0A03
#define PCI_DEVICE_ID_INTEL_ICL_THERMAL 0x8a03
#define PCI_DEVICE_ID_INTEL_JSL_THERMAL 0x4E03
+#define PCI_DEVICE_ID_INTEL_RPL_THERMAL 0xA71D
#define PCI_DEVICE_ID_INTEL_SKL_THERMAL 0x1903
#define PCI_DEVICE_ID_INTEL_TGL_THERMAL 0x9A03
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
index b4bcd3fe9eb2..ca40b0967cdd 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -358,6 +358,7 @@ static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, proc_thermal_pci_suspend,
static const struct pci_device_id proc_thermal_pci_ids[] = {
{ PCI_DEVICE_DATA(INTEL, ADL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) },
+ { PCI_DEVICE_DATA(INTEL, RPL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) },
{ },
};
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index ba27b274c967..0b1808e3a912 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -322,6 +322,7 @@ static int addr_cnt;
#define GSM1_ESCAPE_BITS 0x20
#define XON 0x11
#define XOFF 0x13
+#define ISO_IEC_646_MASK 0x7F
static const struct tty_port_operations gsm_port_ops;
@@ -531,7 +532,8 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len)
int olen = 0;
while (len--) {
if (*input == GSM1_SOF || *input == GSM1_ESCAPE
- || *input == XON || *input == XOFF) {
+ || (*input & ISO_IEC_646_MASK) == XON
+ || (*input & ISO_IEC_646_MASK) == XOFF) {
*output++ = GSM1_ESCAPE;
*output++ = *input++ ^ GSM1_ESCAPE_BITS;
olen++;
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index a38fd65e39ab..8933ef1f83c0 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1938,7 +1938,7 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty,
more = n - (size - tail);
if (eol == N_TTY_BUF_SIZE && more) {
/* scan wrapped without finding set bit */
- eol = find_next_bit(ldata->read_flags, more, 0);
+ eol = find_first_bit(ldata->read_flags, more);
found = eol != more;
} else
found = eol != size;
diff --git a/drivers/tty/rpmsg_tty.c b/drivers/tty/rpmsg_tty.c
index dae2a4e44f38..29db413bbc03 100644
--- a/drivers/tty/rpmsg_tty.c
+++ b/drivers/tty/rpmsg_tty.c
@@ -50,10 +50,17 @@ static int rpmsg_tty_cb(struct rpmsg_device *rpdev, void *data, int len, void *p
static int rpmsg_tty_install(struct tty_driver *driver, struct tty_struct *tty)
{
struct rpmsg_tty_port *cport = idr_find(&tty_idr, tty->index);
+ struct tty_port *port;
tty->driver_data = cport;
- return tty_port_install(&cport->port, driver, tty);
+ port = tty_port_get(&cport->port);
+ return tty_port_install(port, driver, tty);
+}
+
+static void rpmsg_tty_cleanup(struct tty_struct *tty)
+{
+ tty_port_put(tty->port);
}
static int rpmsg_tty_open(struct tty_struct *tty, struct file *filp)
@@ -106,12 +113,19 @@ static unsigned int rpmsg_tty_write_room(struct tty_struct *tty)
return size;
}
+static void rpmsg_tty_hangup(struct tty_struct *tty)
+{
+ tty_port_hangup(tty->port);
+}
+
static const struct tty_operations rpmsg_tty_ops = {
.install = rpmsg_tty_install,
.open = rpmsg_tty_open,
.close = rpmsg_tty_close,
.write = rpmsg_tty_write,
.write_room = rpmsg_tty_write_room,
+ .hangup = rpmsg_tty_hangup,
+ .cleanup = rpmsg_tty_cleanup,
};
static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void)
@@ -137,8 +151,10 @@ static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void)
return cport;
}
-static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport)
+static void rpmsg_tty_destruct_port(struct tty_port *port)
{
+ struct rpmsg_tty_port *cport = container_of(port, struct rpmsg_tty_port, port);
+
mutex_lock(&idr_lock);
idr_remove(&tty_idr, cport->id);
mutex_unlock(&idr_lock);
@@ -146,7 +162,10 @@ static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport)
kfree(cport);
}
-static const struct tty_port_operations rpmsg_tty_port_ops = { };
+static const struct tty_port_operations rpmsg_tty_port_ops = {
+ .destruct = rpmsg_tty_destruct_port,
+};
+
static int rpmsg_tty_probe(struct rpmsg_device *rpdev)
{
@@ -166,7 +185,8 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev)
cport->id, dev);
if (IS_ERR(tty_dev)) {
ret = dev_err_probe(dev, PTR_ERR(tty_dev), "Failed to register tty port\n");
- goto err_destroy;
+ tty_port_put(&cport->port);
+ return ret;
}
cport->rpdev = rpdev;
@@ -177,12 +197,6 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev)
rpdev->src, rpdev->dst, cport->id);
return 0;
-
-err_destroy:
- tty_port_destroy(&cport->port);
- rpmsg_tty_release_cport(cport);
-
- return ret;
}
static void rpmsg_tty_remove(struct rpmsg_device *rpdev)
@@ -192,13 +206,11 @@ static void rpmsg_tty_remove(struct rpmsg_device *rpdev)
dev_dbg(&rpdev->dev, "Removing rpmsg tty device %d\n", cport->id);
/* User hang up to release the tty */
- if (tty_port_initialized(&cport->port))
- tty_port_tty_hangup(&cport->port, false);
+ tty_port_tty_hangup(&cport->port, false);
tty_unregister_device(rpmsg_tty_driver, cport->id);
- tty_port_destroy(&cport->port);
- rpmsg_tty_release_cport(cport);
+ tty_port_put(&cport->port);
}
static struct rpmsg_device_id rpmsg_driver_tty_id_table[] = {
diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c
index bce28729dd7b..be8626234627 100644
--- a/drivers/tty/serial/8250/8250_of.c
+++ b/drivers/tty/serial/8250/8250_of.c
@@ -83,8 +83,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev,
port->mapsize = resource_size(&resource);
/* Check for shifted address mapping */
- if (of_property_read_u32(np, "reg-offset", &prop) == 0)
+ if (of_property_read_u32(np, "reg-offset", &prop) == 0) {
+ if (prop >= port->mapsize) {
+ dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n",
+ prop, &port->mapsize);
+ ret = -EINVAL;
+ goto err_unprepare;
+ }
+
port->mapbase += prop;
+ port->mapsize -= prop;
+ }
port->iotype = UPIO_MEM;
if (of_property_read_u32(np, "reg-io-width", &prop) == 0) {
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index e8b5469e9dfa..e17e97ea86fa 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -4779,8 +4779,30 @@ static const struct pci_device_id serial_pci_tbl[] = {
{ PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400,
PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */
pbn_b2_4_115200 },
+ /* Brainboxes Devices */
/*
- * BrainBoxes UC-260
+ * Brainboxes UC-101
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0BA1,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-235/246
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0AA1,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_1_115200 },
+ /*
+ * Brainboxes UC-257
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0861,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-260/271/701/756
*/
{ PCI_VENDOR_ID_INTASHIELD, 0x0D21,
PCI_ANY_ID, PCI_ANY_ID,
@@ -4788,7 +4810,81 @@ static const struct pci_device_id serial_pci_tbl[] = {
pbn_b2_4_115200 },
{ PCI_VENDOR_ID_INTASHIELD, 0x0E34,
PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
+ PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
+ pbn_b2_4_115200 },
+ /*
+ * Brainboxes UC-268
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0841,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_4_115200 },
+ /*
+ * Brainboxes UC-275/279
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0881,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_8_115200 },
+ /*
+ * Brainboxes UC-302
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x08E1,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-310
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x08C1,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-313
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x08A3,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-320/324
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0A61,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_1_115200 },
+ /*
+ * Brainboxes UC-346
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0B02,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_4_115200 },
+ /*
+ * Brainboxes UC-357
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0A81,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ { PCI_VENDOR_ID_INTASHIELD, 0x0A83,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-368
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0C41,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_4_115200 },
+ /*
+ * Brainboxes UC-420/431
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0921,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
pbn_b2_4_115200 },
/*
* Perle PCI-RAS cards
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 2abb3de11a48..3b12bfc1ed67 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2056,7 +2056,10 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state)
serial8250_rpm_put(up);
}
-static void wait_for_lsr(struct uart_8250_port *up, int bits)
+/*
+ * Wait for transmitter & holding register to empty
+ */
+static void wait_for_xmitr(struct uart_8250_port *up, int bits)
{
unsigned int status, tmout = 10000;
@@ -2073,16 +2076,6 @@ static void wait_for_lsr(struct uart_8250_port *up, int bits)
udelay(1);
touch_nmi_watchdog();
}
-}
-
-/*
- * Wait for transmitter & holding register to empty
- */
-static void wait_for_xmitr(struct uart_8250_port *up, int bits)
-{
- unsigned int tmout;
-
- wait_for_lsr(up, bits);
/* Wait up to 1s for flow control if necessary */
if (up->port.flags & UPF_CONS_FLOW) {
@@ -3333,35 +3326,6 @@ static void serial8250_console_restore(struct uart_8250_port *up)
}
/*
- * Print a string to the serial port using the device FIFO
- *
- * It sends fifosize bytes and then waits for the fifo
- * to get empty.
- */
-static void serial8250_console_fifo_write(struct uart_8250_port *up,
- const char *s, unsigned int count)
-{
- int i;
- const char *end = s + count;
- unsigned int fifosize = up->port.fifosize;
- bool cr_sent = false;
-
- while (s != end) {
- wait_for_lsr(up, UART_LSR_THRE);
-
- for (i = 0; i < fifosize && s != end; ++i) {
- if (*s == '\n' && !cr_sent) {
- serial_out(up, UART_TX, '\r');
- cr_sent = true;
- } else {
- serial_out(up, UART_TX, *s++);
- cr_sent = false;
- }
- }
- }
-}
-
-/*
* Print a string to the serial port trying not to disturb
* any possible real use of the port...
*
@@ -3376,7 +3340,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
struct uart_8250_em485 *em485 = up->em485;
struct uart_port *port = &up->port;
unsigned long flags;
- unsigned int ier, use_fifo;
+ unsigned int ier;
int locked = 1;
touch_nmi_watchdog();
@@ -3408,20 +3372,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
mdelay(port->rs485.delay_rts_before_send);
}
- use_fifo = (up->capabilities & UART_CAP_FIFO) &&
- port->fifosize > 1 &&
- (serial_port_in(port, UART_FCR) & UART_FCR_ENABLE_FIFO) &&
- /*
- * After we put a data in the fifo, the controller will send
- * it regardless of the CTS state. Therefore, only use fifo
- * if we don't use control flow.
- */
- !(up->port.flags & UPF_CONS_FLOW);
-
- if (likely(use_fifo))
- serial8250_console_fifo_write(up, s, count);
- else
- uart_console_write(port, s, count, serial8250_console_putchar);
+ uart_console_write(port, s, count, serial8250_console_putchar);
/*
* Finally, wait for transmitter to become empty
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 1f1df46242f9..ba053a68529f 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1582,9 +1582,6 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl)
container_of(port, struct uart_amba_port, port);
unsigned int cr;
- if (port->rs485.flags & SER_RS485_ENABLED)
- mctrl &= ~TIOCM_RTS;
-
cr = pl011_read(uap, REG_CR);
#define TIOCMBIT(tiocmbit, uartbit) \
@@ -1808,14 +1805,8 @@ static int pl011_startup(struct uart_port *port)
cr &= UART011_CR_RTS | UART011_CR_DTR;
cr |= UART01x_CR_UARTEN | UART011_CR_RXE;
- if (port->rs485.flags & SER_RS485_ENABLED) {
- if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND)
- cr &= ~UART011_CR_RTS;
- else
- cr |= UART011_CR_RTS;
- } else {
+ if (!(port->rs485.flags & SER_RS485_ENABLED))
cr |= UART011_CR_TXE;
- }
pl011_write(cr, uap, REG_CR);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index dc40c4155356..0db90be4c3bc 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -144,6 +144,11 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear)
unsigned long flags;
unsigned int old;
+ if (port->rs485.flags & SER_RS485_ENABLED) {
+ set &= ~TIOCM_RTS;
+ clear &= ~TIOCM_RTS;
+ }
+
spin_lock_irqsave(&port->lock, flags);
old = port->mctrl;
port->mctrl = (old & ~clear) | set;
@@ -157,23 +162,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear)
static void uart_port_dtr_rts(struct uart_port *uport, int raise)
{
- int rs485_on = uport->rs485_config &&
- (uport->rs485.flags & SER_RS485_ENABLED);
- int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND);
-
- if (raise) {
- if (rs485_on && RTS_after_send) {
- uart_set_mctrl(uport, TIOCM_DTR);
- uart_clear_mctrl(uport, TIOCM_RTS);
- } else {
- uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
- }
- } else {
- unsigned int clear = TIOCM_DTR;
-
- clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0;
- uart_clear_mctrl(uport, clear);
- }
+ if (raise)
+ uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
+ else
+ uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
}
/*
@@ -1075,11 +1067,6 @@ uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear)
goto out;
if (!tty_io_error(tty)) {
- if (uport->rs485.flags & SER_RS485_ENABLED) {
- set &= ~TIOCM_RTS;
- clear &= ~TIOCM_RTS;
- }
-
uart_update_mctrl(uport, set, clear);
ret = 0;
}
@@ -2390,6 +2377,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
*/
spin_lock_irqsave(&port->lock, flags);
port->mctrl &= TIOCM_DTR;
+ if (port->rs485.flags & SER_RS485_ENABLED &&
+ !(port->rs485.flags & SER_RS485_RTS_AFTER_SEND))
+ port->mctrl |= TIOCM_RTS;
port->ops->set_mctrl(port, port->mctrl);
spin_unlock_irqrestore(&port->lock, flags);
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index 1f89ab0e49ac..9570002d07e7 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -550,11 +550,23 @@ static void stm32_usart_transmit_chars(struct uart_port *port)
struct stm32_port *stm32_port = to_stm32_port(port);
const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
struct circ_buf *xmit = &port->state->xmit;
+ u32 isr;
+ int ret;
if (port->x_char) {
if (stm32_usart_tx_dma_started(stm32_port) &&
stm32_usart_tx_dma_enabled(stm32_port))
stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+
+ /* Check that TDR is empty before filling FIFO */
+ ret =
+ readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr,
+ isr,
+ (isr & USART_SR_TXE),
+ 10, 1000);
+ if (ret)
+ dev_warn(port->dev, "1 character may be erased\n");
+
writel_relaxed(port->x_char, port->membase + ofs->tdr);
port->x_char = 0;
port->icount.tx++;
@@ -730,7 +742,7 @@ static void stm32_usart_start_tx(struct uart_port *port)
struct serial_rs485 *rs485conf = &port->rs485;
struct circ_buf *xmit = &port->state->xmit;
- if (uart_circ_empty(xmit))
+ if (uart_circ_empty(xmit) && !port->x_char)
return;
if (rs485conf->flags & SER_RS485_ENABLED) {
diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c
index 55c73b1d8704..d00ff98dffab 100644
--- a/drivers/usb/cdns3/drd.c
+++ b/drivers/usb/cdns3/drd.c
@@ -483,11 +483,11 @@ int cdns_drd_exit(struct cdns *cdns)
/* Indicate the cdns3 core was power lost before */
bool cdns_power_is_lost(struct cdns *cdns)
{
- if (cdns->version == CDNS3_CONTROLLER_V1) {
- if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0)))
+ if (cdns->version == CDNS3_CONTROLLER_V0) {
+ if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0)))
return true;
} else {
- if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0)))
+ if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0)))
return true;
}
return false;
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
index 4169cf40a03b..8f8405b0d608 100644
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -39,8 +39,11 @@ static int ulpi_match(struct device *dev, struct device_driver *driver)
struct ulpi *ulpi = to_ulpi_dev(dev);
const struct ulpi_device_id *id;
- /* Some ULPI devices don't have a vendor id so rely on OF match */
- if (ulpi->id.vendor == 0)
+ /*
+ * Some ULPI devices don't have a vendor id, and some drivers
+ * don't provide an id_table, so rely on OF match in both cases.
+ */
+ if (ulpi->id.vendor == 0 || !drv->id_table)
return of_driver_match_device(dev, driver);
for (id = drv->id_table; id->vendor; id++)
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 3e01dd6e509b..d9712c2602af 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1563,6 +1563,13 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
urb->hcpriv = NULL;
INIT_LIST_HEAD(&urb->urb_list);
atomic_dec(&urb->use_count);
+ /*
+ * Order the write of urb->use_count above before the read
+ * of urb->reject below. Pairs with the memory barriers in
+ * usb_kill_urb() and usb_poison_urb().
+ */
+ smp_mb__after_atomic();
+
atomic_dec(&urb->dev->urbnum);
if (atomic_read(&urb->reject))
wake_up(&usb_kill_urb_queue);
@@ -1665,6 +1672,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
usb_anchor_resume_wakeups(anchor);
atomic_dec(&urb->use_count);
+ /*
+ * Order the write of urb->use_count above before the read
+ * of urb->reject below. Pairs with the memory barriers in
+ * usb_kill_urb() and usb_poison_urb().
+ */
+ smp_mb__after_atomic();
+
if (unlikely(atomic_read(&urb->reject)))
wake_up(&usb_kill_urb_queue);
usb_put_urb(urb);
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 30727729a44c..33d62d7e3929 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -715,6 +715,12 @@ void usb_kill_urb(struct urb *urb)
if (!(urb && urb->dev && urb->ep))
return;
atomic_inc(&urb->reject);
+ /*
+ * Order the write of urb->reject above before the read
+ * of urb->use_count below. Pairs with the barriers in
+ * __usb_hcd_giveback_urb() and usb_hcd_submit_urb().
+ */
+ smp_mb__after_atomic();
usb_hcd_unlink_urb(urb, -ENOENT);
wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0);
@@ -756,6 +762,12 @@ void usb_poison_urb(struct urb *urb)
if (!urb)
return;
atomic_inc(&urb->reject);
+ /*
+ * Order the write of urb->reject above before the read
+ * of urb->use_count below. Pairs with the barriers in
+ * __usb_hcd_giveback_urb() and usb_hcd_submit_urb().
+ */
+ smp_mb__after_atomic();
if (!urb->dev || !urb->ep)
return;
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 2bc03f41c70a..eee3504397e6 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -5097,7 +5097,7 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg)
hsotg->gadget.speed = USB_SPEED_UNKNOWN;
spin_unlock_irqrestore(&hsotg->lock, flags);
- for (ep = 0; ep < hsotg->num_of_eps; ep++) {
+ for (ep = 1; ep < hsotg->num_of_eps; ep++) {
if (hsotg->eps_in[ep])
dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep);
if (hsotg->eps_out[ep])
diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c
index 9cc3ad701a29..e14ac15e24c3 100644
--- a/drivers/usb/dwc3/dwc3-xilinx.c
+++ b/drivers/usb/dwc3/dwc3-xilinx.c
@@ -102,14 +102,26 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data)
int ret;
u32 reg;
- usb3_phy = devm_phy_get(dev, "usb3-phy");
- if (PTR_ERR(usb3_phy) == -EPROBE_DEFER) {
- ret = -EPROBE_DEFER;
+ usb3_phy = devm_phy_optional_get(dev, "usb3-phy");
+ if (IS_ERR(usb3_phy)) {
+ ret = PTR_ERR(usb3_phy);
+ dev_err_probe(dev, ret,
+ "failed to get USB3 PHY\n");
goto err;
- } else if (IS_ERR(usb3_phy)) {
- usb3_phy = NULL;
}
+ /*
+ * The following core resets are not required unless a USB3 PHY
+ * is used, and the subsequent register settings are not required
+ * unless a core reset is performed (they should be set properly
+ * by the first-stage boot loader, but may be reverted by a core
+ * reset). They may also break the configuration if USB3 is actually
+ * in use but the usb3-phy entry is missing from the device tree.
+ * Therefore, skip these operations in this case.
+ */
+ if (!usb3_phy)
+ goto skip_usb3_phy;
+
crst = devm_reset_control_get_exclusive(dev, "usb_crst");
if (IS_ERR(crst)) {
ret = PTR_ERR(crst);
@@ -188,6 +200,7 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data)
goto err;
}
+skip_usb3_phy:
/*
* This routes the USB DMA traffic to go through FPD path instead
* of reaching DDR directly. This traffic routing is needed to
diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c
index 1abf08e5164a..6803cd60cc6d 100644
--- a/drivers/usb/gadget/function/f_sourcesink.c
+++ b/drivers/usb/gadget/function/f_sourcesink.c
@@ -584,6 +584,7 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in,
if (is_iso) {
switch (speed) {
+ case USB_SPEED_SUPER_PLUS:
case USB_SPEED_SUPER:
size = ss->isoc_maxpacket *
(ss->isoc_mult + 1) *
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index 64de9f1b874c..431d5a7d737e 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -1117,7 +1117,7 @@ static int rndis_proc_show(struct seq_file *m, void *v)
static ssize_t rndis_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- rndis_params *p = PDE_DATA(file_inode(file));
+ rndis_params *p = pde_data(file_inode(file));
u32 speed = 0;
int i, fl_speed = 0;
@@ -1161,7 +1161,7 @@ static ssize_t rndis_proc_write(struct file *file, const char __user *buffer,
static int rndis_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, rndis_proc_show, PDE_DATA(inode));
+ return single_open(file, rndis_proc_show, pde_data(inode));
}
static const struct proc_ops rndis_proc_ops = {
diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c
index dd0819df096e..9040a0561466 100644
--- a/drivers/usb/gadget/udc/at91_udc.c
+++ b/drivers/usb/gadget/udc/at91_udc.c
@@ -1895,7 +1895,7 @@ static int at91udc_probe(struct platform_device *pdev)
at91_vbus_irq, 0, driver_name, udc);
if (retval) {
DBG("request vbus irq %d failed\n",
- udc->board.vbus_pin);
+ desc_to_gpio(udc->board.vbus_pin));
goto err_unprepare_iclk;
}
}
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index c1edcc9b13ce..dc570ce4e831 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -437,6 +437,9 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev)
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
int ret;
+ if (pm_runtime_suspended(dev))
+ pm_runtime_resume(dev);
+
ret = xhci_priv_suspend_quirk(hcd);
if (ret)
return ret;
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 29191d33c0e3..1a05e3dcfec8 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2301,6 +2301,16 @@ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999,
USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init,
US_FL_SCM_MULT_TARG ),
+/*
+ * Reported by DocMAX <mail@vacharakis.de>
+ * and Thomas Weißschuh <linux@weissschuh.net>
+ */
+UNUSUAL_DEV( 0x2109, 0x0715, 0x9999, 0x9999,
+ "VIA Labs, Inc.",
+ "VL817 SATA Bridge",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_IGNORE_UAS),
+
UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001,
"ST",
"2A",
diff --git a/drivers/usb/typec/port-mapper.c b/drivers/usb/typec/port-mapper.c
index 07d307418b47..a7d507802509 100644
--- a/drivers/usb/typec/port-mapper.c
+++ b/drivers/usb/typec/port-mapper.c
@@ -56,7 +56,12 @@ int typec_link_ports(struct typec_port *con)
{
struct each_port_arg arg = { .port = con, .match = NULL };
+ if (!has_acpi_companion(&con->dev))
+ return 0;
+
bus_for_each_dev(&acpi_bus_type, NULL, &arg, typec_port_match);
+ if (!arg.match)
+ return 0;
/*
* REVISIT: Now each connector can have only a single component master.
@@ -74,5 +79,6 @@ int typec_link_ports(struct typec_port *con)
void typec_unlink_ports(struct typec_port *con)
{
- component_master_del(&con->dev, &typec_aggregate_ops);
+ if (has_acpi_companion(&con->dev))
+ component_master_del(&con->dev, &typec_aggregate_ops);
}
diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
index 35a1307349a2..e07d26a3cd8e 100644
--- a/drivers/usb/typec/tcpm/tcpci.c
+++ b/drivers/usb/typec/tcpm/tcpci.c
@@ -75,9 +75,25 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val)
static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc)
{
struct tcpci *tcpci = tcpc_to_tcpci(tcpc);
+ bool vconn_pres;
+ enum typec_cc_polarity polarity = TYPEC_POLARITY_CC1;
unsigned int reg;
int ret;
+ ret = regmap_read(tcpci->regmap, TCPC_POWER_STATUS, &reg);
+ if (ret < 0)
+ return ret;
+
+ vconn_pres = !!(reg & TCPC_POWER_STATUS_VCONN_PRES);
+ if (vconn_pres) {
+ ret = regmap_read(tcpci->regmap, TCPC_TCPC_CTRL, &reg);
+ if (ret < 0)
+ return ret;
+
+ if (reg & TCPC_TCPC_CTRL_ORIENTATION)
+ polarity = TYPEC_POLARITY_CC2;
+ }
+
switch (cc) {
case TYPEC_CC_RA:
reg = (TCPC_ROLE_CTRL_CC_RA << TCPC_ROLE_CTRL_CC1_SHIFT) |
@@ -112,6 +128,16 @@ static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc)
break;
}
+ if (vconn_pres) {
+ if (polarity == TYPEC_POLARITY_CC2) {
+ reg &= ~(TCPC_ROLE_CTRL_CC1_MASK << TCPC_ROLE_CTRL_CC1_SHIFT);
+ reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC1_SHIFT);
+ } else {
+ reg &= ~(TCPC_ROLE_CTRL_CC2_MASK << TCPC_ROLE_CTRL_CC2_SHIFT);
+ reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC2_SHIFT);
+ }
+ }
+
ret = regmap_write(tcpci->regmap, TCPC_ROLE_CTRL, reg);
if (ret < 0)
return ret;
diff --git a/drivers/usb/typec/tcpm/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h
index 2be7a77d400e..b2edd45f13c6 100644
--- a/drivers/usb/typec/tcpm/tcpci.h
+++ b/drivers/usb/typec/tcpm/tcpci.h
@@ -98,6 +98,7 @@
#define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4)
#define TCPC_POWER_STATUS_VBUS_DET BIT(3)
#define TCPC_POWER_STATUS_VBUS_PRES BIT(2)
+#define TCPC_POWER_STATUS_VCONN_PRES BIT(1)
#define TCPC_POWER_STATUS_SINKING_VBUS BIT(0)
#define TCPC_FAULT_STATUS 0x1f
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 59d4fa2443f2..5fce795b69c7 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5156,7 +5156,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
case SNK_TRYWAIT_DEBOUNCE:
break;
case SNK_ATTACH_WAIT:
- tcpm_set_state(port, SNK_UNATTACHED, 0);
+ case SNK_DEBOUNCED:
+ /* Do nothing, as TCPM is still waiting for vbus to reach VSAFE5V to connect */
break;
case SNK_NEGOTIATE_CAPABILITIES:
@@ -5263,6 +5264,10 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port)
case PR_SWAP_SNK_SRC_SOURCE_ON:
/* Do nothing, vsafe0v is expected during transition */
break;
+ case SNK_ATTACH_WAIT:
+ case SNK_DEBOUNCED:
+ /* Do nothing, still waiting for VSAFE5V to connect */
+ break;
default:
if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled)
tcpm_set_state(port, SNK_UNATTACHED, 0);
diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
index bff96d64dddf..6db7c8ddd51c 100644
--- a/drivers/usb/typec/ucsi/ucsi_ccg.c
+++ b/drivers/usb/typec/ucsi/ucsi_ccg.c
@@ -325,7 +325,7 @@ static int ucsi_ccg_init(struct ucsi_ccg *uc)
if (status < 0)
return status;
- if (!data)
+ if (!(data & DEV_INT))
return 0;
status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index 362f91ec8845..352c725ccf18 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -309,13 +309,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
if ((pos & 3) && size > 2) {
u16 val;
+ __le16 lval;
ret = pci_user_read_config_word(pdev, pos, &val);
if (ret)
return ret;
- val = cpu_to_le16(val);
- if (copy_to_user(buf + count - size, &val, 2))
+ lval = cpu_to_le16(val);
+ if (copy_to_user(buf + count - size, &lval, 2))
return -EFAULT;
pos += 2;
@@ -324,13 +325,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
while (size > 3) {
u32 val;
+ __le32 lval;
ret = pci_user_read_config_dword(pdev, pos, &val);
if (ret)
return ret;
- val = cpu_to_le32(val);
- if (copy_to_user(buf + count - size, &val, 4))
+ lval = cpu_to_le32(val);
+ if (copy_to_user(buf + count - size, &lval, 4))
return -EFAULT;
pos += 4;
@@ -339,13 +341,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
while (size >= 2) {
u16 val;
+ __le16 lval;
ret = pci_user_read_config_word(pdev, pos, &val);
if (ret)
return ret;
- val = cpu_to_le16(val);
- if (copy_to_user(buf + count - size, &val, 2))
+ lval = cpu_to_le16(val);
+ if (copy_to_user(buf + count - size, &lval, 2))
return -EFAULT;
pos += 2;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index f17490ab238f..9394aa9444c1 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -256,7 +256,7 @@ static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
static void vfio_dma_bitmap_free(struct vfio_dma *dma)
{
- kfree(dma->bitmap);
+ kvfree(dma->bitmap);
dma->bitmap = NULL;
}
diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
index 23999df52739..c8e0ea27caf1 100644
--- a/drivers/video/fbdev/hyperv_fb.c
+++ b/drivers/video/fbdev/hyperv_fb.c
@@ -287,8 +287,6 @@ struct hvfb_par {
static uint screen_width = HVFB_WIDTH;
static uint screen_height = HVFB_HEIGHT;
-static uint screen_width_max = HVFB_WIDTH;
-static uint screen_height_max = HVFB_HEIGHT;
static uint screen_depth;
static uint screen_fb_size;
static uint dio_fb_size; /* FB size for deferred IO */
@@ -582,7 +580,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev)
int ret = 0;
unsigned long t;
u8 index;
- int i;
memset(msg, 0, sizeof(struct synthvid_msg));
msg->vid_hdr.type = SYNTHVID_RESOLUTION_REQUEST;
@@ -613,13 +610,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev)
goto out;
}
- for (i = 0; i < msg->resolution_resp.resolution_count; i++) {
- screen_width_max = max_t(unsigned int, screen_width_max,
- msg->resolution_resp.supported_resolution[i].width);
- screen_height_max = max_t(unsigned int, screen_height_max,
- msg->resolution_resp.supported_resolution[i].height);
- }
-
screen_width =
msg->resolution_resp.supported_resolution[index].width;
screen_height =
@@ -941,7 +931,7 @@ static void hvfb_get_option(struct fb_info *info)
if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN ||
(synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10) &&
- (x > screen_width_max || y > screen_height_max)) ||
+ (x * y * screen_depth / 8 > screen_fb_size)) ||
(par->synthvid_version == SYNTHVID_VERSION_WIN8 &&
x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) ||
(par->synthvid_version == SYNTHVID_VERSION_WIN7 &&
@@ -1194,8 +1184,8 @@ static int hvfb_probe(struct hv_device *hdev,
}
hvfb_get_option(info);
- pr_info("Screen resolution: %dx%d, Color depth: %d\n",
- screen_width, screen_height, screen_depth);
+ pr_info("Screen resolution: %dx%d, Color depth: %d, Frame buffer size: %d\n",
+ screen_width, screen_height, screen_depth, screen_fb_size);
ret = hvfb_getmem(hdev, info);
if (ret) {
diff --git a/drivers/virt/acrn/ioreq.c b/drivers/virt/acrn/ioreq.c
index 80b2e3f0e276..5ff1c53740c0 100644
--- a/drivers/virt/acrn/ioreq.c
+++ b/drivers/virt/acrn/ioreq.c
@@ -246,8 +246,7 @@ void acrn_ioreq_request_clear(struct acrn_vm *vm)
spin_lock_bh(&vm->ioreq_clients_lock);
client = vm->default_client;
if (client) {
- vcpu = find_next_bit(client->ioreqs_map,
- ACRN_IO_REQUEST_MAX, 0);
+ vcpu = find_first_bit(client->ioreqs_map, ACRN_IO_REQUEST_MAX);
while (vcpu < ACRN_IO_REQUEST_MAX) {
acrn_ioreq_complete_request(client, vcpu, NULL);
vcpu = find_next_bit(client->ioreqs_map,
diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c
index 1c9ae08225d8..f916bf60b312 100644
--- a/drivers/zorro/proc.c
+++ b/drivers/zorro/proc.c
@@ -30,7 +30,7 @@ proc_bus_zorro_lseek(struct file *file, loff_t off, int whence)
static ssize_t
proc_bus_zorro_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
{
- struct zorro_dev *z = PDE_DATA(file_inode(file));
+ struct zorro_dev *z = pde_data(file_inode(file));
struct ConfigDev cd;
loff_t pos = *ppos;
diff --git a/fs/Makefile b/fs/Makefile
index 84c5e4cdfee5..dab324aea08f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -6,6 +6,8 @@
# Rewritten to use lists instead of if-statements.
#
+obj-$(CONFIG_SYSCTL) += sysctls.o
+
obj-y := open.o read_write.o file_table.o super.o \
char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
ioctl.o readdir.o select.o dcache.o inode.o \
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index adbb3a1edcbf..5156821bfe6a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -355,7 +355,6 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct super_block *sb = inode->i_sb;
struct object_info obj;
- int ret;
obj.indaddr = ADFS_I(inode)->indaddr;
obj.name_len = 0;
@@ -365,6 +364,5 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
obj.attr = ADFS_I(inode)->attr;
obj.size = inode->i_size;
- ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
- return ret;
+ return adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
}
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 065a28bfa3f1..e1b863449296 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -227,7 +227,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
__acquires(cell->proc_lock)
{
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_cell *cell = pde_data(file_inode(m->file));
rcu_read_lock();
return seq_hlist_start_head_rcu(&cell->proc_volumes, *_pos);
@@ -236,7 +236,7 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v,
loff_t *_pos)
{
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_cell *cell = pde_data(file_inode(m->file));
return seq_hlist_next_rcu(v, &cell->proc_volumes, _pos);
}
@@ -322,7 +322,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
{
struct afs_vl_seq_net_private *priv = m->private;
struct afs_vlserver_list *vllist;
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_cell *cell = pde_data(file_inode(m->file));
loff_t pos = *_pos;
rcu_read_lock();
diff --git a/fs/aio.c b/fs/aio.c
index f6f1cbffef9e..4ceba13a7db0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -220,9 +220,35 @@ struct aio_kiocb {
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
-unsigned long aio_nr; /* current system wide number of aio requests */
-unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
+static unsigned long aio_nr; /* current system wide number of aio requests */
+static unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
/*----end sysctl variables---*/
+#ifdef CONFIG_SYSCTL
+static struct ctl_table aio_sysctls[] = {
+ {
+ .procname = "aio-nr",
+ .data = &aio_nr,
+ .maxlen = sizeof(aio_nr),
+ .mode = 0444,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "aio-max-nr",
+ .data = &aio_max_nr,
+ .maxlen = sizeof(aio_max_nr),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {}
+};
+
+static void __init aio_sysctl_init(void)
+{
+ register_sysctl_init("fs", aio_sysctls);
+}
+#else
+#define aio_sysctl_init() do { } while (0)
+#endif
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
@@ -275,6 +301,7 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ aio_sysctl_init();
return 0;
}
__initcall(aio_setup);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8c7f26f1fbb..605017eb9349 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1116,11 +1116,11 @@ out_free_interp:
* independently randomized mmap region (0 load_bias
* without MAP_FIXED nor MAP_FIXED_NOREPLACE).
*/
- if (interpreter) {
+ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+ if (alignment > ELF_MIN_ALIGN) {
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
- alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
if (alignment)
load_bias &= ~(alignment - 1);
elf_flags |= MAP_FIXED_NOREPLACE;
@@ -1585,7 +1585,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
rcu_read_unlock();
- strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+ get_task_comm(psinfo->pr_fname, p);
return 0;
}
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e1eae7ea823a..c07f35719ee3 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -817,16 +817,20 @@ static struct file_system_type bm_fs_type = {
};
MODULE_ALIAS_FS("binfmt_misc");
+static struct ctl_table_header *binfmt_misc_header;
+
static int __init init_misc_binfmt(void)
{
int err = register_filesystem(&bm_fs_type);
if (!err)
insert_binfmt(&misc_format);
- return err;
+ binfmt_misc_header = register_sysctl_mount_point("fs/binfmt_misc");
+ return 0;
}
static void __exit exit_misc_binfmt(void)
{
+ unregister_sysctl_table(binfmt_misc_header);
unregister_binfmt(&misc_format);
unregister_filesystem(&bm_fs_type);
}
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 520a0f6a7d9e..183e5c4aed34 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -18,8 +18,7 @@ config BTRFS_FS
select RAID6_PQ
select XOR_BLOCKS
select SRCU
- depends on !PPC_256K_PAGES # powerpc
- depends on !PAGE_SIZE_256KB # hexagon
+ depends on PAGE_SIZE_LESS_THAN_256KB
help
Btrfs is a general purpose copy-on-write filesystem with extents,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d6d48ecf823c..409bad3928db 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -12,7 +12,6 @@
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
-#include <linux/cleancache.h>
#include <linux/fsverity.h>
#include "misc.h"
#include "extent_io.h"
@@ -3578,15 +3577,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
goto out;
}
- if (!PageUptodate(page)) {
- if (cleancache_get_page(page) == 0) {
- BUG_ON(blocksize != PAGE_SIZE);
- unlock_extent(tree, start, end);
- unlock_page(page);
- goto out;
- }
- }
-
if (page->index == last_byte >> PAGE_SHIFT) {
size_t zero_offset = offset_in_page(last_byte);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a5bd6926f7ff..d8af6620a941 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1214,6 +1214,35 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
goto next;
/*
+ * Our start offset might be in the middle of an existing extent
+ * map, so take that into account.
+ */
+ range_len = em->len - (cur - em->start);
+ /*
+ * If this range of the extent map is already flagged for delalloc,
+ * skip it, because:
+ *
+ * 1) We could deadlock later, when trying to reserve space for
+ * delalloc, because in case we can't immediately reserve space
+ * the flusher can start delalloc and wait for the respective
+ * ordered extents to complete. The deadlock would happen
+ * because we do the space reservation while holding the range
+ * locked, and starting writeback, or finishing an ordered
+ * extent, requires locking the range;
+ *
+ * 2) If there's delalloc there, it means there's dirty pages for
+ * which writeback has not started yet (we clean the delalloc
+ * flag when starting writeback and after creating an ordered
+ * extent). If we mark pages in an adjacent range for defrag,
+ * then we will have a larger contiguous range for delalloc,
+ * very likely resulting in a larger extent after writeback is
+ * triggered (except in a case of free space fragmentation).
+ */
+ if (test_range_bit(&inode->io_tree, cur, cur + range_len - 1,
+ EXTENT_DELALLOC, 0, NULL))
+ goto next;
+
+ /*
* For do_compress case, we want to compress all valid file
* extents, thus no @extent_thresh or mergeable check.
*/
@@ -1221,7 +1250,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
goto add;
/* Skip too large extent */
- if (em->len >= extent_thresh)
+ if (range_len >= extent_thresh)
goto next;
next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
@@ -1442,9 +1471,11 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
list_for_each_entry(entry, &target_list, list) {
u32 range_len = entry->len;
- /* Reached the limit */
- if (max_sectors && max_sectors == *sectors_defragged)
+ /* Reached or beyond the limit */
+ if (max_sectors && *sectors_defragged >= max_sectors) {
+ ret = 1;
break;
+ }
if (max_sectors)
range_len = min_t(u32, range_len,
@@ -1465,7 +1496,8 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
extent_thresh, newer_than, do_compress);
if (ret < 0)
break;
- *sectors_defragged += range_len;
+ *sectors_defragged += range_len >>
+ inode->root->fs_info->sectorsize_bits;
}
out:
list_for_each_entry_safe(entry, tmp, &target_list, list) {
@@ -1484,6 +1516,12 @@ out:
* @newer_than: minimum transid to defrag
* @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode
* will be defragged.
+ *
+ * Return <0 for error.
+ * Return >=0 for the number of sectors defragged, and range->start will be updated
+ * to indicate the file offset where next defrag should be started at.
+ * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without
+ * defragging all the range).
*/
int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
@@ -1499,6 +1537,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
int compress_type = BTRFS_COMPRESS_ZLIB;
int ret = 0;
u32 extent_thresh = range->extent_thresh;
+ pgoff_t start_index;
if (isize == 0)
return 0;
@@ -1518,12 +1557,16 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
if (range->start + range->len > range->start) {
/* Got a specific range */
- last_byte = min(isize, range->start + range->len) - 1;
+ last_byte = min(isize, range->start + range->len);
} else {
/* Defrag until file end */
- last_byte = isize - 1;
+ last_byte = isize;
}
+ /* Align the range */
+ cur = round_down(range->start, fs_info->sectorsize);
+ last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+
/*
* If we were not given a ra, allocate a readahead context. As
* readahead is just an optimization, defrag will work without it so
@@ -1536,16 +1579,26 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
file_ra_state_init(ra, inode->i_mapping);
}
- /* Align the range */
- cur = round_down(range->start, fs_info->sectorsize);
- last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+ /*
+ * Make writeback start from the beginning of the range, so that the
+ * defrag range can be written sequentially.
+ */
+ start_index = cur >> PAGE_SHIFT;
+ if (start_index < inode->i_mapping->writeback_index)
+ inode->i_mapping->writeback_index = start_index;
while (cur < last_byte) {
+ const unsigned long prev_sectors_defragged = sectors_defragged;
u64 cluster_end;
/* The cluster size 256K should always be page aligned */
BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
+ if (btrfs_defrag_cancelled(fs_info)) {
+ ret = -EAGAIN;
+ break;
+ }
+
/* We want the cluster end at page boundary when possible */
cluster_end = (((cur >> PAGE_SHIFT) +
(SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1;
@@ -1567,14 +1620,27 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
cluster_end + 1 - cur, extent_thresh,
newer_than, do_compress,
&sectors_defragged, max_to_defrag);
+
+ if (sectors_defragged > prev_sectors_defragged)
+ balance_dirty_pages_ratelimited(inode->i_mapping);
+
btrfs_inode_unlock(inode, 0);
if (ret < 0)
break;
cur = cluster_end + 1;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
}
if (ra_allocated)
kfree(ra);
+ /*
+ * Update range.start for autodefrag, this will indicate where to start
+ * in next run.
+ */
+ range->start = cur;
if (sectors_defragged) {
/*
* We have defragged some sectors, for compression case they
@@ -3086,10 +3152,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
btrfs_inode_lock(inode, 0);
err = btrfs_delete_subvolume(dir, dentry);
btrfs_inode_unlock(inode, 0);
- if (!err) {
- fsnotify_rmdir(dir, dentry);
- d_delete(dentry);
- }
+ if (!err)
+ d_delete_notify(dir, dentry);
out_dput:
dput(dentry);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0ec09fe01be6..4d947ba32da9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -23,7 +23,6 @@
#include <linux/miscdevice.h>
#include <linux/magic.h>
#include <linux/slab.h>
-#include <linux/cleancache.h>
#include <linux/ratelimit.h>
#include <linux/crc32c.h>
#include <linux/btrfs.h>
@@ -1374,7 +1373,6 @@ static int btrfs_fill_super(struct super_block *sb,
goto fail_close;
}
- cleancache_init_fs(sb);
sb->s_flags |= SB_ACTIVE;
return 0;
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index ce4d4785003c..7077f72e6f47 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -49,11 +49,19 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
}
- /* check parameters */
+ /* Check features of the backing filesystem:
+ * - Directories must support looking up and directory creation
+ * - We create tmpfiles to handle invalidation
+ * - We use xattrs to store metadata
+ * - We need to be able to query the amount of space available
+ * - We want to be able to sync the filesystem when stopping the cache
+ * - We use DIO to/from pages, so the blocksize mustn't be too big.
+ */
ret = -EOPNOTSUPP;
if (d_is_negative(root) ||
!d_backing_inode(root)->i_op->lookup ||
!d_backing_inode(root)->i_op->mkdir ||
+ !d_backing_inode(root)->i_op->tmpfile ||
!(d_backing_inode(root)->i_opflags & IOP_XATTR) ||
!root->d_sb->s_op->statfs ||
!root->d_sb->s_op->sync_fs ||
@@ -84,9 +92,7 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
cache->bsize = stats.f_bsize;
- cache->bshift = 0;
- if (stats.f_bsize < PAGE_SIZE)
- cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize);
+ cache->bshift = ilog2(stats.f_bsize);
_debug("blksize %u (shift %u)",
cache->bsize, cache->bshift);
@@ -106,7 +112,6 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
(unsigned long long) cache->fcull,
(unsigned long long) cache->fstop);
- stats.f_blocks >>= cache->bshift;
do_div(stats.f_blocks, 100);
cache->bstop = stats.f_blocks * cache->bstop_percent;
cache->bcull = stats.f_blocks * cache->bcull_percent;
@@ -209,7 +214,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
return ret;
}
- b_avail = stats.f_bavail >> cache->bshift;
+ b_avail = stats.f_bavail;
b_writing = atomic_long_read(&cache->b_writing);
if (b_avail > b_writing)
b_avail -= b_writing;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 40a792421fc1..7ac04ee2c0a0 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -703,6 +703,17 @@ static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
return -EBUSY;
}
+ /* Make sure we have copies of the tag string */
+ if (!cache->tag) {
+ /*
+ * The tag string is released by the fops->release()
+ * function, so we don't release it on error here
+ */
+ cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
+ if (!cache->tag)
+ return -ENOMEM;
+ }
+
return cachefiles_add_cache(cache);
}
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 8dd54d9375b6..c793d33b0224 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -86,7 +86,7 @@ struct cachefiles_cache {
unsigned bcull_percent; /* when to start culling (% blocks) */
unsigned bstop_percent; /* when to stop allocating (% blocks) */
unsigned bsize; /* cache's block size */
- unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */
+ unsigned bshift; /* ilog2(bsize) */
uint64_t frun; /* when to stop culling */
uint64_t fcull; /* when to start culling */
uint64_t fstop; /* when to stop allocating */
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 60b1eac2ce78..04eb52736990 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -264,7 +264,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
ki->term_func = term_func;
ki->term_func_priv = term_func_priv;
ki->was_async = true;
- ki->b_writing = (len + (1 << cache->bshift)) >> cache->bshift;
+ ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
if (ki->term_func)
ki->iocb.ki_complete = cachefiles_write_complete;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 9bd692870617..f256c8aff7bb 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -25,7 +25,9 @@ static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object,
trace_cachefiles_mark_active(object, inode);
can_use = true;
} else {
- pr_notice("cachefiles: Inode already in use: %pd\n", dentry);
+ trace_cachefiles_mark_failed(object, inode);
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ dentry, inode->i_ino);
}
return can_use;
@@ -101,6 +103,7 @@ retry:
subdir = lookup_one_len(dirname, dir, strlen(dirname));
else
subdir = ERR_PTR(ret);
+ trace_cachefiles_lookup(NULL, dir, subdir);
if (IS_ERR(subdir)) {
trace_cachefiles_vfs_error(NULL, d_backing_inode(dir),
PTR_ERR(subdir),
@@ -135,6 +138,7 @@ retry:
cachefiles_trace_mkdir_error);
goto mkdir_error;
}
+ trace_cachefiles_mkdir(dir, subdir);
if (unlikely(d_unhashed(subdir))) {
cachefiles_put_directory(subdir);
@@ -233,7 +237,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
};
int ret;
- trace_cachefiles_unlink(object, dentry, why);
+ trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why);
ret = security_path_unlink(&path, dentry);
if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error");
@@ -386,7 +390,7 @@ try_again:
.new_dir = d_inode(cache->graveyard),
.new_dentry = grave,
};
- trace_cachefiles_rename(object, rep, grave, why);
+ trace_cachefiles_rename(object, d_inode(rep)->i_ino, why);
ret = cachefiles_inject_read_error();
if (ret == 0)
ret = vfs_rename(&rd);
@@ -617,7 +621,7 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)
object->d_name_len);
else
dentry = ERR_PTR(ret);
- trace_cachefiles_lookup(object, dentry);
+ trace_cachefiles_lookup(object, fan, dentry);
if (IS_ERR(dentry)) {
if (dentry == ERR_PTR(-ENOENT))
goto new_file;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b3d9459c9bbd..c98e5238a1b6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -297,10 +297,6 @@ out:
dout("%s: result %d\n", __func__, err);
}
-static void ceph_init_rreq(struct netfs_read_request *rreq, struct file *file)
-{
-}
-
static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
{
struct inode *inode = mapping->host;
@@ -312,7 +308,6 @@ static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
}
static const struct netfs_read_request_ops ceph_netfs_read_ops = {
- .init_rreq = ceph_init_rreq,
.is_cache_enabled = ceph_is_cache_enabled,
.begin_cache_operation = ceph_begin_cache_operation,
.issue_op = ceph_netfs_issue_op,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7d2c33cdbac6..b472cd066d1c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2218,6 +2218,7 @@ static int unsafe_request_wait(struct inode *inode)
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req1 = NULL, *req2 = NULL;
+ unsigned int max_sessions;
int ret, err = 0;
spin_lock(&ci->i_unsafe_lock);
@@ -2236,36 +2237,44 @@ static int unsafe_request_wait(struct inode *inode)
spin_unlock(&ci->i_unsafe_lock);
/*
+ * mdsc->max_sessions rarely changes, so rather than taking
+ * mdsc->mutex we sample it here and, if a session index turns
+ * out to be beyond it, free the sessions array and retry with
+ * the current value.
+ */
+retry:
+ max_sessions = mdsc->max_sessions;
+
+ /*
* Trigger to flush the journal logs in all the relevant MDSes
* manually, or in the worst case we must wait at most 5 seconds
* to wait the journal logs to be flushed by the MDSes periodically.
*/
- if (req1 || req2) {
+ if ((req1 || req2) && likely(max_sessions)) {
struct ceph_mds_session **sessions = NULL;
struct ceph_mds_session *s;
struct ceph_mds_request *req;
- unsigned int max;
int i;
- /*
- * The mdsc->max_sessions is unlikely to be changed
- * mostly, here we will retry it by reallocating the
- * sessions arrary memory to get rid of the mdsc->mutex
- * lock.
- */
-retry:
- max = mdsc->max_sessions;
- sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
- if (!sessions)
- return -ENOMEM;
+ sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
+ if (!sessions) {
+ err = -ENOMEM;
+ goto out;
+ }
spin_lock(&ci->i_unsafe_lock);
if (req1) {
list_for_each_entry(req, &ci->i_unsafe_dirops,
r_unsafe_dir_item) {
s = req->r_session;
- if (unlikely(s->s_mds >= max)) {
+ if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
+ for (i = 0; i < max_sessions; i++) {
+ s = sessions[i];
+ if (s)
+ ceph_put_mds_session(s);
+ }
+ kfree(sessions);
goto retry;
}
if (!sessions[s->s_mds]) {
@@ -2278,8 +2287,14 @@ retry:
list_for_each_entry(req, &ci->i_unsafe_iops,
r_unsafe_target_item) {
s = req->r_session;
- if (unlikely(s->s_mds >= max)) {
+ if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
+ for (i = 0; i < max_sessions; i++) {
+ s = sessions[i];
+ if (s)
+ ceph_put_mds_session(s);
+ }
+ kfree(sessions);
goto retry;
}
if (!sessions[s->s_mds]) {
@@ -2300,7 +2315,7 @@ retry:
spin_unlock(&ci->i_ceph_lock);
/* send flush mdlog request to MDSes */
- for (i = 0; i < max; i++) {
+ for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s) {
send_flush_mdlog(s);
@@ -2317,15 +2332,19 @@ retry:
ceph_timeout_jiffies(req1->r_timeout));
if (ret)
err = -EIO;
- ceph_mdsc_put_request(req1);
}
if (req2) {
ret = !wait_for_completion_timeout(&req2->r_safe_completion,
ceph_timeout_jiffies(req2->r_timeout));
if (ret)
err = -EIO;
- ceph_mdsc_put_request(req2);
}
+
+out:
+ if (req1)
+ ceph_mdsc_put_request(req1);
+ if (req2)
+ ceph_mdsc_put_request(req2);
return err;
}
@@ -3376,8 +3395,7 @@ static void handle_cap_grant(struct inode *inode,
if ((newcaps & CEPH_CAP_LINK_SHARED) &&
(extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) {
set_nlink(inode, le32_to_cpu(grant->nlink));
- if (inode->i_nlink == 0 &&
- (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+ if (inode->i_nlink == 0)
deleted_inode = true;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9d9304e712d9..bbed3224ad68 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -204,6 +204,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
int fmode, bool isdir)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mount_options *opt =
+ ceph_inode_to_client(&ci->vfs_inode)->mount_options;
struct ceph_file_info *fi;
dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
@@ -225,6 +227,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
if (!fi)
return -ENOMEM;
+ if (opt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+ fi->flags |= CEPH_F_SYNC;
+
file->private_data = fi;
}
@@ -578,6 +583,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
struct ceph_inode_info *ci = ceph_inode(dir);
struct inode *inode;
struct timespec64 now;
+ struct ceph_string *pool_ns;
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_vino vino = { .ino = req->r_deleg_ino,
.snap = CEPH_NOSNAP };
@@ -627,6 +633,12 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
in.max_size = cpu_to_le64(lo->stripe_unit);
ceph_file_layout_to_legacy(lo, &in.layout);
+ /* lo is private, so pool_ns can't change */
+ pool_ns = rcu_dereference_raw(lo->pool_ns);
+ if (pool_ns) {
+ iinfo.pool_ns_len = pool_ns->len;
+ iinfo.pool_ns_data = pool_ns->str;
+ }
down_read(&mdsc->snap_rwsem);
ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
@@ -745,8 +757,10 @@ retry:
restore_deleg_ino(dir, req->r_deleg_ino);
ceph_mdsc_put_request(req);
try_async = false;
+ ceph_put_string(rcu_dereference_raw(lo.pool_ns));
goto retry;
}
+ ceph_put_string(rcu_dereference_raw(lo.pool_ns));
goto out_req;
}
}
@@ -1541,7 +1555,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct ceph_inode_info *ci = ceph_inode(inode);
bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
ssize_t ret;
- int want, got = 0;
+ int want = 0, got = 0;
int retry_op = 0, read = 0;
again:
@@ -1556,13 +1570,14 @@ again:
else
ceph_start_io_read(inode);
+ if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
+ want |= CEPH_CAP_FILE_CACHE;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
- want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
- else
- want = CEPH_CAP_FILE_CACHE;
+ want |= CEPH_CAP_FILE_LAZYIO;
+
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
if (ret < 0) {
- if (iocb->ki_flags & IOCB_DIRECT)
+ if (direct_lock)
ceph_end_io_direct(inode);
else
ceph_end_io_read(inode);
@@ -1696,7 +1711,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
- int err, want, got;
+ int err, want = 0, got;
bool direct_lock = false;
u32 map_flags;
u64 pool_flags;
@@ -1771,10 +1786,10 @@ retry_snap:
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, count, i_size_read(inode));
+ if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
+ want |= CEPH_CAP_FILE_BUFFER;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
- want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
- else
- want = CEPH_CAP_FILE_BUFFER;
+ want |= CEPH_CAP_FILE_LAZYIO;
got = 0;
err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
if (err < 0)
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index c57699d8408d..0fcba68f9a99 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -160,8 +160,6 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
msg->hdr.version = cpu_to_le16(1);
msg->hdr.compat_version = cpu_to_le16(1);
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
- dout("client%llu send metrics to mds%d\n",
- ceph_client_gid(mdsc->fsc->client), s->s_mds);
ceph_con_send(&s->s_con, msg);
return true;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 620c691af40e..a338a3ec0dc4 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -30,6 +30,9 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode)
/* if root is the real CephFS root, we don't have quota realms */
if (root && ceph_ino(root) == CEPH_INO_ROOT)
return false;
+ /* MDS stray dirs have no quota realms */
+ if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
+ return false;
/* otherwise, we can't know for sure */
return true;
}
@@ -494,10 +497,24 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
if (ci->i_max_bytes) {
total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
+ /* For quota size less than 4MB, use 4KB block size */
+ if (!total) {
+ total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT;
+ used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT;
+ buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
+ }
/* It is possible for a quota to be exceeded.
* Report 'zero' in that case
*/
free = total > used ? total - used : 0;
+ /* For quota size less than 4KB, report the
+ * total=used=4KB,free=0 when quota is full
+ * and total=free=4KB, used=0 otherwise */
+ if (!total) {
+ total = 1;
+ free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0;
+ buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
+ }
}
spin_unlock(&ci->i_ceph_lock);
if (total) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bea89bdb534a..bf79f369aec6 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -27,6 +27,8 @@
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
+#include <uapi/linux/magic.h>
+
static DEFINE_SPINLOCK(ceph_fsc_lock);
static LIST_HEAD(ceph_fsc_list);
@@ -146,6 +148,7 @@ enum {
Opt_mds_namespace,
Opt_recover_session,
Opt_source,
+ Opt_mon_addr,
/* string args above */
Opt_dirstat,
Opt_rbytes,
@@ -159,6 +162,7 @@ enum {
Opt_quotadf,
Opt_copyfrom,
Opt_wsync,
+ Opt_pagecache,
};
enum ceph_recover_session_mode {
@@ -197,8 +201,10 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
fsparam_u32 ("rsize", Opt_rsize),
fsparam_string ("snapdirname", Opt_snapdirname),
fsparam_string ("source", Opt_source),
+ fsparam_string ("mon_addr", Opt_mon_addr),
fsparam_u32 ("wsize", Opt_wsize),
fsparam_flag_no ("wsync", Opt_wsync),
+ fsparam_flag_no ("pagecache", Opt_pagecache),
{}
};
@@ -228,9 +234,92 @@ static void canonicalize_path(char *path)
}
/*
- * Parse the source parameter. Distinguish the server list from the path.
+ * Check if the mds namespace in ceph_mount_options matches
+ * the passed in namespace string. First time match (when
+ * ->mds_namespace is NULL) is treated specially, since
+ * ->mds_namespace needs to be initialized by the caller.
+ */
+static int namespace_equals(struct ceph_mount_options *fsopt,
+ const char *namespace, size_t len)
+{
+ return !(fsopt->mds_namespace &&
+ (strlen(fsopt->mds_namespace) != len ||
+ strncmp(fsopt->mds_namespace, namespace, len)));
+}
+
+static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
+ struct fs_context *fc)
+{
+ int r;
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+
+ if (*dev_name_end != ':')
+ return invalfc(fc, "separator ':' missing in source");
+
+ r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
+ pctx->copts, fc->log.log, ',');
+ if (r)
+ return r;
+
+ fsopt->new_dev_syntax = false;
+ return 0;
+}
+
+static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
+ struct fs_context *fc)
+{
+ size_t len;
+ struct ceph_fsid fsid;
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+ char *fsid_start, *fs_name_start;
+
+ if (*dev_name_end != '=') {
+ dout("separator '=' missing in source");
+ return -EINVAL;
+ }
+
+ fsid_start = strchr(dev_name, '@');
+ if (!fsid_start)
+ return invalfc(fc, "missing cluster fsid");
+ ++fsid_start; /* start of cluster fsid */
+
+ fs_name_start = strchr(fsid_start, '.');
+ if (!fs_name_start)
+ return invalfc(fc, "missing file system name");
+
+ if (ceph_parse_fsid(fsid_start, &fsid))
+ return invalfc(fc, "Invalid FSID");
+
+ ++fs_name_start; /* start of file system name */
+ len = dev_name_end - fs_name_start;
+
+ if (!namespace_equals(fsopt, fs_name_start, len))
+ return invalfc(fc, "Mismatching mds_namespace");
+ kfree(fsopt->mds_namespace);
+ fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
+ if (!fsopt->mds_namespace)
+ return -ENOMEM;
+ dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
+
+ fsopt->new_dev_syntax = true;
+ return 0;
+}
+
+/*
+ * Parse the source parameter. Distinguish the device spec from the path.
+ * Try parsing the new device format first and fall back to the old
+ * format if needed.
+ *
+ * New device syntax looks like:
+ * <device_spec>=/<path>
+ * where
+ * <device_spec> is name@fsid.fsname
+ * <path> is optional, but if present must begin with '/'
+ * (monitor addresses are passed via mount option)
*
- * The source will look like:
+ * Old device syntax is:
* <server_spec>[,<server_spec>...]:[<path>]
* where
* <server_spec> is <ip>[:<port>]
@@ -263,24 +352,44 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
dev_name_end = dev_name + strlen(dev_name);
}
- dev_name_end--; /* back up to ':' separator */
- if (dev_name_end < dev_name || *dev_name_end != ':')
- return invalfc(fc, "No path or : separator in source");
+ dev_name_end--; /* back up to separator */
+ if (dev_name_end < dev_name)
+ return invalfc(fc, "Path missing in source");
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
if (fsopt->server_path)
dout("server path '%s'\n", fsopt->server_path);
- ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name,
- pctx->copts, fc->log.log);
- if (ret)
- return ret;
+ dout("trying new device syntax");
+ ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
+ if (ret) {
+ if (ret != -EINVAL)
+ return ret;
+ dout("trying old device syntax");
+ ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
+ if (ret)
+ return ret;
+ }
fc->source = param->string;
param->string = NULL;
return 0;
}
+static int ceph_parse_mon_addr(struct fs_parameter *param,
+ struct fs_context *fc)
+{
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+
+ kfree(fsopt->mon_addr);
+ fsopt->mon_addr = param->string;
+ param->string = NULL;
+
+ return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr),
+ pctx->copts, fc->log.log, '/');
+}
+
static int ceph_parse_mount_param(struct fs_context *fc,
struct fs_parameter *param)
{
@@ -306,6 +415,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
param->string = NULL;
break;
case Opt_mds_namespace:
+ if (!namespace_equals(fsopt, param->string, strlen(param->string)))
+ return invalfc(fc, "Mismatching mds_namespace");
kfree(fsopt->mds_namespace);
fsopt->mds_namespace = param->string;
param->string = NULL;
@@ -323,6 +434,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
if (fc->source)
return invalfc(fc, "Multiple sources specified");
return ceph_parse_source(param, fc);
+ case Opt_mon_addr:
+ return ceph_parse_mon_addr(param, fc);
case Opt_wsize:
if (result.uint_32 < PAGE_SIZE ||
result.uint_32 > CEPH_MAX_WRITE_SIZE)
@@ -455,6 +568,12 @@ static int ceph_parse_mount_param(struct fs_context *fc,
else
fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
break;
+ case Opt_pagecache:
+ if (result.negated)
+ fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
+ else
+ fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
+ break;
default:
BUG();
}
@@ -474,6 +593,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
kfree(args->mds_namespace);
kfree(args->server_path);
kfree(args->fscache_uniq);
+ kfree(args->mon_addr);
kfree(args);
}
@@ -517,6 +637,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
if (ret)
return ret;
+ ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
+ if (ret)
+ return ret;
+
return ceph_compare_options(new_opt, fsc->client);
}
@@ -572,15 +696,22 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
seq_puts(m, ",copyfrom");
- if (fsopt->mds_namespace)
+ /* dump mds_namespace when old device syntax is in use */
+ if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
+ if (fsopt->mon_addr)
+ seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
+
if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
seq_show_option(m, "recover_session", "clean");
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
seq_puts(m, ",wsync");
+ if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+ seq_puts(m, ",nopagecache");
+
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%u", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -1052,6 +1183,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
static int ceph_get_tree(struct fs_context *fc)
{
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
struct super_block *sb;
struct ceph_fs_client *fsc;
struct dentry *res;
@@ -1063,6 +1195,8 @@ static int ceph_get_tree(struct fs_context *fc)
if (!fc->source)
return invalfc(fc, "No source");
+ if (fsopt->new_dev_syntax && !fsopt->mon_addr)
+ return invalfc(fc, "No monitor address");
/* create client (which we may/may not use) */
fsc = create_fs_client(pctx->opts, pctx->copts);
@@ -1148,6 +1282,13 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
else
ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
+ if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
+ kfree(fsc->mount_options->mon_addr);
+ fsc->mount_options->mon_addr = fsopt->mon_addr;
+ fsopt->mon_addr = NULL;
+ pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
+ }
+
sync_filesystem(fc->root->d_sb);
return 0;
}
@@ -1325,6 +1466,14 @@ bool disable_send_metrics = false;
module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
+/* for both v1 and v2 syntax */
+static bool mount_support = true;
+static const struct kernel_param_ops param_ops_mount_syntax = {
+ .get = param_get_bool,
+};
+module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
+module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
+
module_init(init_ceph);
module_exit(exit_ceph);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d0142cc5c41b..67f145e1ae7a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -24,13 +24,11 @@
#include <linux/fscache.h>
#endif
-/* f_type in struct statfs */
-#define CEPH_SUPER_MAGIC 0x00c36400
-
/* large granularity for statfs utilization stats to facilitate
* large volume sizes on 32-bit machines. */
#define CEPH_BLOCK_SHIFT 22 /* 4 MB */
#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
+#define CEPH_4K_BLOCK_SHIFT 12 /* 4 KB */
#define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blocklisted */
#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
@@ -44,6 +42,7 @@
#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */
#define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */
#define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */
+#define CEPH_MOUNT_OPT_NOPAGECACHE (1<<16) /* bypass pagecache altogether */
#define CEPH_MOUNT_OPT_DEFAULT \
(CEPH_MOUNT_OPT_DCACHE | \
@@ -88,6 +87,8 @@ struct ceph_mount_options {
unsigned int max_readdir; /* max readdir result (entries) */
unsigned int max_readdir_bytes; /* max readdir result (bytes) */
+ bool new_dev_syntax;
+
/*
* everything above this point can be memcmp'd; everything below
* is handled in compare_mount_options()
@@ -97,6 +98,7 @@ struct ceph_mount_options {
char *mds_namespace; /* default NULL */
char *server_path; /* default NULL (means "/") */
char *fscache_uniq; /* default NULL */
+ char *mon_addr;
};
struct ceph_fs_client {
@@ -534,19 +536,23 @@ static inline int ceph_ino_compare(struct inode *inode, void *data)
*
* These come from src/mds/mdstypes.h in the ceph sources.
*/
-#define CEPH_MAX_MDS 0x100
-#define CEPH_NUM_STRAY 10
+#define CEPH_MAX_MDS 0x100
+#define CEPH_NUM_STRAY 10
#define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS)
+#define CEPH_MDS_INO_LOG_OFFSET (2 * CEPH_MAX_MDS)
#define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
{
- if (vino.ino < CEPH_INO_SYSTEM_BASE &&
- vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) {
- WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino);
- return true;
- }
- return false;
+ if (vino.ino >= CEPH_INO_SYSTEM_BASE ||
+ vino.ino < CEPH_MDS_INO_MDSDIR_OFFSET)
+ return false;
+
+ /* Don't warn on mdsdirs */
+ WARN_RATELIMIT(vino.ino >= CEPH_MDS_INO_LOG_OFFSET,
+ "Attempt to access reserved inode number 0x%llx",
+ vino.ino);
+ return true;
}
static inline struct inode *ceph_find_inode(struct super_block *sb,
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 346ae8716deb..3b7e3b9e4fd2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -188,7 +188,7 @@ config CIFS_SMB_DIRECT
config CIFS_FSCACHE
bool "Provide CIFS client caching support"
- depends on CIFS=m && FSCACHE_OLD_API || CIFS=y && FSCACHE_OLD_API=y
+ depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
help
Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
to be cached locally on disk through the general filesystem cache
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 87fcacdf3de7..cc8fdcb35b71 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -25,7 +25,7 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += cifs_dfs_ref.o dfs_cache.o
cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o
-cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
+cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o
cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
deleted file mode 100644
index 8be57aaedab6..000000000000
--- a/fs/cifs/cache.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * CIFS filesystem cache index structure definitions
- *
- * Copyright (c) 2010 Novell, Inc.
- * Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
- *
- */
-#include "fscache.h"
-#include "cifs_debug.h"
-
-/*
- * CIFS filesystem definition for FS-Cache
- */
-struct fscache_netfs cifs_fscache_netfs = {
- .name = "cifs",
- .version = 0,
-};
-
-/*
- * Register CIFS for caching with FS-Cache
- */
-int cifs_fscache_register(void)
-{
- return fscache_register_netfs(&cifs_fscache_netfs);
-}
-
-/*
- * Unregister CIFS for caching
- */
-void cifs_fscache_unregister(void)
-{
- fscache_unregister_netfs(&cifs_fscache_netfs);
-}
-
-/*
- * Server object for FS-Cache
- */
-const struct fscache_cookie_def cifs_fscache_server_index_def = {
- .name = "CIFS.server",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
-};
-
-static enum
-fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
- const void *data,
- uint16_t datalen,
- loff_t object_size)
-{
- struct cifs_fscache_super_auxdata auxdata;
- const struct cifs_tcon *tcon = cookie_netfs_data;
-
- if (datalen != sizeof(auxdata))
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
- auxdata.vol_create_time = tcon->vol_create_time;
- auxdata.vol_serial_number = tcon->vol_serial_number;
-
- if (memcmp(data, &auxdata, datalen) != 0)
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- return FSCACHE_CHECKAUX_OKAY;
-}
-
-/*
- * Superblock object for FS-Cache
- */
-const struct fscache_cookie_def cifs_fscache_super_index_def = {
- .name = "CIFS.super",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
- .check_aux = cifs_fscache_super_check_aux,
-};
-
-static enum
-fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
- const void *data,
- uint16_t datalen,
- loff_t object_size)
-{
- struct cifs_fscache_inode_auxdata auxdata;
- struct cifsInodeInfo *cifsi = cookie_netfs_data;
-
- if (datalen != sizeof(auxdata))
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
-
- if (memcmp(data, &auxdata, datalen) != 0)
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- return FSCACHE_CHECKAUX_OKAY;
-}
-
-const struct fscache_cookie_def cifs_fscache_inode_object_def = {
- .name = "CIFS.uniqueid",
- .type = FSCACHE_COOKIE_TYPE_DATAFILE,
- .check_aux = cifs_fscache_inode_check_aux,
-};
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index 8f386dd9939e..463ebe34892b 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -396,11 +396,11 @@ static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const ch
switch (state) {
case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE:
cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name);
- cifs_ses_mark_for_reconnect(swnreg->tcon->ses);
+ cifs_reconnect(swnreg->tcon->ses->server, true);
break;
case CIFS_SWN_RESOURCE_STATE_AVAILABLE:
cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name);
- cifs_ses_mark_for_reconnect(swnreg->tcon->ses);
+ cifs_reconnect(swnreg->tcon->ses->server, true);
break;
case CIFS_SWN_RESOURCE_STATE_UNKNOWN:
cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name);
@@ -498,10 +498,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a
goto unlock;
}
- spin_lock(&cifs_tcp_ses_lock);
- if (tcon->ses->server->tcpStatus != CifsExiting)
- tcon->ses->server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&cifs_tcp_ses_lock);
+ cifs_reconnect(tcon->ses->server, false);
unlock:
mutex_unlock(&tcon->ses->server->srv_mutex);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 36b2e0cb9736..199edac0cb59 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -397,6 +397,9 @@ static void
cifs_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
+ if (inode->i_state & I_PINNING_FSCACHE_WB)
+ cifs_fscache_unuse_inode_cookie(inode, true);
+ cifs_fscache_release_inode_cookie(inode);
clear_inode(inode);
}
@@ -721,6 +724,12 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
}
#endif
+static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ fscache_unpin_writeback(wbc, cifs_inode_cookie(inode));
+ return 0;
+}
+
static int cifs_drop_inode(struct inode *inode)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -733,6 +742,7 @@ static int cifs_drop_inode(struct inode *inode)
static const struct super_operations cifs_super_ops = {
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
+ .write_inode = cifs_write_inode,
.free_inode = cifs_free_inode,
.drop_inode = cifs_drop_inode,
.evict_inode = cifs_evict_inode,
@@ -1625,13 +1635,9 @@ init_cifs(void)
goto out_destroy_cifsoplockd_wq;
}
- rc = cifs_fscache_register();
- if (rc)
- goto out_destroy_deferredclose_wq;
-
rc = cifs_init_inodecache();
if (rc)
- goto out_unreg_fscache;
+ goto out_destroy_deferredclose_wq;
rc = cifs_init_mids();
if (rc)
@@ -1693,8 +1699,6 @@ out_destroy_mids:
cifs_destroy_mids();
out_destroy_inodecache:
cifs_destroy_inodecache();
-out_unreg_fscache:
- cifs_fscache_unregister();
out_destroy_deferredclose_wq:
destroy_workqueue(deferredclose_wq);
out_destroy_cifsoplockd_wq:
@@ -1730,7 +1734,6 @@ exit_cifs(void)
cifs_destroy_request_bufs();
cifs_destroy_mids();
cifs_destroy_inodecache();
- cifs_fscache_unregister();
destroy_workqueue(deferredclose_wq);
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9e5d9e192ef0..15a5c5db038b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -152,5 +152,6 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.34"
+#define SMB3_PRODUCT_BUILD 35
+#define CIFS_VERSION "2.35"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index f84978b76bb6..48b343d03430 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -117,6 +117,7 @@ enum statusEnum {
CifsInSessSetup,
CifsNeedTcon,
CifsInTcon,
+ CifsNeedFilesInvalidate,
CifsInFilesInvalidate
};
@@ -667,9 +668,6 @@ struct TCP_Server_Info {
unsigned int total_read; /* total amount of data read in this pass */
atomic_t in_send; /* requests trying to send */
atomic_t num_waiters; /* blocked waiting to get in sendrecv */
-#ifdef CONFIG_CIFS_FSCACHE
- struct fscache_cookie *fscache; /* client index cache cookie */
-#endif
#ifdef CONFIG_CIFS_STATS2
atomic_t num_cmds[NUMBER_OF_SMB2_COMMANDS]; /* total requests by cmd */
atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count resps > 1 sec */
@@ -923,6 +921,7 @@ struct cifs_chan {
*/
struct cifs_ses {
struct list_head smb_ses_list;
+ struct list_head rlist; /* reconnect list */
struct list_head tcon_list;
struct cifs_tcon *tcon_ipc;
struct mutex session_mutex;
@@ -1110,7 +1109,7 @@ struct cifs_tcon {
__u32 max_bytes_copy;
#ifdef CONFIG_CIFS_FSCACHE
u64 resource_id; /* server resource id */
- struct fscache_cookie *fscache; /* cookie for share */
+ struct fscache_volume *fscache; /* cookie for share */
#endif
struct list_head pending_opens; /* list of incomplete opens */
struct cached_fid crfid; /* Cached root fid */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e0dc147e69a8..d3701295402d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -131,6 +131,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
struct smb_hdr *in_buf ,
struct smb_hdr *out_buf,
int *bytes_returned);
+void
+cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
+ bool mark_smb_session);
extern int cifs_reconnect(struct TCP_Server_Info *server,
bool mark_smb_session);
extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr);
@@ -647,6 +650,11 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
int match_target_ip(struct TCP_Server_Info *server,
const char *share, size_t share_len,
bool *result);
+
+int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ const char *dfs_link_path);
#endif
static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0f36deff790e..11a22a30ee14 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -168,7 +168,7 @@ static void cifs_resolve_server(struct work_struct *work)
* @server needs to be previously set to CifsNeedReconnect.
*
*/
-static void
+void
cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
bool mark_smb_session)
{
@@ -181,24 +181,26 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
server->maxBuf = 0;
server->max_read = 0;
- cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
- trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
/*
* before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they
* are not used until reconnected.
*/
- cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", __func__);
+ cifs_dbg(FYI, "%s: marking necessary sessions and tcons for reconnect\n", __func__);
/* If server is a channel, select the primary channel */
pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
spin_lock(&ses->chan_lock);
if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server))
goto next_session;
- cifs_chan_set_need_reconnect(ses, server);
+ if (mark_smb_session)
+ CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses);
+ else
+ cifs_chan_set_need_reconnect(ses, server);
/* If all channels need reconnect, then tcon needs reconnect */
if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses))
@@ -218,13 +220,8 @@ next_session:
}
spin_unlock(&cifs_tcp_ses_lock);
- /*
- * before reconnecting the tcp session, mark the smb session (uid)
- * and the tid bad so they are not used until reconnected
- */
- cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect and tearing down socket\n",
- __func__);
/* do not want to be sending data on a socket we are freeing */
+ cifs_dbg(FYI, "%s: tearing down socket\n", __func__);
mutex_lock(&server->srv_mutex);
if (server->ssocket) {
cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", server->ssocket->state,
@@ -280,7 +277,12 @@ static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num
wake_up(&server->response_q);
return false;
}
+
+ cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
+ trace_smb3_reconnect(server->CurrentMid, server->conn_id,
+ server->hostname);
server->tcpStatus = CifsNeedReconnect;
+
spin_unlock(&cifs_tcp_ses_lock);
return true;
}
@@ -335,11 +337,14 @@ static int __cifs_reconnect(struct TCP_Server_Info *server,
spin_unlock(&cifs_tcp_ses_lock);
cifs_swn_reset_server_dstaddr(server);
mutex_unlock(&server->srv_mutex);
+ mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
}
} while (server->tcpStatus == CifsNeedReconnect);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedNegotiate)
mod_delayed_work(cifsiod_wq, &server->echo, 0);
+ spin_unlock(&cifs_tcp_ses_lock);
wake_up(&server->response_q);
return rc;
@@ -454,6 +459,7 @@ reconnect_dfs_server(struct TCP_Server_Info *server,
spin_unlock(&cifs_tcp_ses_lock);
cifs_swn_reset_server_dstaddr(server);
mutex_unlock(&server->srv_mutex);
+ mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
} while (server->tcpStatus == CifsNeedReconnect);
if (target_hint)
@@ -633,7 +639,6 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
if (server->tcpStatus == CifsNeedReconnect) {
spin_unlock(&cifs_tcp_ses_lock);
- cifs_reconnect(server, false);
return -ECONNABORTED;
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -1439,10 +1444,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
cifs_crypto_secmech_release(server);
- /* fscache server cookies are based on primary channel only */
- if (!CIFS_SERVER_IS_CHAN(server))
- cifs_fscache_release_client_cookie(server);
-
kfree(server->session_key.response);
server->session_key.response = NULL;
server->session_key.len = 0;
@@ -1604,14 +1605,6 @@ smbd_connected:
list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
- /* fscache server cookies are based on primary channel only */
- if (!CIFS_SERVER_IS_CHAN(tcp_ses))
- cifs_fscache_get_client_cookie(tcp_ses);
-#ifdef CONFIG_CIFS_FSCACHE
- else
- tcp_ses->fscache = tcp_ses->primary_server->fscache;
-#endif /* CONFIG_CIFS_FSCACHE */
-
/* queue echo request delayed work */
queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
@@ -1832,7 +1825,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
spin_lock(&ses->chan_lock);
chan_count = ses->chan_count;
- spin_unlock(&ses->chan_lock);
/* close any extra channels */
if (chan_count > 1) {
@@ -1849,6 +1841,7 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
ses->chans[i].server = NULL;
}
}
+ spin_unlock(&ses->chan_lock);
sesInfoFree(ses);
cifs_put_tcp_session(server, 0);
@@ -2124,8 +2117,10 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
mutex_unlock(&ses->session_mutex);
/* each channel uses a different signing key */
+ spin_lock(&ses->chan_lock);
memcpy(ses->chans[0].signkey, ses->smb3signingkey,
sizeof(ses->smb3signingkey));
+ spin_unlock(&ses->chan_lock);
if (rc)
goto get_ses_fail;
@@ -3121,7 +3116,8 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx)
* Inside cifs_fscache_get_super_cookie it checks
* that we do not get super cookie twice.
*/
- cifs_fscache_get_super_cookie(tcon);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
+ cifs_fscache_get_super_cookie(tcon);
out:
mnt_ctx->server = server;
@@ -3374,6 +3370,11 @@ static int is_path_remote(struct mount_ctx *mnt_ctx)
rc = server->ops->is_path_accessible(xid, tcon, cifs_sb,
full_path);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (rc == -ENOENT && is_tcon_dfs(tcon))
+ rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, cifs_sb,
+ full_path);
+#endif
if (rc != 0 && rc != -EREMOTE) {
kfree(full_path);
return rc;
@@ -3761,10 +3762,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
if (rc == 0) {
bool is_unicode;
- spin_lock(&cifs_tcp_ses_lock);
- tcon->tidStatus = CifsGood;
- spin_unlock(&cifs_tcp_ses_lock);
- tcon->need_reconnect = false;
tcon->tid = smb_buffer_response->Tid;
bcc_ptr = pByteArea(smb_buffer_response);
bytes_left = get_bcc(smb_buffer_response);
@@ -3879,6 +3876,11 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses,
else
rc = -EHOSTDOWN;
spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInNegotiate)
+ server->tcpStatus = CifsNeedNegotiate;
+ spin_unlock(&cifs_tcp_ses_lock);
}
return rc;
@@ -3898,7 +3900,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
spin_unlock(&cifs_tcp_ses_lock);
return 0;
}
- ses->status = CifsInSessSetup;
+ server->tcpStatus = CifsInSessSetup;
spin_unlock(&cifs_tcp_ses_lock);
spin_lock(&ses->chan_lock);
@@ -3925,8 +3927,24 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
if (server->ops->sess_setup)
rc = server->ops->sess_setup(xid, ses, server, nls_info);
- if (rc)
+ if (rc) {
cifs_server_dbg(VFS, "Send error in SessSetup = %d\n", rc);
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInSessSetup)
+ server->tcpStatus = CifsNeedSessSetup;
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInSessSetup)
+ server->tcpStatus = CifsGood;
+ /* Even if one channel is active, session is in good state */
+ ses->status = CifsGood;
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ spin_lock(&ses->chan_lock);
+ cifs_chan_clear_need_reconnect(ses, server);
+ spin_unlock(&ses->chan_lock);
+ }
return rc;
}
@@ -4271,17 +4289,6 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t
struct dfs_cache_tgt_iterator *tit;
bool target_match;
- /* only send once per connect */
- spin_lock(&cifs_tcp_ses_lock);
- if (tcon->ses->status != CifsGood ||
- (tcon->tidStatus != CifsNew &&
- tcon->tidStatus != CifsNeedTcon)) {
- spin_unlock(&cifs_tcp_ses_lock);
- return 0;
- }
- tcon->tidStatus = CifsInTcon;
- spin_unlock(&cifs_tcp_ses_lock);
-
extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len);
tit = dfs_cache_get_tgt_iterator(tl);
@@ -4381,7 +4388,7 @@ static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tco
*/
if (rc && server->current_fullpath != server->origin_fullpath) {
server->current_fullpath = server->origin_fullpath;
- cifs_ses_mark_for_reconnect(tcon->ses);
+ cifs_reconnect(tcon->ses->server, true);
}
dfs_cache_free_tgts(tl);
@@ -4399,9 +4406,22 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
char *tree;
struct dfs_info3_param ref = {0};
+ /* only send once per connect */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->ses->status != CifsGood ||
+ (tcon->tidStatus != CifsNew &&
+ tcon->tidStatus != CifsNeedTcon)) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+ tcon->tidStatus = CifsInTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+
tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
- if (!tree)
- return -ENOMEM;
+ if (!tree) {
+ rc = -ENOMEM;
+ goto out;
+ }
if (tcon->ipc) {
scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
@@ -4433,11 +4453,25 @@ out:
kfree(tree);
cifs_put_tcp_super(sb);
+ if (rc) {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsNeedTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsGood;
+ spin_unlock(&cifs_tcp_ses_lock);
+ tcon->need_reconnect = false;
+ }
+
return rc;
}
#else
int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
{
+ int rc;
const struct smb_version_operations *ops = tcon->ses->server->ops;
/* only send once per connect */
@@ -4451,6 +4485,20 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
tcon->tidStatus = CifsInTcon;
spin_unlock(&cifs_tcp_ses_lock);
- return ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+ rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+ if (rc) {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsNeedTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsGood;
+ spin_unlock(&cifs_tcp_ses_lock);
+ tcon->need_reconnect = false;
+ }
+
+ return rc;
}
#endif
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index e9b0fa2a9614..dd9643751671 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -1355,7 +1355,7 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach
}
cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
- cifs_ses_mark_for_reconnect(tcon->ses);
+ cifs_reconnect(tcon->ses->server, true);
}
/* Refresh dfs referral of tcon and mark it for reconnect if needed */
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 6e8e7cc26ae2..ce9b22aecfba 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -22,6 +22,7 @@
#include "cifs_unicode.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
+#include "fscache.h"
static void
renew_parental_timestamps(struct dentry *direntry)
@@ -507,8 +508,12 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
server->ops->close(xid, tcon, &fid);
cifs_del_pending_open(&open);
rc = -ENOMEM;
+ goto out;
}
+ fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
+ file->f_mode & FMODE_WRITE);
+
out:
cifs_put_tlink(tlink);
out_free_xid:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9fee3af83a73..59334be9ed3b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -376,8 +376,6 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
struct cifsLockInfo *li, *tmp;
struct super_block *sb = inode->i_sb;
- cifs_fscache_release_inode_cookie(inode);
-
/*
* Delete any outstanding lock records. We'll lose them when the file
* is closed anyway.
@@ -570,7 +568,7 @@ int cifs_open(struct inode *inode, struct file *file)
spin_lock(&CIFS_I(inode)->deferred_lock);
cifs_del_deferred_close(cfile);
spin_unlock(&CIFS_I(inode)->deferred_lock);
- goto out;
+ goto use_cache;
} else {
_cifsFileInfo_put(cfile, true, false);
}
@@ -632,8 +630,6 @@ int cifs_open(struct inode *inode, struct file *file)
goto out;
}
- cifs_fscache_set_inode_cookie(inode, file);
-
if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
/*
* Time to set mode which we can not set earlier due to
@@ -652,6 +648,15 @@ int cifs_open(struct inode *inode, struct file *file)
cfile->pid);
}
+use_cache:
+ fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
+ file->f_mode & FMODE_WRITE);
+ if (file->f_flags & O_DIRECT &&
+ (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
+ file->f_flags & O_APPEND))
+ cifs_invalidate_cache(file_inode(file),
+ FSCACHE_INVAL_DIO_WRITE);
+
out:
free_dentry_path(page);
free_xid(xid);
@@ -876,6 +881,8 @@ int cifs_close(struct inode *inode, struct file *file)
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_deferred_close *dclose;
+ cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
+
if (file->private_data != NULL) {
cfile = file->private_data;
file->private_data = NULL;
@@ -886,7 +893,6 @@ int cifs_close(struct inode *inode, struct file *file)
dclose) {
if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
inode->i_ctime = inode->i_mtime = current_time(inode);
- cifs_fscache_update_inode_cookie(inode);
}
spin_lock(&cinode->deferred_lock);
cifs_add_deferred_close(cfile, dclose);
@@ -4198,10 +4204,12 @@ static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct file *file = vmf->vma->vm_file;
- struct inode *inode = file_inode(file);
- cifs_fscache_wait_on_page_write(inode, page);
+#ifdef CONFIG_CIFS_FSCACHE
+ if (PageFsCache(page) &&
+ wait_on_page_fscache_killable(page) < 0)
+ return VM_FAULT_RETRY;
+#endif
lock_page(page);
return VM_FAULT_LOCKED;
@@ -4275,8 +4283,6 @@ cifs_readv_complete(struct work_struct *work)
if (rdata->result == 0 ||
(rdata->result == -EAGAIN && got_bytes))
cifs_readpage_to_fscache(rdata->mapping->host, page);
- else
- cifs_fscache_uncache_page(rdata->mapping->host, page);
got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
@@ -4593,11 +4599,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
kref_put(&rdata->refcount, cifs_readdata_release);
}
- /* Any pages that have been shown to fscache but didn't get added to
- * the pagecache must be uncached before they get returned to the
- * allocator.
- */
- cifs_fscache_readpages_cancel(mapping->host, page_list);
free_xid(xid);
return rc;
}
@@ -4801,17 +4802,19 @@ static int cifs_release_page(struct page *page, gfp_t gfp)
{
if (PagePrivate(page))
return 0;
-
- return cifs_fscache_release_page(page, gfp);
+ if (PageFsCache(page)) {
+ if (current_is_kswapd() || !(gfp & __GFP_FS))
+ return false;
+ wait_on_page_fscache(page);
+ }
+ fscache_note_page_release(cifs_inode_cookie(page->mapping->host));
+ return true;
}
static void cifs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
- struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
-
- if (offset == 0 && length == PAGE_SIZE)
- cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
+ wait_on_page_fscache(page);
}
static int cifs_launder_page(struct page *page)
@@ -4831,7 +4834,7 @@ static int cifs_launder_page(struct page *page)
if (clear_page_dirty_for_io(page))
rc = cifs_writepage_locked(page, &wbc);
- cifs_fscache_invalidate_page(page, page->mapping->host);
+ wait_on_page_fscache(page);
return rc;
}
@@ -4988,6 +4991,19 @@ static void cifs_swap_deactivate(struct file *file)
/* do we need to unpin (or unlock) the file */
}
+/*
+ * Mark a page as having been made dirty and thus needing writeback. We also
+ * need to pin the cache object to write back to.
+ */
+#ifdef CONFIG_CIFS_FSCACHE
+static int cifs_set_page_dirty(struct page *page)
+{
+ return fscache_set_page_dirty(page, cifs_inode_cookie(page->mapping->host));
+}
+#else
+#define cifs_set_page_dirty __set_page_dirty_nobuffers
+#endif
+
const struct address_space_operations cifs_addr_ops = {
.readpage = cifs_readpage,
.readpages = cifs_readpages,
@@ -4995,7 +5011,7 @@ const struct address_space_operations cifs_addr_ops = {
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = cifs_set_page_dirty,
.releasepage = cifs_release_page,
.direct_IO = cifs_direct_io,
.invalidatepage = cifs_invalidate_page,
@@ -5020,7 +5036,7 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = cifs_set_page_dirty,
.releasepage = cifs_release_page,
.invalidatepage = cifs_invalidate_page,
.launder_page = cifs_launder_page,
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index e3ed25dc6f3f..7ec35f3f0a5f 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -37,6 +37,8 @@
#include "rfc1002pdu.h"
#include "fs_context.h"
+static DEFINE_MUTEX(cifs_mount_mutex);
+
static const match_table_t cifs_smb_version_tokens = {
{ Smb_1, SMB1_VERSION_STRING },
{ Smb_20, SMB20_VERSION_STRING},
@@ -707,10 +709,14 @@ static int smb3_get_tree_common(struct fs_context *fc)
static int smb3_get_tree(struct fs_context *fc)
{
int err = smb3_fs_context_validate(fc);
+ int ret;
if (err)
return err;
- return smb3_get_tree_common(fc);
+ mutex_lock(&cifs_mount_mutex);
+ ret = smb3_get_tree_common(fc);
+ mutex_unlock(&cifs_mount_mutex);
+ return ret;
}
static void smb3_fs_context_free(struct fs_context *fc)
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 003c5f1f4dfb..efaac4d5ff55 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -12,250 +12,136 @@
#include "cifs_fs_sb.h"
#include "cifsproto.h"
-/*
- * Key layout of CIFS server cache index object
- */
-struct cifs_server_key {
- __u64 conn_id;
-} __packed;
-
-/*
- * Get a cookie for a server object keyed by {IPaddress,port,family} tuple
- */
-void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
-{
- struct cifs_server_key key;
-
- /*
- * Check if cookie was already initialized so don't reinitialize it.
- * In the future, as we integrate with newer fscache features,
- * we may want to instead add a check if cookie has changed
- */
- if (server->fscache)
- return;
-
- memset(&key, 0, sizeof(key));
- key.conn_id = server->conn_id;
-
- server->fscache =
- fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
- &cifs_fscache_server_index_def,
- &key, sizeof(key),
- NULL, 0,
- server, 0, true);
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, server, server->fscache);
-}
-
-void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
+static void cifs_fscache_fill_volume_coherency(
+ struct cifs_tcon *tcon,
+ struct cifs_fscache_volume_coherency_data *cd)
{
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, server, server->fscache);
- fscache_relinquish_cookie(server->fscache, NULL, false);
- server->fscache = NULL;
+ memset(cd, 0, sizeof(*cd));
+ cd->resource_id = cpu_to_le64(tcon->resource_id);
+ cd->vol_create_time = tcon->vol_create_time;
+ cd->vol_serial_number = cpu_to_le32(tcon->vol_serial_number);
}
-void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
+int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
{
+ struct cifs_fscache_volume_coherency_data cd;
struct TCP_Server_Info *server = tcon->ses->server;
+ struct fscache_volume *vcookie;
+ const struct sockaddr *sa = (struct sockaddr *)&server->dstaddr;
+ size_t slen, i;
char *sharename;
- struct cifs_fscache_super_auxdata auxdata;
+ char *key;
+ int ret = -ENOMEM;
- /*
- * Check if cookie was already initialized so don't reinitialize it.
- * In the future, as we integrate with newer fscache features,
- * we may want to instead add a check if cookie has changed
- */
- if (tcon->fscache)
- return;
+ tcon->fscache = NULL;
+ switch (sa->sa_family) {
+ case AF_INET:
+ case AF_INET6:
+ break;
+ default:
+ cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
+ return -EINVAL;
+ }
+
+ memset(&key, 0, sizeof(key));
sharename = extract_sharename(tcon->treeName);
if (IS_ERR(sharename)) {
cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
- tcon->fscache = NULL;
- return;
+ return -EINVAL;
}
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
- auxdata.vol_create_time = tcon->vol_create_time;
- auxdata.vol_serial_number = tcon->vol_serial_number;
+ slen = strlen(sharename);
+ for (i = 0; i < slen; i++)
+ if (sharename[i] == '/')
+ sharename[i] = ';';
+
+ key = kasprintf(GFP_KERNEL, "cifs,%pISpc,%s", sa, sharename);
+ if (!key)
+ goto out;
+
+ cifs_fscache_fill_volume_coherency(tcon, &cd);
+ vcookie = fscache_acquire_volume(key,
+ NULL, /* preferred_cache */
+ &cd, sizeof(cd));
+ cifs_dbg(FYI, "%s: (%s/0x%p)\n", __func__, key, vcookie);
+ if (IS_ERR(vcookie)) {
+ if (vcookie != ERR_PTR(-EBUSY)) {
+ ret = PTR_ERR(vcookie);
+ goto out_2;
+ }
+ pr_err("Cache volume key already in use (%s)\n", key);
+ vcookie = NULL;
+ }
- tcon->fscache =
- fscache_acquire_cookie(server->fscache,
- &cifs_fscache_super_index_def,
- sharename, strlen(sharename),
- &auxdata, sizeof(auxdata),
- tcon, 0, true);
+ tcon->fscache = vcookie;
+ ret = 0;
+out_2:
+ kfree(key);
+out:
kfree(sharename);
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, server->fscache, tcon->fscache);
+ return ret;
}
void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
{
- struct cifs_fscache_super_auxdata auxdata;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
- auxdata.vol_create_time = tcon->vol_create_time;
- auxdata.vol_serial_number = tcon->vol_serial_number;
+ struct cifs_fscache_volume_coherency_data cd;
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, tcon->fscache);
- fscache_relinquish_cookie(tcon->fscache, &auxdata, false);
- tcon->fscache = NULL;
-}
-
-static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi,
- struct cifs_tcon *tcon)
-{
- struct cifs_fscache_inode_auxdata auxdata;
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
-
- cifsi->fscache =
- fscache_acquire_cookie(tcon->fscache,
- &cifs_fscache_inode_object_def,
- &cifsi->uniqueid, sizeof(cifsi->uniqueid),
- &auxdata, sizeof(auxdata),
- cifsi, cifsi->vfs_inode.i_size, true);
+ cifs_fscache_fill_volume_coherency(tcon, &cd);
+ fscache_relinquish_volume(tcon->fscache, &cd, false);
+ tcon->fscache = NULL;
}
-static void cifs_fscache_enable_inode_cookie(struct inode *inode)
+void cifs_fscache_get_inode_cookie(struct inode *inode)
{
+ struct cifs_fscache_inode_coherency_data cd;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
- if (cifsi->fscache)
- return;
-
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE))
- return;
-
- cifs_fscache_acquire_inode_cookie(cifsi, tcon);
+ cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd);
- cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
- __func__, tcon->fscache, cifsi->fscache);
+ cifsi->fscache =
+ fscache_acquire_cookie(tcon->fscache, 0,
+ &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+ &cd, sizeof(cd),
+ i_size_read(&cifsi->vfs_inode));
}
-void cifs_fscache_release_inode_cookie(struct inode *inode)
+void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update)
{
- struct cifs_fscache_inode_auxdata auxdata;
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
-
- if (cifsi->fscache) {
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
+ if (update) {
+ struct cifs_fscache_inode_coherency_data cd;
+ loff_t i_size = i_size_read(inode);
- cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
- /* fscache_relinquish_cookie does not seem to update auxdata */
- fscache_update_cookie(cifsi->fscache, &auxdata);
- fscache_relinquish_cookie(cifsi->fscache, &auxdata, false);
- cifsi->fscache = NULL;
+ cifs_fscache_fill_coherency(inode, &cd);
+ fscache_unuse_cookie(cifs_inode_cookie(inode), &cd, &i_size);
+ } else {
+ fscache_unuse_cookie(cifs_inode_cookie(inode), NULL, NULL);
}
}
-void cifs_fscache_update_inode_cookie(struct inode *inode)
+void cifs_fscache_release_inode_cookie(struct inode *inode)
{
- struct cifs_fscache_inode_auxdata auxdata;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
if (cifsi->fscache) {
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
-
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
- fscache_update_cookie(cifsi->fscache, &auxdata);
- }
-}
-
-void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
-{
- cifs_fscache_enable_inode_cookie(inode);
-}
-
-void cifs_fscache_reset_inode_cookie(struct inode *inode)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
- struct fscache_cookie *old = cifsi->fscache;
-
- if (cifsi->fscache) {
- /* retire the current fscache cache and get a new one */
- fscache_relinquish_cookie(cifsi->fscache, NULL, true);
-
- cifs_fscache_acquire_inode_cookie(cifsi, tcon);
- cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n",
- __func__, cifsi->fscache, old);
+ fscache_relinquish_cookie(cifsi->fscache, false);
+ cifsi->fscache = NULL;
}
}
-int cifs_fscache_release_page(struct page *page, gfp_t gfp)
-{
- if (PageFsCache(page)) {
- struct inode *inode = page->mapping->host;
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
-
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, page, cifsi->fscache);
- if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
- return 0;
- }
-
- return 1;
-}
-
-static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
- int error)
-{
- cifs_dbg(FYI, "%s: (0x%p/%d)\n", __func__, page, error);
- if (!error)
- SetPageUptodate(page);
- unlock_page(page);
-}
-
/*
* Retrieve a page from FS-Cache
*/
int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
{
- int ret;
-
cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
__func__, CIFS_I(inode)->fscache, page, inode);
- ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
- cifs_readpage_from_fscache_complete,
- NULL,
- GFP_KERNEL);
- switch (ret) {
-
- case 0: /* page found in fscache, read submitted */
- cifs_dbg(FYI, "%s: submitted\n", __func__);
- return ret;
- case -ENOBUFS: /* page won't be cached */
- case -ENODATA: /* page not in cache */
- cifs_dbg(FYI, "%s: %d\n", __func__, ret);
- return 1;
-
- default:
- cifs_dbg(VFS, "unknown error ret = %d\n", ret);
- }
- return ret;
+ return -ENOBUFS; // Needs conversion to using netfslib
}
/*
@@ -266,78 +152,19 @@ int __cifs_readpages_from_fscache(struct inode *inode,
struct list_head *pages,
unsigned *nr_pages)
{
- int ret;
-
cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n",
__func__, CIFS_I(inode)->fscache, *nr_pages, inode);
- ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
- pages, nr_pages,
- cifs_readpage_from_fscache_complete,
- NULL,
- mapping_gfp_mask(mapping));
- switch (ret) {
- case 0: /* read submitted to the cache for all pages */
- cifs_dbg(FYI, "%s: submitted\n", __func__);
- return ret;
-
- case -ENOBUFS: /* some pages are not cached and can't be */
- case -ENODATA: /* some pages are not cached */
- cifs_dbg(FYI, "%s: no page\n", __func__);
- return 1;
-
- default:
- cifs_dbg(FYI, "unknown error ret = %d\n", ret);
- }
-
- return ret;
+ return -ENOBUFS; // Needs conversion to using netfslib
}
void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
{
struct cifsInodeInfo *cifsi = CIFS_I(inode);
- int ret;
WARN_ON(!cifsi->fscache);
cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
__func__, cifsi->fscache, page, inode);
- ret = fscache_write_page(cifsi->fscache, page,
- cifsi->vfs_inode.i_size, GFP_KERNEL);
- if (ret != 0)
- fscache_uncache_page(cifsi->fscache, page);
-}
-
-void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages)
-{
- cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n",
- __func__, CIFS_I(inode)->fscache, inode);
- fscache_readpages_cancel(CIFS_I(inode)->fscache, pages);
-}
-
-void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct fscache_cookie *cookie = cifsi->fscache;
-
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
- fscache_wait_on_page_write(cookie, page);
- fscache_uncache_page(cookie, page);
-}
-
-void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct fscache_cookie *cookie = cifsi->fscache;
-
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
- fscache_wait_on_page_write(cookie, page);
-}
-
-void __cifs_fscache_uncache_page(struct inode *inode, struct page *page)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct fscache_cookie *cookie = cifsi->fscache;
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
- fscache_uncache_page(cookie, page);
+ // Needs conversion to using netfslib
}
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 9baa1d0f22bd..c6ca49ac33d4 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -13,84 +13,71 @@
#include "cifsglob.h"
-#ifdef CONFIG_CIFS_FSCACHE
-
/*
- * Auxiliary data attached to CIFS superblock within the cache
+ * Coherency data attached to CIFS volume within the cache
*/
-struct cifs_fscache_super_auxdata {
- u64 resource_id; /* unique server resource id */
+struct cifs_fscache_volume_coherency_data {
+ __le64 resource_id; /* unique server resource id */
__le64 vol_create_time;
- u32 vol_serial_number;
+ __le32 vol_serial_number;
} __packed;
/*
- * Auxiliary data attached to CIFS inode within the cache
+ * Coherency data attached to CIFS inode within the cache.
*/
-struct cifs_fscache_inode_auxdata {
- u64 last_write_time_sec;
- u64 last_change_time_sec;
- u32 last_write_time_nsec;
- u32 last_change_time_nsec;
- u64 eof;
+struct cifs_fscache_inode_coherency_data {
+ __le64 last_write_time_sec;
+ __le64 last_change_time_sec;
+ __le32 last_write_time_nsec;
+ __le32 last_change_time_nsec;
};
-/*
- * cache.c
- */
-extern struct fscache_netfs cifs_fscache_netfs;
-extern const struct fscache_cookie_def cifs_fscache_server_index_def;
-extern const struct fscache_cookie_def cifs_fscache_super_index_def;
-extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
-
-extern int cifs_fscache_register(void);
-extern void cifs_fscache_unregister(void);
+#ifdef CONFIG_CIFS_FSCACHE
/*
* fscache.c
*/
-extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *);
-extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *);
-extern void cifs_fscache_get_super_cookie(struct cifs_tcon *);
+extern int cifs_fscache_get_super_cookie(struct cifs_tcon *);
extern void cifs_fscache_release_super_cookie(struct cifs_tcon *);
+extern void cifs_fscache_get_inode_cookie(struct inode *inode);
extern void cifs_fscache_release_inode_cookie(struct inode *);
-extern void cifs_fscache_update_inode_cookie(struct inode *inode);
-extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
-extern void cifs_fscache_reset_inode_cookie(struct inode *);
+extern void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update);
+
+static inline
+void cifs_fscache_fill_coherency(struct inode *inode,
+ struct cifs_fscache_inode_coherency_data *cd)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ memset(cd, 0, sizeof(*cd));
+ cd->last_write_time_sec = cpu_to_le64(cifsi->vfs_inode.i_mtime.tv_sec);
+ cd->last_write_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_mtime.tv_nsec);
+ cd->last_change_time_sec = cpu_to_le64(cifsi->vfs_inode.i_ctime.tv_sec);
+ cd->last_change_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_ctime.tv_nsec);
+}
+
-extern void __cifs_fscache_invalidate_page(struct page *, struct inode *);
-extern void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page);
-extern void __cifs_fscache_uncache_page(struct inode *inode, struct page *page);
extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
extern int __cifs_readpages_from_fscache(struct inode *,
struct address_space *,
struct list_head *,
unsigned *);
-extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *);
-
extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
-static inline void cifs_fscache_invalidate_page(struct page *page,
- struct inode *inode)
+static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode)
{
- if (PageFsCache(page))
- __cifs_fscache_invalidate_page(page, inode);
+ return CIFS_I(inode)->fscache;
}
-static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
- struct page *page)
+static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags)
{
- if (PageFsCache(page))
- __cifs_fscache_wait_on_page_write(inode, page);
-}
+ struct cifs_fscache_inode_coherency_data cd;
-static inline void cifs_fscache_uncache_page(struct inode *inode,
- struct page *page)
-{
- if (PageFsCache(page))
- __cifs_fscache_uncache_page(inode, page);
+ cifs_fscache_fill_coherency(inode, &cd);
+ fscache_invalidate(cifs_inode_cookie(inode), &cd,
+ i_size_read(inode), flags);
}
static inline int cifs_readpage_from_fscache(struct inode *inode,
@@ -120,41 +107,21 @@ static inline void cifs_readpage_to_fscache(struct inode *inode,
__cifs_readpage_to_fscache(inode, page);
}
-static inline void cifs_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
+#else /* CONFIG_CIFS_FSCACHE */
+static inline
+void cifs_fscache_fill_coherency(struct inode *inode,
+ struct cifs_fscache_inode_coherency_data *cd)
{
- if (CIFS_I(inode)->fscache)
- return __cifs_fscache_readpages_cancel(inode, pages);
}
-#else /* CONFIG_CIFS_FSCACHE */
-static inline int cifs_fscache_register(void) { return 0; }
-static inline void cifs_fscache_unregister(void) {}
-
-static inline void
-cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {}
-static inline void
-cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {}
-static inline void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) {}
-static inline void
-cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
+static inline int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) { return 0; }
+static inline void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
+static inline void cifs_fscache_get_inode_cookie(struct inode *inode) {}
static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
-static inline void cifs_fscache_update_inode_cookie(struct inode *inode) {}
-static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
- struct file *filp) {}
-static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
-static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
-{
- return 1; /* May release page */
-}
-
-static inline void cifs_fscache_invalidate_page(struct page *page,
- struct inode *inode) {}
-static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
- struct page *page) {}
-static inline void cifs_fscache_uncache_page(struct inode *inode,
- struct page *page) {}
+static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
+static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
+static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
static inline int
cifs_readpage_from_fscache(struct inode *inode, struct page *page)
@@ -173,11 +140,6 @@ static inline int cifs_readpages_from_fscache(struct inode *inode,
static inline void cifs_readpage_to_fscache(struct inode *inode,
struct page *page) {}
-static inline void cifs_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
-{
-}
-
#endif /* CONFIG_CIFS_FSCACHE */
#endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 279622e4eb1c..7d8b3ceb2af3 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -952,6 +952,12 @@ cifs_get_inode_info(struct inode **inode,
rc = server->ops->query_path_info(xid, tcon, cifs_sb,
full_path, tmp_data,
&adjust_tz, &is_reparse_point);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (rc == -ENOENT && is_tcon_dfs(tcon))
+ rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon,
+ cifs_sb,
+ full_path);
+#endif
data = tmp_data;
}
@@ -1298,10 +1304,7 @@ retry_iget5_locked:
inode->i_flags |= S_NOATIME | S_NOCMTIME;
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
-#ifdef CONFIG_CIFS_FSCACHE
- /* initialize per-inode cache cookie pointer */
- CIFS_I(inode)->fscache = NULL;
-#endif
+ cifs_fscache_get_inode_cookie(inode);
unlock_new_inode(inode);
}
}
@@ -1370,6 +1373,7 @@ iget_no_retry:
iget_failed(inode);
inode = ERR_PTR(rc);
}
+
out:
kfree(path);
free_xid(xid);
@@ -2257,6 +2261,8 @@ cifs_dentry_needs_reval(struct dentry *dentry)
int
cifs_invalidate_mapping(struct inode *inode)
{
+ struct cifs_fscache_inode_coherency_data cd;
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
int rc = 0;
if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
@@ -2266,7 +2272,8 @@ cifs_invalidate_mapping(struct inode *inode)
__func__, inode);
}
- cifs_fscache_reset_inode_cookie(inode);
+ cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd);
+ fscache_invalidate(cifs_inode_cookie(inode), &cd, i_size_read(inode), 0);
return rc;
}
@@ -2771,8 +2778,10 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
goto out;
if ((attrs->ia_valid & ATTR_SIZE) &&
- attrs->ia_size != i_size_read(inode))
+ attrs->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attrs->ia_size);
+ fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
+ }
setattr_copy(&init_user_ns, inode, attrs);
mark_inode_dirty(inode);
@@ -2967,8 +2976,10 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
goto cifs_setattr_exit;
if ((attrs->ia_valid & ATTR_SIZE) &&
- attrs->ia_size != i_size_read(inode))
+ attrs->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attrs->ia_size);
+ fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
+ }
setattr_copy(&init_user_ns, inode, attrs);
mark_inode_dirty(inode);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 5148d48d6a35..56598f7dbe00 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1302,4 +1302,53 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
return 0;
}
+
+/** cifs_dfs_query_info_nonascii_quirk
+ * Handle weird Windows SMB server behaviour. It responds with
+ * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request
+ * for "\<server>\<dfsname>\<linkpath>" DFS reference,
+ * where <dfsname> contains non-ASCII unicode symbols.
+ *
+ * Check such DFS reference and emulate -ENOENT if it is actual.
+ */
+int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ const char *linkpath)
+{
+ char *treename, *dfspath, sep;
+ int treenamelen, linkpathlen, rc;
+
+ treename = tcon->treeName;
+ /* MS-DFSC: All paths in REQ_GET_DFS_REFERRAL and RESP_GET_DFS_REFERRAL
+ * messages MUST be encoded with exactly one leading backslash, not two
+ * leading backslashes.
+ */
+ sep = CIFS_DIR_SEP(cifs_sb);
+ if (treename[0] == sep && treename[1] == sep)
+ treename++;
+ linkpathlen = strlen(linkpath);
+ treenamelen = strnlen(treename, MAX_TREE_SIZE + 1);
+ dfspath = kzalloc(treenamelen + linkpathlen + 1, GFP_KERNEL);
+ if (!dfspath)
+ return -ENOMEM;
+ if (treenamelen)
+ memcpy(dfspath, treename, treenamelen);
+ memcpy(dfspath + treenamelen, linkpath, linkpathlen);
+ rc = dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls,
+ cifs_remap(cifs_sb), dfspath, NULL, NULL);
+ if (rc == 0) {
+ cifs_dbg(FYI, "DFS ref '%s' is found, emulate -EREMOTE\n",
+ dfspath);
+ rc = -EREMOTE;
+ } else if (rc == -EEXIST) {
+ cifs_dbg(FYI, "DFS ref '%s' is not found, emulate -ENOENT\n",
+ dfspath);
+ rc = -ENOENT;
+ } else {
+ cifs_dbg(FYI, "%s: dfs_cache_find returned %d\n", __func__, rc);
+ }
+ kfree(dfspath);
+ return rc;
+}
#endif
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 43b16b6d108c..ebe236b9d9f5 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -896,10 +896,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr)
if (class == ERRSRV && code == ERRbaduid) {
cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n",
code);
- spin_lock(&cifs_tcp_ses_lock);
- if (mid->server->tcpStatus != CifsExiting)
- mid->server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&cifs_tcp_ses_lock);
+ cifs_reconnect(mid->server, false);
}
}
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 6d242af536cb..298458404252 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -40,7 +40,7 @@
#define NTLMSSP_REQUEST_NON_NT_KEY 0x400000
#define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000
/* #define reserved4 0x1000000 */
-#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we do not set */
+#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we only set for SMB2+ */
/* #define reserved3 0x4000000 */
/* #define reserved2 0x8000000 */
/* #define reserved1 0x10000000 */
@@ -87,6 +87,30 @@ typedef struct _NEGOTIATE_MESSAGE {
/* followed by WorkstationString */
} __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE;
+#define NTLMSSP_REVISION_W2K3 0x0F
+
+/* See MS-NLMP section 2.2.2.10 */
+struct ntlmssp_version {
+ __u8 ProductMajorVersion;
+ __u8 ProductMinorVersion;
+ __le16 ProductBuild; /* we send the cifs.ko module version here */
+ __u8 Reserved[3];
+ __u8 NTLMRevisionCurrent; /* currently 0x0F */
+} __packed;
+
+/* see MS-NLMP section 2.2.1.1 */
+struct negotiate_message {
+ __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+ __le32 MessageType; /* NtLmNegotiate = 1 */
+ __le32 NegotiateFlags;
+ SECURITY_BUFFER DomainName; /* RFC 1001 style and ASCII */
+ SECURITY_BUFFER WorkstationName; /* RFC 1001 and ASCII */
+ struct ntlmssp_version Version;
+ /* SECURITY_BUFFER */
+ char DomainString[0];
+ /* followed by WorkstationString */
+} __packed;
+
typedef struct _CHALLENGE_MESSAGE {
__u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
__le32 MessageType; /* NtLmChallenge = 2 */
@@ -123,6 +147,10 @@ int build_ntlmssp_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
struct cifs_ses *ses,
struct TCP_Server_Info *server,
const struct nls_table *nls_cp);
+int build_ntlmssp_smb3_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ const struct nls_table *nls_cp);
int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
struct cifs_ses *ses,
struct TCP_Server_Info *server,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d12490e12be5..dc3b16d1be09 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -17,6 +17,8 @@
#include "nterr.h"
#include <linux/utsname.h>
#include <linux/slab.h>
+#include <linux/version.h>
+#include "cifsfs.h"
#include "cifs_spnego.h"
#include "smb2proto.h"
#include "fs_context.h"
@@ -65,6 +67,8 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
return false;
}
+/* channel helper functions. assumed that chan_lock is held by caller. */
+
unsigned int
cifs_ses_get_chan_index(struct cifs_ses *ses,
struct TCP_Server_Info *server)
@@ -134,10 +138,10 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
left = ses->chan_max - ses->chan_count;
if (left <= 0) {
+ spin_unlock(&ses->chan_lock);
cifs_dbg(FYI,
"ses already at max_channels (%zu), nothing to open\n",
ses->chan_max);
- spin_unlock(&ses->chan_lock);
return 0;
}
@@ -364,19 +368,6 @@ out:
return rc;
}
-/* Mark all session channels for reconnect */
-void cifs_ses_mark_for_reconnect(struct cifs_ses *ses)
-{
- int i;
-
- for (i = 0; i < ses->chan_count; i++) {
- spin_lock(&cifs_tcp_ses_lock);
- if (ses->chans[i].server->tcpStatus != CifsExiting)
- ses->chans[i].server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&cifs_tcp_ses_lock);
- }
-}
-
static __u32 cifs_ssetup_hdr(struct cifs_ses *ses,
struct TCP_Server_Info *server,
SESSION_SETUP_ANDX *pSMB)
@@ -820,6 +811,74 @@ setup_ntlm_neg_ret:
return rc;
}
+/*
+ * Build ntlmssp blob with additional fields, such as version,
+ * supported by modern servers. For safety limit to SMB3 or later
+ * See notes in MS-NLMP Section 2.2.2.1 e.g.
+ */
+int build_ntlmssp_smb3_negotiate_blob(unsigned char **pbuffer,
+ u16 *buflen,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ const struct nls_table *nls_cp)
+{
+ int rc = 0;
+ struct negotiate_message *sec_blob;
+ __u32 flags;
+ unsigned char *tmp;
+ int len;
+
+ len = size_of_ntlmssp_blob(ses, sizeof(struct negotiate_message));
+ *pbuffer = kmalloc(len, GFP_KERNEL);
+ if (!*pbuffer) {
+ rc = -ENOMEM;
+ cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc);
+ *buflen = 0;
+ goto setup_ntlm_smb3_neg_ret;
+ }
+ sec_blob = (struct negotiate_message *)*pbuffer;
+
+ memset(*pbuffer, 0, sizeof(struct negotiate_message));
+ memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
+ sec_blob->MessageType = NtLmNegotiate;
+
+ /* BB is NTLMV2 session security format easier to use here? */
+ flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
+ NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
+ NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
+ NTLMSSP_NEGOTIATE_ALWAYS_SIGN | NTLMSSP_NEGOTIATE_SEAL |
+ NTLMSSP_NEGOTIATE_SIGN | NTLMSSP_NEGOTIATE_VERSION;
+ if (!server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+
+ sec_blob->Version.ProductMajorVersion = LINUX_VERSION_MAJOR;
+ sec_blob->Version.ProductMinorVersion = LINUX_VERSION_PATCHLEVEL;
+ sec_blob->Version.ProductBuild = cpu_to_le16(SMB3_PRODUCT_BUILD);
+ sec_blob->Version.NTLMRevisionCurrent = NTLMSSP_REVISION_W2K3;
+
+ tmp = *pbuffer + sizeof(struct negotiate_message);
+ ses->ntlmssp->client_flags = flags;
+ sec_blob->NegotiateFlags = cpu_to_le32(flags);
+
+ /* these fields should be null in negotiate phase MS-NLMP 3.1.5.1.1 */
+ cifs_security_buffer_from_str(&sec_blob->DomainName,
+ NULL,
+ CIFS_MAX_DOMAINNAME_LEN,
+ *pbuffer, &tmp,
+ nls_cp);
+
+ cifs_security_buffer_from_str(&sec_blob->WorkstationName,
+ NULL,
+ CIFS_MAX_WORKSTATION_LEN,
+ *pbuffer, &tmp,
+ nls_cp);
+
+ *buflen = tmp - *pbuffer;
+setup_ntlm_smb3_neg_ret:
+ return rc;
+}
+
+
int build_ntlmssp_auth_blob(unsigned char **pbuffer,
u16 *buflen,
struct cifs_ses *ses,
@@ -1048,16 +1107,6 @@ sess_establish_session(struct sess_data *sess_data)
mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "CIFS session established successfully\n");
- spin_lock(&ses->chan_lock);
- cifs_chan_clear_need_reconnect(ses, server);
- spin_unlock(&ses->chan_lock);
-
- /* Even if one channel is active, session is in good state */
- spin_lock(&cifs_tcp_ses_lock);
- server->tcpStatus = CifsGood;
- ses->status = CifsGood;
- spin_unlock(&cifs_tcp_ses_lock);
-
return 0;
}
@@ -1413,7 +1462,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
&blob_len, ses, server,
sess_data->nls_cp);
if (rc)
- goto out;
+ goto out_free_ntlmsspblob;
sess_data->iov[1].iov_len = blob_len;
sess_data->iov[1].iov_base = ntlmsspblob;
@@ -1421,7 +1470,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
if (rc)
- goto out;
+ goto out_free_ntlmsspblob;
rc = sess_sendreceive(sess_data);
@@ -1435,14 +1484,14 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
rc = 0;
if (rc)
- goto out;
+ goto out_free_ntlmsspblob;
cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
if (smb_buf->WordCount != 4) {
rc = -EIO;
cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
- goto out;
+ goto out_free_ntlmsspblob;
}
ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
@@ -1456,10 +1505,13 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
cifs_dbg(VFS, "bad security blob length %d\n",
blob_len);
rc = -EINVAL;
- goto out;
+ goto out_free_ntlmsspblob;
}
rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
+
+out_free_ntlmsspblob:
+ kfree(ntlmsspblob);
out:
sess_free_buffer(sess_data);
@@ -1574,7 +1626,7 @@ out_free_ntlmsspblob:
out:
sess_free_buffer(sess_data);
- if (!rc)
+ if (!rc)
rc = sess_establish_session(sess_data);
/* Cleanup */
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8d471df69c59..7e7909b1ae11 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -244,10 +244,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
spin_unlock(&ses->chan_lock);
return 0;
}
+ spin_unlock(&ses->chan_lock);
cifs_dbg(FYI, "sess reconnect mask: 0x%lx, tcon reconnect: %d",
tcon->ses->chans_need_reconnect,
tcon->need_reconnect);
- spin_unlock(&ses->chan_lock);
nls_codepage = load_nls_default();
@@ -289,14 +289,18 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
rc = -EHOSTDOWN;
goto failed;
}
- }
-
- if (rc || !tcon->need_reconnect) {
+ } else {
mutex_unlock(&ses->session_mutex);
goto out;
}
+ mutex_unlock(&ses->session_mutex);
skip_sess_setup:
+ mutex_lock(&ses->session_mutex);
+ if (!tcon->need_reconnect) {
+ mutex_unlock(&ses->session_mutex);
+ goto out;
+ }
cifs_mark_open_files_invalid(tcon);
if (tcon->use_persistent)
tcon->need_reopen_files = true;
@@ -1382,17 +1386,6 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "SMB2/3 session established successfully\n");
-
- spin_lock(&ses->chan_lock);
- cifs_chan_clear_need_reconnect(ses, server);
- spin_unlock(&ses->chan_lock);
-
- /* Even if one channel is active, session is in good state */
- spin_lock(&cifs_tcp_ses_lock);
- server->tcpStatus = CifsGood;
- ses->status = CifsGood;
- spin_unlock(&cifs_tcp_ses_lock);
-
return rc;
}
@@ -1513,7 +1506,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
if (rc)
goto out_err;
- rc = build_ntlmssp_negotiate_blob(&ntlmssp_blob,
+ rc = build_ntlmssp_smb3_negotiate_blob(&ntlmssp_blob,
&blob_length, ses, server,
sess_data->nls_cp);
if (rc)
@@ -1920,10 +1913,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
tcon->share_flags = le32_to_cpu(rsp->ShareFlags);
tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */
tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
- spin_lock(&cifs_tcp_ses_lock);
- tcon->tidStatus = CifsGood;
- spin_unlock(&cifs_tcp_ses_lock);
- tcon->need_reconnect = false;
tcon->tid = le32_to_cpu(rsp->hdr.Id.SyncId.TreeId);
strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
@@ -2587,8 +2576,13 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
cp = load_nls_default();
cifs_strtoUTF16(*out_path, treename, treename_len, cp);
- UniStrcat(*out_path, sep);
- UniStrcat(*out_path, path);
+
+ /* Do not append the separator if the path is empty */
+ if (path[0] != cpu_to_le16(0x0000)) {
+ UniStrcat(*out_path, sep);
+ UniStrcat(*out_path, path);
+ }
+
unload_nls(cp);
return 0;
@@ -3782,27 +3776,35 @@ void smb2_reconnect_server(struct work_struct *work)
{
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, reconnect.work);
- struct cifs_ses *ses;
+ struct TCP_Server_Info *pserver;
+ struct cifs_ses *ses, *ses2;
struct cifs_tcon *tcon, *tcon2;
- struct list_head tmp_list;
- int tcon_exist = false;
+ struct list_head tmp_list, tmp_ses_list;
+ bool tcon_exist = false, ses_exist = false;
+ bool tcon_selected = false;
int rc;
- int resched = false;
+ bool resched = false;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
/* Prevent simultaneous reconnects that can corrupt tcon->rlist list */
- mutex_lock(&server->reconnect_mutex);
+ mutex_lock(&pserver->reconnect_mutex);
INIT_LIST_HEAD(&tmp_list);
- cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
+ INIT_LIST_HEAD(&tmp_ses_list);
+ cifs_dbg(FYI, "Reconnecting tcons and channels\n");
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+
+ tcon_selected = false;
+
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->need_reconnect || tcon->need_reopen_files) {
tcon->tc_count++;
list_add_tail(&tcon->rlist, &tmp_list);
- tcon_exist = true;
+ tcon_selected = tcon_exist = true;
}
}
/*
@@ -3811,15 +3813,27 @@ void smb2_reconnect_server(struct work_struct *work)
*/
if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) {
list_add_tail(&ses->tcon_ipc->rlist, &tmp_list);
- tcon_exist = true;
+ tcon_selected = tcon_exist = true;
ses->ses_count++;
}
+ /*
+ * handle the case where channel needs to reconnect
+ * binding session, but tcon is healthy (some other channel
+ * is active)
+ */
+ spin_lock(&ses->chan_lock);
+ if (!tcon_selected && cifs_chan_needs_reconnect(ses, server)) {
+ list_add_tail(&ses->rlist, &tmp_ses_list);
+ ses_exist = true;
+ ses->ses_count++;
+ }
+ spin_unlock(&ses->chan_lock);
}
/*
* Get the reference to server struct to be sure that the last call of
* cifs_put_tcon() in the loop below won't release the server pointer.
*/
- if (tcon_exist)
+ if (tcon_exist || ses_exist)
server->srv_count++;
spin_unlock(&cifs_tcp_ses_lock);
@@ -3837,13 +3851,41 @@ void smb2_reconnect_server(struct work_struct *work)
cifs_put_tcon(tcon);
}
- cifs_dbg(FYI, "Reconnecting tcons finished\n");
+ if (!ses_exist)
+ goto done;
+
+ /* allocate a dummy tcon struct used for reconnect */
+ tcon = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL);
+ if (!tcon) {
+ resched = true;
+ list_del_init(&ses->rlist);
+ cifs_put_smb_ses(ses);
+ goto done;
+ }
+
+ tcon->tidStatus = CifsGood;
+ tcon->retry = false;
+ tcon->need_reconnect = false;
+
+ /* now reconnect sessions for necessary channels */
+ list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
+ tcon->ses = ses;
+ rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server);
+ if (rc)
+ resched = true;
+ list_del_init(&ses->rlist);
+ cifs_put_smb_ses(ses);
+ }
+ kfree(tcon);
+
+done:
+ cifs_dbg(FYI, "Reconnecting tcons and channels finished\n");
if (resched)
queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ);
- mutex_unlock(&server->reconnect_mutex);
+ mutex_unlock(&pserver->reconnect_mutex);
/* now we can safely release srv struct */
- if (tcon_exist)
+ if (tcon_exist || ses_exist)
cifs_put_tcp_session(server, 1);
}
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index b70a49b4edc0..2af79093b78b 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -100,6 +100,7 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
goto out;
found:
+ spin_lock(&ses->chan_lock);
if (cifs_chan_needs_reconnect(ses, server) &&
!CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
/*
@@ -108,6 +109,7 @@ found:
* session key
*/
memcpy(key, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&ses->chan_lock);
goto out;
}
@@ -119,9 +121,11 @@ found:
chan = ses->chans + i;
if (chan->server == server) {
memcpy(key, chan->signkey, SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&ses->chan_lock);
goto out;
}
}
+ spin_unlock(&ses->chan_lock);
cifs_dbg(VFS,
"%s: Could not find channel signing key for session 0x%llx\n",
@@ -430,8 +434,10 @@ generate_smb3signingkey(struct cifs_ses *ses,
return rc;
/* safe to access primary channel, since it will never go away */
+ spin_lock(&ses->chan_lock);
memcpy(ses->chans[0].signkey, ses->smb3signingkey,
SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&ses->chan_lock);
rc = generate_key(ses, ptriplet->encryption.label,
ptriplet->encryption.context,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 93f0e8c1ea23..8540f7c13eae 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -431,7 +431,8 @@ unmask:
* socket so the server throws away the partial SMB
*/
spin_lock(&cifs_tcp_ses_lock);
- server->tcpStatus = CifsNeedReconnect;
+ if (server->tcpStatus != CifsExiting)
+ server->tcpStatus = CifsNeedReconnect;
spin_unlock(&cifs_tcp_ses_lock);
trace_smb3_partial_send_reconnect(server->CurrentMid,
server->conn_id, server->hostname);
@@ -729,17 +730,6 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
struct mid_q_entry **ppmidQ)
{
spin_lock(&cifs_tcp_ses_lock);
- if (ses->server->tcpStatus == CifsExiting) {
- spin_unlock(&cifs_tcp_ses_lock);
- return -ENOENT;
- }
-
- if (ses->server->tcpStatus == CifsNeedReconnect) {
- spin_unlock(&cifs_tcp_ses_lock);
- cifs_dbg(FYI, "tcp session dead - return to caller to retry\n");
- return -EAGAIN;
- }
-
if (ses->status == CifsNew) {
if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
(in_buf->Command != SMB_COM_NEGOTIATE)) {
@@ -1059,7 +1049,10 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
/* round robin */
index = (uint)atomic_inc_return(&ses->chan_seq);
+
+ spin_lock(&ses->chan_lock);
index %= ses->chan_count;
+ spin_unlock(&ses->chan_lock);
return ses->chans[index].server;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 1466b5d01cbb..d3cd2a94d1e8 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1780,8 +1780,8 @@ void configfs_unregister_group(struct config_group *group)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
+ d_drop(dentry);
fsnotify_rmdir(d_inode(parent), dentry);
- d_delete(dentry);
inode_unlock(d_inode(parent));
dput(dentry);
@@ -1922,10 +1922,10 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
- fsnotify_rmdir(d_inode(root), dentry);
inode_unlock(d_inode(dentry));
- d_delete(dentry);
+ d_drop(dentry);
+ fsnotify_rmdir(d_inode(root), dentry);
inode_unlock(d_inode(root));
diff --git a/fs/coredump.c b/fs/coredump.c
index 7dece20b162b..1c060c0a2d72 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -41,6 +41,7 @@
#include <linux/fs.h>
#include <linux/path.h>
#include <linux/timekeeping.h>
+#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -52,9 +53,9 @@
#include <trace/events/sched.h>
-int core_uses_pid;
-unsigned int core_pipe_limit;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
+static int core_uses_pid;
+static unsigned int core_pipe_limit;
+static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
struct core_name {
@@ -62,8 +63,6 @@ struct core_name {
int used, size;
};
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
static int expand_corename(struct core_name *cn, int size)
{
char *corename = krealloc(cn->corename, size, GFP_KERNEL);
@@ -893,6 +892,63 @@ int dump_align(struct coredump_params *cprm, int align)
}
EXPORT_SYMBOL(dump_align);
+#ifdef CONFIG_SYSCTL
+
+void validate_coredump_safety(void)
+{
+ if (suid_dumpable == SUID_DUMP_ROOT &&
+ core_pattern[0] != '/' && core_pattern[0] != '|') {
+ pr_warn(
+"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
+"Pipe handler or fully qualified core dump path required.\n"
+"Set kernel.core_pattern before fs.suid_dumpable.\n"
+ );
+ }
+}
+
+static int proc_dostring_coredump(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dostring(table, write, buffer, lenp, ppos);
+
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
+static struct ctl_table coredump_sysctls[] = {
+ {
+ .procname = "core_uses_pid",
+ .data = &core_uses_pid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "core_pattern",
+ .data = core_pattern,
+ .maxlen = CORENAME_MAX_SIZE,
+ .mode = 0644,
+ .proc_handler = proc_dostring_coredump,
+ },
+ {
+ .procname = "core_pipe_limit",
+ .data = &core_pipe_limit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static int __init init_fs_coredump_sysctls(void)
+{
+ register_sysctl_init("kernel", coredump_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_coredump_sysctls);
+#endif /* CONFIG_SYSCTL */
+
/*
* The purpose of always_dump_vma() is to make sure that special kernel mappings
* that are useful for post-mortem analysis are included in every core dump.
diff --git a/fs/dcache.c b/fs/dcache.c
index cf871a81f4fd..c84269c6e8bf 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -115,10 +115,13 @@ static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
}
-
-/* Statistics gathering. */
-struct dentry_stat_t dentry_stat = {
- .age_limit = 45,
+struct dentry_stat_t {
+ long nr_dentry;
+ long nr_unused;
+ long age_limit; /* age in seconds */
+ long want_pages; /* pages requested by system */
+ long nr_negative; /* # of unused negative dentries */
+ long dummy; /* Reserved for future use */
};
static DEFINE_PER_CPU(long, nr_dentry);
@@ -126,6 +129,10 @@ static DEFINE_PER_CPU(long, nr_dentry_unused);
static DEFINE_PER_CPU(long, nr_dentry_negative);
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+/* Statistics gathering. */
+static struct dentry_stat_t dentry_stat = {
+ .age_limit = 45,
+};
/*
* Here we resort to our own counters instead of using generic per-cpu counters
@@ -167,14 +174,32 @@ static long get_nr_dentry_negative(void)
return sum < 0 ? 0 : sum;
}
-int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
- size_t *lenp, loff_t *ppos)
+static int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
dentry_stat.nr_dentry = get_nr_dentry();
dentry_stat.nr_unused = get_nr_dentry_unused();
dentry_stat.nr_negative = get_nr_dentry_negative();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
+
+static struct ctl_table fs_dcache_sysctls[] = {
+ {
+ .procname = "dentry-state",
+ .data = &dentry_stat,
+ .maxlen = 6*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_dentry,
+ },
+ { }
+};
+
+static int __init init_fs_dcache_sysctls(void)
+{
+ register_sysctl_init("fs", fs_dcache_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_dcache_sysctls);
#endif
/*
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 42e5a766d33c..4f25015aa534 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -621,8 +621,8 @@ void devpts_pty_kill(struct dentry *dentry)
dentry->d_fsdata = NULL;
drop_nlink(dentry->d_inode);
- fsnotify_unlink(d_inode(dentry->d_parent), dentry);
d_drop(dentry);
+ fsnotify_unlink(d_inode(dentry->d_parent), dentry);
dput(dentry); /* d_alloc_name() in devpts_pty_new() */
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 06f4c5ae1451..e2daa940ebce 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -307,7 +307,7 @@ static void unlist_file(struct epitems_head *head)
static long long_zero;
static long long_max = LONG_MAX;
-struct ctl_table epoll_table[] = {
+static struct ctl_table epoll_table[] = {
{
.procname = "max_user_watches",
.data = &max_user_watches,
@@ -319,6 +319,13 @@ struct ctl_table epoll_table[] = {
},
{ }
};
+
+static void __init epoll_sysctls_init(void)
+{
+ register_sysctl("fs/epoll", epoll_table);
+}
+#else
+#define epoll_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
static const struct file_operations eventpoll_fops;
@@ -2378,6 +2385,7 @@ static int __init eventpoll_init(void)
/* Allocates slab cache used to allocate "struct eppoll_entry" */
pwq_cache = kmem_cache_create("eventpoll_pwq",
sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
+ epoll_sysctls_init();
ephead_cache = kmem_cache_create("ep_head",
sizeof(struct epitems_head), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
diff --git a/fs/exec.c b/fs/exec.c
index 82db656ca709..79f2c9483302 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -65,6 +65,7 @@
#include <linux/vmalloc.h>
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
+#include <linux/coredump.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -1207,7 +1208,8 @@ static int unshare_sighand(struct task_struct *me)
char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
task_lock(tsk);
- strncpy(buf, tsk->comm, buf_size);
+ /* Always NUL terminated and zero-padded */
+ strscpy_pad(buf, tsk->comm, buf_size);
task_unlock(tsk);
return buf;
}
@@ -1222,7 +1224,7 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
{
task_lock(tsk);
trace_task_rename(tsk, buf);
- strlcpy(tsk->comm, buf, sizeof(tsk->comm));
+ strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
perf_event_comm(tsk, exec);
}
@@ -2098,3 +2100,37 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
argv, envp, flags);
}
#endif
+
+#ifdef CONFIG_SYSCTL
+
+static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
+static struct ctl_table fs_exec_sysctls[] = {
+ {
+ .procname = "suid_dumpable",
+ .data = &suid_dumpable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_coredump,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ { }
+};
+
+static int __init init_fs_exec_sysctls(void)
+{
+ register_sysctl_init("fs", fs_exec_sysctls);
+ return 0;
+}
+
+fs_initcall(init_fs_exec_sysctls);
+#endif /* CONFIG_SYSCTL */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cf2fd9fc7d98..9f86dd947032 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2834,7 +2834,7 @@ out:
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group;
if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
@@ -2845,7 +2845,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group;
++*pos;
@@ -2857,7 +2857,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group = (ext4_group_t) ((unsigned long) v);
int i;
int err, buddy_loaded = 0;
@@ -2985,7 +2985,7 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
__acquires(&EXT4_SB(sb)->s_mb_rb_lock)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
read_lock(&EXT4_SB(sb)->s_mb_rb_lock);
@@ -2998,7 +2998,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock)
static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
++*pos;
@@ -3010,7 +3010,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, lof
static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long position = ((unsigned long) v);
struct ext4_group_info *grp;
@@ -3058,7 +3058,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
__releases(&EXT4_SB(sb)->s_mb_rb_lock)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 3db923403505..4cd62f1d848c 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -43,7 +43,6 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
-#include <linux/cleancache.h>
#include "ext4.h"
@@ -350,11 +349,6 @@ int ext4_mpage_readpages(struct inode *inode,
} else if (fully_mapped) {
SetPageMappedToDisk(page);
}
- if (fully_mapped && blocks_per_page == 1 &&
- !PageUptodate(page) && cleancache_get_page(page) == 0) {
- SetPageUptodate(page);
- goto confused;
- }
/*
* This page will go to BIO. Do we need to send this
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index db9fe4843529..eee0d9ebfa6c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -39,7 +39,6 @@
#include <linux/log2.h>
#include <linux/crc16.h>
#include <linux/dax.h>
-#include <linux/cleancache.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
@@ -3149,8 +3148,6 @@ done:
EXT4_BLOCKS_PER_GROUP(sb),
EXT4_INODES_PER_GROUP(sb),
sbi->s_mount_opt, sbi->s_mount_opt2);
-
- cleancache_init_fs(sb);
return err;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0a1d236212f8..8c417864c66a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -18,7 +18,6 @@
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
-#include <linux/cleancache.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
#include <linux/iomap.h>
@@ -2035,12 +2034,6 @@ got_it:
block_nr = map->m_pblk + block_in_file - map->m_lblk;
SetPageMappedToDisk(page);
- if (!PageUptodate(page) && (!PageSwapCache(page) &&
- !cleancache_get_page(page))) {
- SetPageUptodate(page);
- goto confused;
- }
-
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC_ENHANCE_READ)) {
ret = -EFSCORRUPTED;
@@ -2096,12 +2089,6 @@ submit_and_realloc:
ClearPageError(page);
*last_block_in_bio = block_nr;
goto out;
-confused:
- if (bio) {
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
- unlock_page(page);
out:
*bio_ret = bio;
return ret;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 575d3dc418d0..1dabc8244083 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2555,8 +2555,8 @@ find_other_zone:
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
if (secno >= MAIN_SECS(sbi)) {
if (dir == ALLOC_RIGHT) {
- secno = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
+ secno = find_first_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi));
f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
} else {
go_left = 1;
@@ -2571,8 +2571,8 @@ find_other_zone:
left_start--;
continue;
}
- left_start = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
+ left_start = find_first_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi));
f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
break;
}
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 13855ba49cd9..a5a309fcc7fa 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -175,9 +175,10 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static int fat_file_release(struct inode *inode, struct file *filp)
{
if ((filp->f_mode & FMODE_WRITE) &&
- MSDOS_SB(inode->i_sb)->options.flush) {
+ MSDOS_SB(inode->i_sb)->options.flush) {
fat_flush_inodes(inode->i_sb, inode, NULL);
- congestion_wait(BLK_RW_ASYNC, HZ/10);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ io_schedule_timeout(HZ/10);
}
return 0;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 45437f8e1003..57edef16dce4 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -33,7 +33,7 @@
#include "internal.h"
/* sysctl tunables... */
-struct files_stat_struct files_stat = {
+static struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
@@ -75,22 +75,53 @@ unsigned long get_max_files(void)
}
EXPORT_SYMBOL_GPL(get_max_files);
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+
/*
* Handle nr_files sysctl
*/
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_files(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_nr_files(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
-#else
-int proc_nr_files(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+
+static struct ctl_table fs_stat_sysctls[] = {
+ {
+ .procname = "file-nr",
+ .data = &files_stat,
+ .maxlen = sizeof(files_stat),
+ .mode = 0444,
+ .proc_handler = proc_nr_files,
+ },
+ {
+ .procname = "file-max",
+ .data = &files_stat.max_files,
+ .maxlen = sizeof(files_stat.max_files),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = SYSCTL_LONG_ZERO,
+ .extra2 = SYSCTL_LONG_MAX,
+ },
+ {
+ .procname = "nr_open",
+ .data = &sysctl_nr_open,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &sysctl_nr_open_min,
+ .extra2 = &sysctl_nr_open_max,
+ },
+ { }
+};
+
+static int __init init_fs_stat_sysctls(void)
{
- return -ENOSYS;
+ register_sysctl_init("fs", fs_stat_sysctls);
+ return 0;
}
+fs_initcall(init_fs_stat_sysctls);
#endif
static struct file *__alloc_file(int flags, const struct cred *cred)
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index a57c6cbee858..f2aa7dbad766 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -142,12 +142,12 @@ static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
unsigned int collidee_debug_id)
{
wait_var_event_timeout(&candidate->flags,
- fscache_is_acquire_pending(candidate), 20 * HZ);
+ !fscache_is_acquire_pending(candidate), 20 * HZ);
if (!fscache_is_acquire_pending(candidate)) {
pr_notice("Potential volume collision new=%08x old=%08x",
candidate->debug_id, collidee_debug_id);
fscache_stat(&fscache_n_volumes_collision);
- wait_var_event(&candidate->flags, fscache_is_acquire_pending(candidate));
+ wait_var_event(&candidate->flags, !fscache_is_acquire_pending(candidate));
}
}
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 456e87aec7fd..68b4240c6191 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -260,8 +260,10 @@ struct hfsplus_cat_folder {
__be32 access_date;
__be32 backup_date;
struct hfsplus_perm permissions;
- struct DInfo user_info;
- struct DXInfo finder_info;
+ struct_group_attr(info, __packed,
+ struct DInfo user_info;
+ struct DXInfo finder_info;
+ );
__be32 text_encoding;
__be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */
} __packed;
@@ -294,8 +296,10 @@ struct hfsplus_cat_file {
__be32 access_date;
__be32 backup_date;
struct hfsplus_perm permissions;
- struct FInfo user_info;
- struct FXInfo finder_info;
+ struct_group_attr(info, __packed,
+ struct FInfo user_info;
+ struct FXInfo finder_info;
+ );
__be32 text_encoding;
u32 reserved2;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index e2855ceefd39..49891b12c415 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -296,7 +296,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
sizeof(hfsplus_cat_entry));
if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) {
if (size == folder_finderinfo_len) {
- memcpy(&entry.folder.user_info, value,
+ memcpy(&entry.folder.info, value,
folder_finderinfo_len);
hfs_bnode_write(cat_fd.bnode, &entry,
cat_fd.entryoffset,
@@ -309,7 +309,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
}
} else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) {
if (size == file_finderinfo_len) {
- memcpy(&entry.file.user_info, value,
+ memcpy(&entry.file.info, value,
file_finderinfo_len);
hfs_bnode_write(cat_fd.bnode, &entry,
cat_fd.entryoffset,
diff --git a/fs/inode.c b/fs/inode.c
index 980e7b7a5460..63324df6fa27 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -67,11 +67,6 @@ const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);
-/*
- * Statistics gathering..
- */
-struct inodes_stat_t inodes_stat;
-
static DEFINE_PER_CPU(unsigned long, nr_inodes);
static DEFINE_PER_CPU(unsigned long, nr_unused);
@@ -106,13 +101,43 @@ long get_nr_dirty_inodes(void)
* Handle nr_inode sysctl
*/
#ifdef CONFIG_SYSCTL
-int proc_nr_inodes(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+/*
+ * Statistics gathering..
+ */
+static struct inodes_stat_t inodes_stat;
+
+static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
inodes_stat.nr_unused = get_nr_inodes_unused();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
+
+static struct ctl_table inodes_sysctls[] = {
+ {
+ .procname = "inode-nr",
+ .data = &inodes_stat,
+ .maxlen = 2*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ {
+ .procname = "inode-state",
+ .data = &inodes_stat,
+ .maxlen = 7*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ { }
+};
+
+static int __init init_fs_inode_sysctls(void)
+{
+ register_sysctl_init("fs", inodes_sysctls);
+ return 0;
+}
+early_initcall(init_fs_inode_sysctls);
#endif
static int no_open(struct inode *inode, struct file *file)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index a7763127f884..bb7f161bb19c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -48,7 +48,8 @@ struct io_worker {
struct io_wqe *wqe;
struct io_wq_work *cur_work;
- spinlock_t lock;
+ struct io_wq_work *next_work;
+ raw_spinlock_t lock;
struct completion ref_done;
@@ -405,8 +406,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
* Worker will start processing some work. Move it to the busy list, if
* it's currently on the freelist
*/
-static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
- struct io_wq_work *work)
+static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
__must_hold(wqe->lock)
{
if (worker->flags & IO_WORKER_F_FREE) {
@@ -529,9 +529,10 @@ static void io_assign_current_work(struct io_worker *worker,
cond_resched();
}
- spin_lock(&worker->lock);
+ raw_spin_lock(&worker->lock);
worker->cur_work = work;
- spin_unlock(&worker->lock);
+ worker->next_work = NULL;
+ raw_spin_unlock(&worker->lock);
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -546,7 +547,7 @@ static void io_worker_handle_work(struct io_worker *worker)
do {
struct io_wq_work *work;
-get_next:
+
/*
* If we got some work, mark us as busy. If we didn't, but
* the list isn't empty, it means we stalled on hashed work.
@@ -555,9 +556,20 @@ get_next:
* clear the stalled flag.
*/
work = io_get_next_work(acct, worker);
- if (work)
- __io_worker_busy(wqe, worker, work);
-
+ if (work) {
+ __io_worker_busy(wqe, worker);
+
+ /*
+ * Make sure cancelation can find this, even before
+ * it becomes the active work. That avoids a window
+ * where the work has been removed from our general
+ * work list, but isn't yet discoverable as the
+ * current work item for this worker.
+ */
+ raw_spin_lock(&worker->lock);
+ worker->next_work = work;
+ raw_spin_unlock(&worker->lock);
+ }
raw_spin_unlock(&wqe->lock);
if (!work)
break;
@@ -594,11 +606,6 @@ get_next:
spin_unlock_irq(&wq->hash->wait.lock);
if (wq_has_sleeper(&wq->hash->wait))
wake_up(&wq->hash->wait);
- raw_spin_lock(&wqe->lock);
- /* skip unnecessary unlock-lock wqe->lock */
- if (!work)
- goto get_next;
- raw_spin_unlock(&wqe->lock);
}
} while (work);
@@ -815,7 +822,7 @@ fail:
refcount_set(&worker->ref, 1);
worker->wqe = wqe;
- spin_lock_init(&worker->lock);
+ raw_spin_lock_init(&worker->lock);
init_completion(&worker->ref_done);
if (index == IO_WQ_ACCT_BOUND)
@@ -973,6 +980,19 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}
+static bool __io_wq_worker_cancel(struct io_worker *worker,
+ struct io_cb_cancel_data *match,
+ struct io_wq_work *work)
+{
+ if (work && match->fn(work, match->data)) {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ set_notify_signal(worker->task);
+ return true;
+ }
+
+ return false;
+}
+
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
@@ -981,13 +1001,11 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
* Hold the lock to avoid ->cur_work going out of scope, caller
* may dereference the passed in work.
*/
- spin_lock(&worker->lock);
- if (worker->cur_work &&
- match->fn(worker->cur_work, match->data)) {
- set_notify_signal(worker->task);
+ raw_spin_lock(&worker->lock);
+ if (__io_wq_worker_cancel(worker, match, worker->cur_work) ||
+ __io_wq_worker_cancel(worker, match, worker->next_work))
match->nr_running++;
- }
- spin_unlock(&worker->lock);
+ raw_spin_unlock(&worker->lock);
return match->nr_running && !match->cancel_all;
}
@@ -1039,17 +1057,16 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
{
int i;
retry:
- raw_spin_lock(&wqe->lock);
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
if (io_acct_cancel_pending_work(wqe, acct, match)) {
+ raw_spin_lock(&wqe->lock);
if (match->cancel_all)
goto retry;
- return;
+ break;
}
}
- raw_spin_unlock(&wqe->lock);
}
static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1074,25 +1091,27 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
* First check pending list, if we're lucky we can just remove it
* from there. CANCEL_OK means that the work is returned as-new,
* no completion will be posted for it.
- */
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
-
- io_wqe_cancel_pending_work(wqe, &match);
- if (match.nr_pending && !match.cancel_all)
- return IO_WQ_CANCEL_OK;
- }
-
- /*
- * Now check if a free (going busy) or busy worker has the work
+ *
+ * Then check if a free (going busy) or busy worker has the work
* currently running. If we find it there, we'll return CANCEL_RUNNING
* as an indication that we attempt to signal cancellation. The
* completion will run normally in this case.
+ *
+ * Do both of these while holding the wqe->lock, to ensure that
+ * we'll find a work item regardless of state.
*/
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
+ raw_spin_lock(&wqe->lock);
+ io_wqe_cancel_pending_work(wqe, &match);
+ if (match.nr_pending && !match.cancel_all) {
+ raw_spin_unlock(&wqe->lock);
+ return IO_WQ_CANCEL_OK;
+ }
+
io_wqe_cancel_running_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
if (match.nr_running && !match.cancel_all)
return IO_WQ_CANCEL_RUNNING;
}
@@ -1263,7 +1282,9 @@ static void io_wq_destroy(struct io_wq *wq)
.fn = io_wq_work_match_all,
.cancel_all = true,
};
+ raw_spin_lock(&wqe->lock);
io_wqe_cancel_pending_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
free_cpumask_var(wqe->cpu_mask);
kfree(wqe);
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index de9c9de90655..2e04f718319d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1192,12 +1192,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req)
return atomic_dec_and_test(&req->refs);
}
-static inline void req_ref_put(struct io_kiocb *req)
-{
- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
- WARN_ON_ONCE(req_ref_put_and_test(req));
-}
-
static inline void req_ref_get(struct io_kiocb *req)
{
WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
@@ -5468,12 +5462,14 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
{
- struct wait_queue_head *head = poll->head;
+ struct wait_queue_head *head = smp_load_acquire(&poll->head);
- spin_lock_irq(&head->lock);
- list_del_init(&poll->wait.entry);
- poll->head = NULL;
- spin_unlock_irq(&head->lock);
+ if (head) {
+ spin_lock_irq(&head->lock);
+ list_del_init(&poll->wait.entry);
+ poll->head = NULL;
+ spin_unlock_irq(&head->lock);
+ }
}
static void io_poll_remove_entries(struct io_kiocb *req)
@@ -5481,10 +5477,26 @@ static void io_poll_remove_entries(struct io_kiocb *req)
struct io_poll_iocb *poll = io_poll_get_single(req);
struct io_poll_iocb *poll_double = io_poll_get_double(req);
- if (poll->head)
- io_poll_remove_entry(poll);
- if (poll_double && poll_double->head)
+ /*
+ * While we hold the waitqueue lock and the waitqueue is nonempty,
+ * wake_up_pollfree() will wait for us. However, taking the waitqueue
+ * lock in the first place can race with the waitqueue being freed.
+ *
+ * We solve this as eventpoll does: by taking advantage of the fact that
+ * all users of wake_up_pollfree() will RCU-delay the actual free. If
+ * we enter rcu_read_lock() and see that the pointer to the queue is
+ * non-NULL, we can then lock it without the memory being freed out from
+ * under us.
+ *
+ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
+ * case the caller deletes the entry from the queue, leaving it empty.
+ * In that case, only RCU prevents the queue memory from being freed.
+ */
+ rcu_read_lock();
+ io_poll_remove_entry(poll);
+ if (poll_double)
io_poll_remove_entry(poll_double);
+ rcu_read_unlock();
}
/*
@@ -5624,6 +5636,30 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
wait);
__poll_t mask = key_to_poll(key);
+ if (unlikely(mask & POLLFREE)) {
+ io_poll_mark_cancelled(req);
+ /* we have to kick tw in case it's not already */
+ io_poll_execute(req, 0);
+
+ /*
+ * If the waitqueue is being freed early but someone already
+ * holds ownership over it, we have to tear down the request as
+ * best we can. That means immediately removing the request from
+ * its waitqueue and preventing all further accesses to the
+ * waitqueue via the request.
+ */
+ list_del_init(&poll->wait.entry);
+
+ /*
+ * Careful: this *must* be the last step, since as soon
+ * as poll->head is NULL'ed out, the request can be
+ * completed and freed, since io_poll_remove_entry()
+ * will no longer need to take the waitqueue lock.
+ */
+ smp_store_release(&poll->head, NULL);
+ return 1;
+ }
+
/* for instances that support it check for an event match first */
if (mask && !(mask & poll->events))
return 0;
@@ -6350,16 +6386,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- if (ret != -ENOENT)
- return ret;
+ /*
+ * Fall-through even for -EALREADY, as we may have poll armed
+ * that need unarming.
+ */
+ if (!ret)
+ return 0;
spin_lock(&ctx->completion_lock);
+ ret = io_poll_cancel(ctx, sqe_addr, false);
+ if (ret != -ENOENT)
+ goto out;
+
spin_lock_irq(&ctx->timeout_lock);
ret = io_timeout_cancel(ctx, sqe_addr);
spin_unlock_irq(&ctx->timeout_lock);
- if (ret != -ENOENT)
- goto out;
- ret = io_poll_cancel(ctx, sqe_addr, false);
out:
spin_unlock(&ctx->completion_lock);
return ret;
@@ -7781,10 +7822,15 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
struct io_ring_ctx *ctx = node->rsrc_data->ctx;
unsigned long flags;
bool first_add = false;
+ unsigned long delay = HZ;
spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
node->done = true;
+ /* if we are mid-quiesce then do not delay */
+ if (node->rsrc_data->quiesce)
+ delay = 0;
+
while (!list_empty(&ctx->rsrc_ref_list)) {
node = list_first_entry(&ctx->rsrc_ref_list,
struct io_rsrc_node, node);
@@ -7797,10 +7843,10 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
if (first_add)
- mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
+ mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
}
-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
+static struct io_rsrc_node *io_rsrc_node_alloc(void)
{
struct io_rsrc_node *ref_node;
@@ -7851,7 +7897,7 @@ static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
{
if (ctx->rsrc_backup_node)
return 0;
- ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx);
+ ctx->rsrc_backup_node = io_rsrc_node_alloc();
return ctx->rsrc_backup_node ? 0 : -ENOMEM;
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0b86a4365b66..bf108d4493d2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1212,7 +1212,7 @@ static const struct seq_operations jbd2_seq_info_ops = {
static int jbd2_seq_info_open(struct inode *inode, struct file *file)
{
- journal_t *journal = PDE_DATA(inode);
+ journal_t *journal = pde_data(inode);
struct jbd2_stats_proc_session *s;
int rc, size;
@@ -2972,6 +2972,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh)
jbd_unlock_bh_journal_head(bh);
return jh;
}
+EXPORT_SYMBOL(jbd2_journal_grab_journal_head);
static void __journal_remove_journal_head(struct buffer_head *bh)
{
@@ -3024,6 +3025,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
jbd_unlock_bh_journal_head(bh);
}
}
+EXPORT_SYMBOL(jbd2_journal_put_journal_head);
/*
* Initialize jbd inode head
diff --git a/fs/ksmbd/asn1.c b/fs/ksmbd/asn1.c
index b014f4638610..c03eba090368 100644
--- a/fs/ksmbd/asn1.c
+++ b/fs/ksmbd/asn1.c
@@ -21,101 +21,11 @@
#include "ksmbd_spnego_negtokeninit.asn1.h"
#include "ksmbd_spnego_negtokentarg.asn1.h"
-#define SPNEGO_OID_LEN 7
#define NTLMSSP_OID_LEN 10
-#define KRB5_OID_LEN 7
-#define KRB5U2U_OID_LEN 8
-#define MSKRB5_OID_LEN 7
-static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
-static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
-static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
-static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
-static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
static char NTLMSSP_OID_STR[NTLMSSP_OID_LEN] = { 0x2b, 0x06, 0x01, 0x04, 0x01,
0x82, 0x37, 0x02, 0x02, 0x0a };
-static bool
-asn1_subid_decode(const unsigned char **begin, const unsigned char *end,
- unsigned long *subid)
-{
- const unsigned char *ptr = *begin;
- unsigned char ch;
-
- *subid = 0;
-
- do {
- if (ptr >= end)
- return false;
-
- ch = *ptr++;
- *subid <<= 7;
- *subid |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
-
- *begin = ptr;
- return true;
-}
-
-static bool asn1_oid_decode(const unsigned char *value, size_t vlen,
- unsigned long **oid, size_t *oidlen)
-{
- const unsigned char *iptr = value, *end = value + vlen;
- unsigned long *optr;
- unsigned long subid;
-
- vlen += 1;
- if (vlen < 2 || vlen > UINT_MAX / sizeof(unsigned long))
- goto fail_nullify;
-
- *oid = kmalloc(vlen * sizeof(unsigned long), GFP_KERNEL);
- if (!*oid)
- return false;
-
- optr = *oid;
-
- if (!asn1_subid_decode(&iptr, end, &subid))
- goto fail;
-
- if (subid < 40) {
- optr[0] = 0;
- optr[1] = subid;
- } else if (subid < 80) {
- optr[0] = 1;
- optr[1] = subid - 40;
- } else {
- optr[0] = 2;
- optr[1] = subid - 80;
- }
-
- *oidlen = 2;
- optr += 2;
-
- while (iptr < end) {
- if (++(*oidlen) > vlen)
- goto fail;
-
- if (!asn1_subid_decode(&iptr, end, optr++))
- goto fail;
- }
- return true;
-
-fail:
- kfree(*oid);
-fail_nullify:
- *oid = NULL;
- return false;
-}
-
-static bool oid_eq(unsigned long *oid1, unsigned int oid1len,
- unsigned long *oid2, unsigned int oid2len)
-{
- if (oid1len != oid2len)
- return false;
-
- return memcmp(oid1, oid2, oid1len) == 0;
-}
-
int
ksmbd_decode_negTokenInit(unsigned char *security_blob, int length,
struct ksmbd_conn *conn)
@@ -252,26 +162,18 @@ int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
int ksmbd_gssapi_this_mech(void *context, size_t hdrlen, unsigned char tag,
const void *value, size_t vlen)
{
- unsigned long *oid;
- size_t oidlen;
- int err = 0;
-
- if (!asn1_oid_decode(value, vlen, &oid, &oidlen)) {
- err = -EBADMSG;
- goto out;
- }
+ enum OID oid;
- if (!oid_eq(oid, oidlen, SPNEGO_OID, SPNEGO_OID_LEN))
- err = -EBADMSG;
- kfree(oid);
-out:
- if (err) {
+ oid = look_up_OID(value, vlen);
+ if (oid != OID_spnego) {
char buf[50];
sprint_oid(value, vlen, buf, sizeof(buf));
ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ return -EBADMSG;
}
- return err;
+
+ return 0;
}
int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
@@ -279,37 +181,31 @@ int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
size_t vlen)
{
struct ksmbd_conn *conn = context;
- unsigned long *oid;
- size_t oidlen;
+ enum OID oid;
int mech_type;
- char buf[50];
- if (!asn1_oid_decode(value, vlen, &oid, &oidlen))
- goto fail;
-
- if (oid_eq(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN))
+ oid = look_up_OID(value, vlen);
+ if (oid == OID_ntlmssp) {
mech_type = KSMBD_AUTH_NTLMSSP;
- else if (oid_eq(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN))
+ } else if (oid == OID_mskrb5) {
mech_type = KSMBD_AUTH_MSKRB5;
- else if (oid_eq(oid, oidlen, KRB5_OID, KRB5_OID_LEN))
+ } else if (oid == OID_krb5) {
mech_type = KSMBD_AUTH_KRB5;
- else if (oid_eq(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN))
+ } else if (oid == OID_krb5u2u) {
mech_type = KSMBD_AUTH_KRB5U2U;
- else
- goto fail;
+ } else {
+ char buf[50];
+
+ sprint_oid(value, vlen, buf, sizeof(buf));
+ ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ return -EBADMSG;
+ }
conn->auth_mechs |= mech_type;
if (conn->preferred_auth_mech == 0)
conn->preferred_auth_mech = mech_type;
- kfree(oid);
return 0;
-
-fail:
- kfree(oid);
- sprint_oid(value, vlen, buf, sizeof(buf));
- ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
- return -EBADMSG;
}
int ksmbd_neg_token_init_mech_token(void *context, size_t hdrlen,
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
index 3503b1c48cb4..dc3d061edda9 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/ksmbd/auth.c
@@ -215,7 +215,7 @@ out:
* Return: 0 on success, error number on error
*/
int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
- int blen, char *domain_name)
+ int blen, char *domain_name, char *cryptkey)
{
char ntlmv2_hash[CIFS_ENCPWD_SIZE];
char ntlmv2_rsp[CIFS_HMAC_MD5_HASH_SIZE];
@@ -256,7 +256,7 @@ int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
goto out;
}
- memcpy(construct, sess->ntlmssp.cryptkey, CIFS_CRYPTO_KEY_SIZE);
+ memcpy(construct, cryptkey, CIFS_CRYPTO_KEY_SIZE);
memcpy(construct + CIFS_CRYPTO_KEY_SIZE, &ntlmv2->blob_signature, blen);
rc = crypto_shash_update(CRYPTO_HMACMD5(ctx), construct, len);
@@ -295,7 +295,8 @@ out:
* Return: 0 on success, error number on error
*/
int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
- int blob_len, struct ksmbd_session *sess)
+ int blob_len, struct ksmbd_conn *conn,
+ struct ksmbd_session *sess)
{
char *domain_name;
unsigned int nt_off, dn_off;
@@ -324,7 +325,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
/* TODO : use domain name that imported from configuration file */
domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off,
- dn_len, true, sess->conn->local_nls);
+ dn_len, true, conn->local_nls);
if (IS_ERR(domain_name))
return PTR_ERR(domain_name);
@@ -333,7 +334,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
domain_name);
ret = ksmbd_auth_ntlmv2(sess, (struct ntlmv2_resp *)((char *)authblob + nt_off),
nt_len - CIFS_ENCPWD_SIZE,
- domain_name);
+ domain_name, conn->ntlmssp.cryptkey);
kfree(domain_name);
return ret;
}
@@ -347,7 +348,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
*
*/
int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
- int blob_len, struct ksmbd_session *sess)
+ int blob_len, struct ksmbd_conn *conn)
{
if (blob_len < sizeof(struct negotiate_message)) {
ksmbd_debug(AUTH, "negotiate blob len %d too small\n",
@@ -361,7 +362,7 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
return -EINVAL;
}
- sess->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
+ conn->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
return 0;
}
@@ -375,14 +376,14 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
*/
unsigned int
ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
- struct ksmbd_session *sess)
+ struct ksmbd_conn *conn)
{
struct target_info *tinfo;
wchar_t *name;
__u8 *target_name;
unsigned int flags, blob_off, blob_len, type, target_info_len = 0;
int len, uni_len, conv_len;
- int cflags = sess->ntlmssp.client_flags;
+ int cflags = conn->ntlmssp.client_flags;
memcpy(chgblob->Signature, NTLMSSP_SIGNATURE, 8);
chgblob->MessageType = NtLmChallenge;
@@ -403,7 +404,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
if (cflags & NTLMSSP_REQUEST_TARGET)
flags |= NTLMSSP_REQUEST_TARGET;
- if (sess->conn->use_spnego &&
+ if (conn->use_spnego &&
(cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
@@ -414,7 +415,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
return -ENOMEM;
conv_len = smb_strtoUTF16((__le16 *)name, ksmbd_netbios_name(), len,
- sess->conn->local_nls);
+ conn->local_nls);
if (conv_len < 0 || conv_len > len) {
kfree(name);
return -EINVAL;
@@ -430,8 +431,8 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
chgblob->TargetName.BufferOffset = cpu_to_le32(blob_off);
/* Initialize random conn challenge */
- get_random_bytes(sess->ntlmssp.cryptkey, sizeof(__u64));
- memcpy(chgblob->Challenge, sess->ntlmssp.cryptkey,
+ get_random_bytes(conn->ntlmssp.cryptkey, sizeof(__u64));
+ memcpy(chgblob->Challenge, conn->ntlmssp.cryptkey,
CIFS_CRYPTO_KEY_SIZE);
/* Add Target Information to security buffer */
diff --git a/fs/ksmbd/auth.h b/fs/ksmbd/auth.h
index 9c2d4badd05d..95629651cf26 100644
--- a/fs/ksmbd/auth.h
+++ b/fs/ksmbd/auth.h
@@ -38,16 +38,16 @@ struct kvec;
int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
unsigned int nvec, int enc);
void ksmbd_copy_gss_neg_header(void *buf);
-int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf);
int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
- int blen, char *domain_name);
+ int blen, char *domain_name, char *cryptkey);
int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
- int blob_len, struct ksmbd_session *sess);
+ int blob_len, struct ksmbd_conn *conn,
+ struct ksmbd_session *sess);
int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
- int blob_len, struct ksmbd_session *sess);
+ int blob_len, struct ksmbd_conn *conn);
unsigned int
ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
- struct ksmbd_session *sess);
+ struct ksmbd_conn *conn);
int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
int in_len, char *out_blob, int *out_len);
int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index 83a94d0bb480..208d2cff7bd3 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -62,6 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
atomic_set(&conn->req_running, 0);
atomic_set(&conn->r_count, 0);
conn->total_credits = 1;
+ conn->outstanding_credits = 1;
init_waitqueue_head(&conn->req_running_q);
INIT_LIST_HEAD(&conn->conns_list);
@@ -386,17 +387,24 @@ out:
static void stop_sessions(void)
{
struct ksmbd_conn *conn;
+ struct ksmbd_transport *t;
again:
read_lock(&conn_list_lock);
list_for_each_entry(conn, &conn_list, conns_list) {
struct task_struct *task;
- task = conn->transport->handler;
+ t = conn->transport;
+ task = t->handler;
if (task)
ksmbd_debug(CONN, "Stop session handler %s/%d\n",
task->comm, task_pid_nr(task));
conn->status = KSMBD_SESS_EXITING;
+ if (t->ops->shutdown) {
+ read_unlock(&conn_list_lock);
+ t->ops->shutdown(t);
+ read_lock(&conn_list_lock);
+ }
}
read_unlock(&conn_list_lock);
diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
index e5403c587a58..7a59aacb5daa 100644
--- a/fs/ksmbd/connection.h
+++ b/fs/ksmbd/connection.h
@@ -61,8 +61,8 @@ struct ksmbd_conn {
atomic_t req_running;
/* References which are made for this Server object*/
atomic_t r_count;
- unsigned short total_credits;
- unsigned short max_credits;
+ unsigned int total_credits;
+ unsigned int outstanding_credits;
spinlock_t credits_lock;
wait_queue_head_t req_running_q;
/* Lock to protect requests list*/
@@ -72,12 +72,7 @@ struct ksmbd_conn {
int connection_type;
struct ksmbd_stats stats;
char ClientGUID[SMB2_CLIENT_GUID_SIZE];
- union {
- /* pending trans request table */
- struct trans_state *recent_trans;
- /* Used by ntlmssp */
- char *ntlmssp_cryptkey;
- };
+ struct ntlmssp_auth ntlmssp;
spinlock_t llist_lock;
struct list_head lock_list;
@@ -122,6 +117,7 @@ struct ksmbd_conn_ops {
struct ksmbd_transport_ops {
int (*prepare)(struct ksmbd_transport *t);
void (*disconnect)(struct ksmbd_transport *t);
+ void (*shutdown)(struct ksmbd_transport *t);
int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size);
int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
int size, bool need_invalidate_rkey,
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
index c6718a05d347..71bfb7de4472 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -103,6 +103,8 @@ struct ksmbd_startup_request {
* we set the SPARSE_FILES bit (0x40).
*/
__u32 sub_auth[3]; /* Subauth value for Security ID */
+ __u32 smb2_max_credits; /* MAX credits */
+ __u32 reserved[128]; /* Reserved room */
__u32 ifc_list_sz; /* interfaces list size */
__s8 ____payload[];
};
@@ -113,7 +115,7 @@ struct ksmbd_startup_request {
* IPC request to shutdown ksmbd server.
*/
struct ksmbd_shutdown_request {
- __s32 reserved;
+ __s32 reserved[16];
};
/*
@@ -122,6 +124,7 @@ struct ksmbd_shutdown_request {
struct ksmbd_login_request {
__u32 handle;
__s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -135,6 +138,7 @@ struct ksmbd_login_response {
__u16 status;
__u16 hash_sz; /* hash size */
__s8 hash[KSMBD_REQ_MAX_HASH_SZ]; /* password hash */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -143,6 +147,7 @@ struct ksmbd_login_response {
struct ksmbd_share_config_request {
__u32 handle;
__s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -157,6 +162,7 @@ struct ksmbd_share_config_response {
__u16 force_directory_mode;
__u16 force_uid;
__u16 force_gid;
+ __u32 reserved[128]; /* Reserved room */
__u32 veto_list_sz;
__s8 ____payload[];
};
@@ -187,6 +193,7 @@ struct ksmbd_tree_connect_request {
__s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ];
__s8 share[KSMBD_REQ_MAX_SHARE_NAME];
__s8 peer_addr[64];
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -196,6 +203,7 @@ struct ksmbd_tree_connect_response {
__u32 handle;
__u16 status;
__u16 connection_flags;
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -204,6 +212,7 @@ struct ksmbd_tree_connect_response {
struct ksmbd_tree_disconnect_request {
__u64 session_id; /* session id */
__u64 connect_id; /* tree connection id */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -212,6 +221,7 @@ struct ksmbd_tree_disconnect_request {
struct ksmbd_logout_request {
__s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
__u32 account_flags;
+ __u32 reserved[16]; /* Reserved room */
};
/*
diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/ksmbd/mgmt/user_config.c
index 1019d3677d55..279d00feff21 100644
--- a/fs/ksmbd/mgmt/user_config.c
+++ b/fs/ksmbd/mgmt/user_config.c
@@ -67,3 +67,13 @@ int ksmbd_anonymous_user(struct ksmbd_user *user)
return 1;
return 0;
}
+
+bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2)
+{
+ if (strcmp(u1->name, u2->name))
+ return false;
+ if (memcmp(u1->passkey, u2->passkey, u1->passkey_sz))
+ return false;
+
+ return true;
+}
diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h
index aff80b029579..6a44109617f1 100644
--- a/fs/ksmbd/mgmt/user_config.h
+++ b/fs/ksmbd/mgmt/user_config.h
@@ -64,4 +64,5 @@ struct ksmbd_user *ksmbd_login_user(const char *account);
struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp);
void ksmbd_free_user(struct ksmbd_user *user);
int ksmbd_anonymous_user(struct ksmbd_user *user);
+bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2);
#endif /* __USER_CONFIG_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/ksmbd/mgmt/user_session.h
index 82289c3cbd2b..e241f16a3851 100644
--- a/fs/ksmbd/mgmt/user_session.h
+++ b/fs/ksmbd/mgmt/user_session.h
@@ -45,7 +45,6 @@ struct ksmbd_session {
int state;
__u8 *Preauth_HashValue;
- struct ntlmssp_auth ntlmssp;
char sess_key[CIFS_KEY_SIZE];
struct hlist_node hlist;
diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
index 50d0b1022289..4a9460153b59 100644
--- a/fs/ksmbd/smb2misc.c
+++ b/fs/ksmbd/smb2misc.c
@@ -289,7 +289,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len;
unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge);
void *__hdr = hdr;
- int ret;
+ int ret = 0;
switch (hdr->Command) {
case SMB2_QUERY_INFO:
@@ -326,21 +326,27 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n",
credit_charge, calc_credit_num);
return 1;
- } else if (credit_charge > conn->max_credits) {
+ } else if (credit_charge > conn->vals->max_credits) {
ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge);
return 1;
}
spin_lock(&conn->credits_lock);
- if (credit_charge <= conn->total_credits) {
- conn->total_credits -= credit_charge;
- ret = 0;
- } else {
+ if (credit_charge > conn->total_credits) {
ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
credit_charge, conn->total_credits);
ret = 1;
}
+
+ if ((u64)conn->outstanding_credits + credit_charge > conn->vals->max_credits) {
+ ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given : %u, pending : %u\n",
+ credit_charge, conn->outstanding_credits);
+ ret = 1;
+ } else
+ conn->outstanding_credits += credit_charge;
+
spin_unlock(&conn->credits_lock);
+
return ret;
}
diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
index 02a44d28bdaf..ab23da2120b9 100644
--- a/fs/ksmbd/smb2ops.c
+++ b/fs/ksmbd/smb2ops.c
@@ -19,6 +19,7 @@ static struct smb_version_values smb21_server_values = {
.max_read_size = SMB21_DEFAULT_IOSIZE,
.max_write_size = SMB21_DEFAULT_IOSIZE,
.max_trans_size = SMB21_DEFAULT_IOSIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -44,6 +45,7 @@ static struct smb_version_values smb30_server_values = {
.max_read_size = SMB3_DEFAULT_IOSIZE,
.max_write_size = SMB3_DEFAULT_IOSIZE,
.max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -70,6 +72,7 @@ static struct smb_version_values smb302_server_values = {
.max_read_size = SMB3_DEFAULT_IOSIZE,
.max_write_size = SMB3_DEFAULT_IOSIZE,
.max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -96,6 +99,7 @@ static struct smb_version_values smb311_server_values = {
.max_read_size = SMB3_DEFAULT_IOSIZE,
.max_write_size = SMB3_DEFAULT_IOSIZE,
.max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -197,7 +201,6 @@ void init_smb2_1_server(struct ksmbd_conn *conn)
conn->ops = &smb2_0_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -215,7 +218,6 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
conn->ops = &smb3_0_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -240,7 +242,6 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
conn->ops = &smb3_0_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -265,7 +266,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
conn->ops = &smb3_11_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -304,3 +304,11 @@ void init_smb2_max_trans_size(unsigned int sz)
smb302_server_values.max_trans_size = sz;
smb311_server_values.max_trans_size = sz;
}
+
+void init_smb2_max_credits(unsigned int sz)
+{
+ smb21_server_values.max_credits = sz;
+ smb30_server_values.max_credits = sz;
+ smb302_server_values.max_credits = sz;
+ smb311_server_values.max_credits = sz;
+}
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index b8b3a4c28b74..1866c81c5c99 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -299,16 +299,15 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
struct smb2_hdr *hdr = ksmbd_resp_buf_next(work);
struct ksmbd_conn *conn = work->conn;
- unsigned short credits_requested;
+ unsigned short credits_requested, aux_max;
unsigned short credit_charge, credits_granted = 0;
- unsigned short aux_max, aux_credits;
if (work->send_no_response)
return 0;
hdr->CreditCharge = req_hdr->CreditCharge;
- if (conn->total_credits > conn->max_credits) {
+ if (conn->total_credits > conn->vals->max_credits) {
hdr->CreditRequest = 0;
pr_err("Total credits overflow: %d\n", conn->total_credits);
return -EINVAL;
@@ -316,6 +315,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
credit_charge = max_t(unsigned short,
le16_to_cpu(req_hdr->CreditCharge), 1);
+ if (credit_charge > conn->total_credits) {
+ ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
+ credit_charge, conn->total_credits);
+ return -EINVAL;
+ }
+
+ conn->total_credits -= credit_charge;
+ conn->outstanding_credits -= credit_charge;
credits_requested = max_t(unsigned short,
le16_to_cpu(req_hdr->CreditRequest), 1);
@@ -325,16 +332,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
* TODO: Need to adjuct CreditRequest value according to
* current cpu load
*/
- aux_credits = credits_requested - 1;
if (hdr->Command == SMB2_NEGOTIATE)
- aux_max = 0;
+ aux_max = 1;
else
- aux_max = conn->max_credits - credit_charge;
- aux_credits = min_t(unsigned short, aux_credits, aux_max);
- credits_granted = credit_charge + aux_credits;
+ aux_max = conn->vals->max_credits - credit_charge;
+ credits_granted = min_t(unsigned short, credits_requested, aux_max);
- if (conn->max_credits - conn->total_credits < credits_granted)
- credits_granted = conn->max_credits -
+ if (conn->vals->max_credits - conn->total_credits < credits_granted)
+ credits_granted = conn->vals->max_credits -
conn->total_credits;
conn->total_credits += credits_granted;
@@ -610,16 +615,14 @@ static void destroy_previous_session(struct ksmbd_user *user, u64 id)
/**
* smb2_get_name() - get filename string from on the wire smb format
- * @share: ksmbd_share_config pointer
* @src: source buffer
* @maxlen: maxlen of source string
- * @nls_table: nls_table pointer
+ * @local_nls: nls_table pointer
*
* Return: matching converted filename on success, otherwise error ptr
*/
static char *
-smb2_get_name(struct ksmbd_share_config *share, const char *src,
- const int maxlen, struct nls_table *local_nls)
+smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls)
{
char *name;
@@ -1303,7 +1306,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
int sz, rc;
ksmbd_debug(SMB, "negotiate phase\n");
- rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->sess);
+ rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->conn);
if (rc)
return rc;
@@ -1313,7 +1316,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
memset(chgblob, 0, sizeof(struct challenge_message));
if (!work->conn->use_spnego) {
- sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
if (sz < 0)
return -ENOMEM;
@@ -1329,7 +1332,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
return -ENOMEM;
chgblob = (struct challenge_message *)neg_blob;
- sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
if (sz < 0) {
rc = -ENOMEM;
goto out;
@@ -1450,60 +1453,62 @@ static int ntlm_authenticate(struct ksmbd_work *work)
ksmbd_free_user(user);
return 0;
}
- ksmbd_free_user(sess->user);
- }
- sess->user = user;
- if (user_guest(sess->user)) {
- if (conn->sign) {
- ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n");
+ if (!ksmbd_compare_user(sess->user, user)) {
+ ksmbd_free_user(user);
return -EPERM;
}
+ ksmbd_free_user(user);
+ } else {
+ sess->user = user;
+ }
+ if (user_guest(sess->user)) {
rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE;
} else {
struct authenticate_message *authblob;
authblob = user_authblob(conn, req);
sz = le16_to_cpu(req->SecurityBufferLength);
- rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, sess);
+ rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, conn, sess);
if (rc) {
set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD);
ksmbd_debug(SMB, "authentication failed\n");
return -EPERM;
}
+ }
- /*
- * If session state is SMB2_SESSION_VALID, We can assume
- * that it is reauthentication. And the user/password
- * has been verified, so return it here.
- */
- if (sess->state == SMB2_SESSION_VALID) {
- if (conn->binding)
- goto binding_session;
- return 0;
- }
+ /*
+ * If session state is SMB2_SESSION_VALID, We can assume
+ * that it is reauthentication. And the user/password
+ * has been verified, so return it here.
+ */
+ if (sess->state == SMB2_SESSION_VALID) {
+ if (conn->binding)
+ goto binding_session;
+ return 0;
+ }
- if ((conn->sign || server_conf.enforced_signing) ||
- (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
- sess->sign = true;
+ if ((rsp->SessionFlags != SMB2_SESSION_FLAG_IS_GUEST_LE &&
+ (conn->sign || server_conf.enforced_signing)) ||
+ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+ sess->sign = true;
- if (smb3_encryption_negotiated(conn) &&
- !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
- rc = conn->ops->generate_encryptionkey(sess);
- if (rc) {
- ksmbd_debug(SMB,
- "SMB3 encryption key generation failed\n");
- return -EINVAL;
- }
- sess->enc = true;
- rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
- /*
- * signing is disable if encryption is enable
- * on this session
- */
- sess->sign = false;
+ if (smb3_encryption_negotiated(conn) &&
+ !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+ rc = conn->ops->generate_encryptionkey(sess);
+ if (rc) {
+ ksmbd_debug(SMB,
+ "SMB3 encryption key generation failed\n");
+ return -EINVAL;
}
+ sess->enc = true;
+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+ /*
+ * signing is disable if encryption is enable
+ * on this session
+ */
+ sess->sign = false;
}
binding_session:
@@ -2057,9 +2062,6 @@ int smb2_session_logoff(struct ksmbd_work *work)
ksmbd_debug(SMB, "request\n");
- /* Got a valid session, set connection state */
- WARN_ON(sess->conn != conn);
-
/* setting CifsExiting here may race with start_tcp_sess */
ksmbd_conn_set_need_reconnect(work);
ksmbd_close_session_fds(work);
@@ -2530,8 +2532,7 @@ int smb2_open(struct ksmbd_work *work)
goto err_out1;
}
- name = smb2_get_name(share,
- req->Buffer,
+ name = smb2_get_name(req->Buffer,
le16_to_cpu(req->NameLength),
work->conn->local_nls);
if (IS_ERR(name)) {
@@ -3392,7 +3393,6 @@ static int dentry_name(struct ksmbd_dir_info *d_info, int info_level)
* @conn: connection instance
* @info_level: smb information level
* @d_info: structure included variables for query dir
- * @user_ns: user namespace
* @ksmbd_kstat: ksmbd wrapper of dirent stat information
*
* if directory has many entries, find first can't read it fully.
@@ -4018,6 +4018,7 @@ err_out2:
* buffer_check_err() - helper function to check buffer errors
* @reqOutputBufferLength: max buffer length expected in command response
* @rsp: query info response buffer contains output buffer length
+ * @rsp_org: base response buffer pointer in case of chained response
* @infoclass_size: query info class response buffer size
*
* Return: 0 on success, otherwise error
@@ -5398,8 +5399,7 @@ static int smb2_rename(struct ksmbd_work *work,
goto out;
}
- new_name = smb2_get_name(share,
- file_info->FileName,
+ new_name = smb2_get_name(file_info->FileName,
le32_to_cpu(file_info->FileNameLength),
local_nls);
if (IS_ERR(new_name)) {
@@ -5510,8 +5510,7 @@ static int smb2_create_link(struct ksmbd_work *work,
if (!pathname)
return -ENOMEM;
- link_name = smb2_get_name(share,
- file_info->FileName,
+ link_name = smb2_get_name(file_info->FileName,
le32_to_cpu(file_info->FileNameLength),
local_nls);
if (IS_ERR(link_name) || S_ISDIR(file_inode(filp)->i_mode)) {
@@ -5849,7 +5848,7 @@ static int set_file_mode_info(struct ksmbd_file *fp,
* smb2_set_info_file() - handler for smb2 set info command
* @work: smb work containing set info command buffer
* @fp: ksmbd_file pointer
- * @info_class: smb2 set info class
+ * @req: request buffer pointer
* @share: ksmbd_share_config pointer
*
* Return: 0 on success, otherwise error
@@ -6121,25 +6120,33 @@ out:
return err;
}
-static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
- struct smb2_read_req *req, void *data_buf,
- size_t length)
+static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
+ struct smb2_buffer_desc_v1 *desc,
+ __le32 Channel,
+ __le16 ChannelInfoOffset,
+ __le16 ChannelInfoLength)
{
- struct smb2_buffer_desc_v1 *desc =
- (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
- int err;
-
if (work->conn->dialect == SMB30_PROT_ID &&
- req->Channel != SMB2_CHANNEL_RDMA_V1)
+ Channel != SMB2_CHANNEL_RDMA_V1)
return -EINVAL;
- if (req->ReadChannelInfoOffset == 0 ||
- le16_to_cpu(req->ReadChannelInfoLength) < sizeof(*desc))
+ if (ChannelInfoOffset == 0 ||
+ le16_to_cpu(ChannelInfoLength) < sizeof(*desc))
return -EINVAL;
work->need_invalidate_rkey =
- (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+ (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
work->remote_key = le32_to_cpu(desc->token);
+ return 0;
+}
+
+static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
+ struct smb2_read_req *req, void *data_buf,
+ size_t length)
+{
+ struct smb2_buffer_desc_v1 *desc =
+ (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+ int err;
err = ksmbd_conn_rdma_write(work->conn, data_buf, length,
le32_to_cpu(desc->token),
@@ -6162,7 +6169,7 @@ int smb2_read(struct ksmbd_work *work)
struct ksmbd_conn *conn = work->conn;
struct smb2_read_req *req;
struct smb2_read_rsp *rsp;
- struct ksmbd_file *fp;
+ struct ksmbd_file *fp = NULL;
loff_t offset;
size_t length, mincount;
ssize_t nbytes = 0, remain_bytes = 0;
@@ -6176,6 +6183,18 @@ int smb2_read(struct ksmbd_work *work)
return smb2_read_pipe(work);
}
+ if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
+ req->Channel == SMB2_CHANNEL_RDMA_V1) {
+ err = smb2_set_remote_key_for_rdma(work,
+ (struct smb2_buffer_desc_v1 *)
+ &req->Buffer[0],
+ req->Channel,
+ req->ReadChannelInfoOffset,
+ req->ReadChannelInfoLength);
+ if (err)
+ goto out;
+ }
+
fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
le64_to_cpu(req->PersistentFileId));
if (!fp) {
@@ -6361,21 +6380,6 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
- if (work->conn->dialect == SMB30_PROT_ID &&
- req->Channel != SMB2_CHANNEL_RDMA_V1)
- return -EINVAL;
-
- if (req->Length != 0 || req->DataOffset != 0)
- return -EINVAL;
-
- if (req->WriteChannelInfoOffset == 0 ||
- le16_to_cpu(req->WriteChannelInfoLength) < sizeof(*desc))
- return -EINVAL;
-
- work->need_invalidate_rkey =
- (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
- work->remote_key = le32_to_cpu(desc->token);
-
data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
if (!data_buf)
return -ENOMEM;
@@ -6422,6 +6426,20 @@ int smb2_write(struct ksmbd_work *work)
return smb2_write_pipe(work);
}
+ if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
+ req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
+ if (req->Length != 0 || req->DataOffset != 0)
+ return -EINVAL;
+ err = smb2_set_remote_key_for_rdma(work,
+ (struct smb2_buffer_desc_v1 *)
+ &req->Buffer[0],
+ req->Channel,
+ req->WriteChannelInfoOffset,
+ req->WriteChannelInfoLength);
+ if (err)
+ goto out;
+ }
+
if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
ksmbd_debug(SMB, "User does not have write permission\n");
err = -EACCES;
@@ -7243,15 +7261,10 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
struct sockaddr_storage_rsp *sockaddr_storage;
unsigned int flags;
unsigned long long speed;
- struct sockaddr_in6 *csin6 = (struct sockaddr_in6 *)&conn->peer_addr;
rtnl_lock();
for_each_netdev(&init_net, netdev) {
- if (out_buf_len <
- nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
- rtnl_unlock();
- return -ENOSPC;
- }
+ bool ipv4_set = false;
if (netdev->type == ARPHRD_LOOPBACK)
continue;
@@ -7259,12 +7272,20 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
flags = dev_get_flags(netdev);
if (!(flags & IFF_RUNNING))
continue;
+ipv6_retry:
+ if (out_buf_len <
+ nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
+ rtnl_unlock();
+ return -ENOSPC;
+ }
nii_rsp = (struct network_interface_info_ioctl_rsp *)
&rsp->Buffer[nbytes];
nii_rsp->IfIndex = cpu_to_le32(netdev->ifindex);
nii_rsp->Capability = 0;
+ if (netdev->real_num_tx_queues > 1)
+ nii_rsp->Capability |= cpu_to_le32(RSS_CAPABLE);
if (ksmbd_rdma_capable_netdev(netdev))
nii_rsp->Capability |= cpu_to_le32(RDMA_CAPABLE);
@@ -7289,8 +7310,7 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
nii_rsp->SockAddr_Storage;
memset(sockaddr_storage, 0, 128);
- if (conn->peer_addr.ss_family == PF_INET ||
- ipv6_addr_v4mapped(&csin6->sin6_addr)) {
+ if (!ipv4_set) {
struct in_device *idev;
sockaddr_storage->Family = cpu_to_le16(INTERNETWORK);
@@ -7301,6 +7321,9 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
continue;
sockaddr_storage->addr4.IPv4address =
idev_ipv4_address(idev);
+ nbytes += sizeof(struct network_interface_info_ioctl_rsp);
+ ipv4_set = true;
+ goto ipv6_retry;
} else {
struct inet6_dev *idev6;
struct inet6_ifaddr *ifa;
@@ -7322,9 +7345,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
break;
}
sockaddr_storage->addr6.ScopeId = 0;
+ nbytes += sizeof(struct network_interface_info_ioctl_rsp);
}
-
- nbytes += sizeof(struct network_interface_info_ioctl_rsp);
}
rtnl_unlock();
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index 4a3e4339d4c4..725b800c29c8 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -980,6 +980,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn);
void init_smb2_max_read_size(unsigned int sz);
void init_smb2_max_write_size(unsigned int sz);
void init_smb2_max_trans_size(unsigned int sz);
+void init_smb2_max_credits(unsigned int sz);
bool is_smb2_neg_cmd(struct ksmbd_work *work);
bool is_smb2_rsp(struct ksmbd_work *work);
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
index 50590842b651..e1369b4345a9 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/ksmbd/smb_common.h
@@ -365,6 +365,7 @@ struct smb_version_values {
__u32 max_read_size;
__u32 max_write_size;
__u32 max_trans_size;
+ __u32 max_credits;
__u32 large_lock_type;
__u32 exclusive_lock_type;
__u32 shared_lock_type;
diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
index 1acf1892a466..3ad6881e0f7e 100644
--- a/fs/ksmbd/transport_ipc.c
+++ b/fs/ksmbd/transport_ipc.c
@@ -301,6 +301,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
init_smb2_max_write_size(req->smb2_max_write);
if (req->smb2_max_trans)
init_smb2_max_trans_size(req->smb2_max_trans);
+ if (req->smb2_max_credits)
+ init_smb2_max_credits(req->smb2_max_credits);
ret = ksmbd_set_netbios_name(req->netbios_name);
ret |= ksmbd_set_server_string(req->server_string);
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index 7e57cbb0bb35..3c1ec1ac0b27 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -34,7 +34,8 @@
#include "smbstatus.h"
#include "transport_rdma.h"
-#define SMB_DIRECT_PORT 5445
+#define SMB_DIRECT_PORT_IWARP 5445
+#define SMB_DIRECT_PORT_INFINIBAND 445
#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
@@ -60,6 +61,10 @@
* as defined in [MS-SMBD] 3.1.1.1
* Those may change after a SMB_DIRECT negotiation
*/
+
+/* Set 445 port to SMB Direct port by default */
+static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
+
/* The local peer's maximum number of credits to grant to the peer */
static int smb_direct_receive_credit_max = 255;
@@ -75,10 +80,18 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
/* The maximum single-message size which can be received */
static int smb_direct_max_receive_size = 8192;
-static int smb_direct_max_read_write_size = 1024 * 1024;
+static int smb_direct_max_read_write_size = 1048512;
static int smb_direct_max_outstanding_rw_ops = 8;
+static LIST_HEAD(smb_direct_device_list);
+static DEFINE_RWLOCK(smb_direct_device_lock);
+
+struct smb_direct_device {
+ struct ib_device *ib_dev;
+ struct list_head list;
+};
+
static struct smb_direct_listener {
struct rdma_cm_id *cm_id;
} smb_direct_listener;
@@ -415,6 +428,7 @@ static void free_transport(struct smb_direct_transport *t)
if (t->qp) {
ib_drain_qp(t->qp);
+ ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
ib_destroy_qp(t->qp);
}
@@ -555,6 +569,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
t->negotiation_requested = true;
t->full_packet_received = true;
+ enqueue_reassembly(t, recvmsg, 0);
wake_up_interruptible(&t->wait_status);
break;
case SMB_DIRECT_MSG_DATA_TRANSFER: {
@@ -1438,6 +1453,15 @@ static void smb_direct_disconnect(struct ksmbd_transport *t)
free_transport(st);
}
+static void smb_direct_shutdown(struct ksmbd_transport *t)
+{
+ struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+
+ ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
+
+ smb_direct_disconnect_rdma_work(&st->disconnect_work);
+}
+
static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -1581,19 +1605,13 @@ static int smb_direct_accept_client(struct smb_direct_transport *t)
pr_err("error at rdma_accept: %d\n", ret);
return ret;
}
-
- wait_event_interruptible(t->wait_status,
- t->status != SMB_DIRECT_CS_NEW);
- if (t->status != SMB_DIRECT_CS_CONNECTED)
- return -ENOTCONN;
return 0;
}
-static int smb_direct_negotiate(struct smb_direct_transport *t)
+static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
{
int ret;
struct smb_direct_recvmsg *recvmsg;
- struct smb_direct_negotiate_req *req;
recvmsg = get_free_recvmsg(t);
if (!recvmsg)
@@ -1603,44 +1621,20 @@ static int smb_direct_negotiate(struct smb_direct_transport *t)
ret = smb_direct_post_recv(t, recvmsg);
if (ret) {
pr_err("Can't post recv: %d\n", ret);
- goto out;
+ goto out_err;
}
t->negotiation_requested = false;
ret = smb_direct_accept_client(t);
if (ret) {
pr_err("Can't accept client\n");
- goto out;
+ goto out_err;
}
smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
-
- ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
- ret = wait_event_interruptible_timeout(t->wait_status,
- t->negotiation_requested ||
- t->status == SMB_DIRECT_CS_DISCONNECTED,
- SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
- if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
- ret = ret < 0 ? ret : -ETIMEDOUT;
- goto out;
- }
-
- ret = smb_direct_check_recvmsg(recvmsg);
- if (ret == -ECONNABORTED)
- goto out;
-
- req = (struct smb_direct_negotiate_req *)recvmsg->packet;
- t->max_recv_size = min_t(int, t->max_recv_size,
- le32_to_cpu(req->preferred_send_size));
- t->max_send_size = min_t(int, t->max_send_size,
- le32_to_cpu(req->max_receive_size));
- t->max_fragmented_send_size =
- le32_to_cpu(req->max_fragmented_size);
-
- ret = smb_direct_send_negotiate_response(t, ret);
-out:
- if (recvmsg)
- put_recvmsg(t, recvmsg);
+ return 0;
+out_err:
+ put_recvmsg(t, recvmsg);
return ret;
}
@@ -1724,7 +1718,9 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
cap->max_inline_data = 0;
- cap->max_rdma_ctxs = 0;
+ cap->max_rdma_ctxs =
+ rdma_rw_mr_factor(device, t->cm_id->port_num, max_pages) *
+ smb_direct_max_outstanding_rw_ops;
return 0;
}
@@ -1806,6 +1802,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
{
int ret;
struct ib_qp_init_attr qp_attr;
+ int pages_per_rw;
t->pd = ib_alloc_pd(t->cm_id->device, 0);
if (IS_ERR(t->pd)) {
@@ -1853,6 +1850,23 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
t->qp = t->cm_id->qp;
t->cm_id->event_handler = smb_direct_cm_handler;
+ pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
+ if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
+ int pages_per_mr, mr_count;
+
+ pages_per_mr = min_t(int, pages_per_rw,
+ t->cm_id->device->attrs.max_fast_reg_page_list_len);
+ mr_count = DIV_ROUND_UP(pages_per_rw, pages_per_mr) *
+ atomic_read(&t->rw_avail_ops);
+ ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, mr_count,
+ IB_MR_TYPE_MEM_REG, pages_per_mr, 0);
+ if (ret) {
+ pr_err("failed to init mr pool count %d pages %d\n",
+ mr_count, pages_per_mr);
+ goto err;
+ }
+ }
+
return 0;
err:
if (t->qp) {
@@ -1877,6 +1891,49 @@ err:
static int smb_direct_prepare(struct ksmbd_transport *t)
{
struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_negotiate_req *req;
+ int ret;
+
+ ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
+ ret = wait_event_interruptible_timeout(st->wait_status,
+ st->negotiation_requested ||
+ st->status == SMB_DIRECT_CS_DISCONNECTED,
+ SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
+ if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
+ return ret < 0 ? ret : -ETIMEDOUT;
+
+ recvmsg = get_first_reassembly(st);
+ if (!recvmsg)
+ return -ECONNABORTED;
+
+ ret = smb_direct_check_recvmsg(recvmsg);
+ if (ret == -ECONNABORTED)
+ goto out;
+
+ req = (struct smb_direct_negotiate_req *)recvmsg->packet;
+ st->max_recv_size = min_t(int, st->max_recv_size,
+ le32_to_cpu(req->preferred_send_size));
+ st->max_send_size = min_t(int, st->max_send_size,
+ le32_to_cpu(req->max_receive_size));
+ st->max_fragmented_send_size =
+ le32_to_cpu(req->max_fragmented_size);
+ st->max_fragmented_recv_size =
+ (st->recv_credit_max * st->max_recv_size) / 2;
+
+ ret = smb_direct_send_negotiate_response(st, ret);
+out:
+ spin_lock_irq(&st->reassembly_queue_lock);
+ st->reassembly_queue_length--;
+ list_del(&recvmsg->list);
+ spin_unlock_irq(&st->reassembly_queue_lock);
+ put_recvmsg(st, recvmsg);
+
+ return ret;
+}
+
+static int smb_direct_connect(struct smb_direct_transport *st)
+{
int ret;
struct ib_qp_cap qp_cap;
@@ -1898,13 +1955,11 @@ static int smb_direct_prepare(struct ksmbd_transport *t)
return ret;
}
- ret = smb_direct_negotiate(st);
+ ret = smb_direct_prepare_negotiation(st);
if (ret) {
pr_err("Can't negotiate: %d\n", ret);
return ret;
}
-
- st->status = SMB_DIRECT_CS_CONNECTED;
return 0;
}
@@ -1920,6 +1975,7 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
struct smb_direct_transport *t;
+ int ret;
if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
ksmbd_debug(RDMA,
@@ -1932,18 +1988,23 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
if (!t)
return -ENOMEM;
+ ret = smb_direct_connect(t);
+ if (ret)
+ goto out_err;
+
KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
KSMBD_TRANS(t)->conn, "ksmbd:r%u",
- SMB_DIRECT_PORT);
+ smb_direct_port);
if (IS_ERR(KSMBD_TRANS(t)->handler)) {
- int ret = PTR_ERR(KSMBD_TRANS(t)->handler);
-
+ ret = PTR_ERR(KSMBD_TRANS(t)->handler);
pr_err("Can't start thread\n");
- free_transport(t);
- return ret;
+ goto out_err;
}
return 0;
+out_err:
+ free_transport(t);
+ return ret;
}
static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
@@ -2007,12 +2068,65 @@ err:
return ret;
}
+static int smb_direct_ib_client_add(struct ib_device *ib_dev)
+{
+ struct smb_direct_device *smb_dev;
+
+ /* Set 5445 port if device type is iWARP(No IB) */
+ if (ib_dev->node_type != RDMA_NODE_IB_CA)
+ smb_direct_port = SMB_DIRECT_PORT_IWARP;
+
+ if (!ib_dev->ops.get_netdev ||
+ !rdma_frwr_is_supported(&ib_dev->attrs))
+ return 0;
+
+ smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
+ if (!smb_dev)
+ return -ENOMEM;
+ smb_dev->ib_dev = ib_dev;
+
+ write_lock(&smb_direct_device_lock);
+ list_add(&smb_dev->list, &smb_direct_device_list);
+ write_unlock(&smb_direct_device_lock);
+
+ ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
+ return 0;
+}
+
+static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
+ void *client_data)
+{
+ struct smb_direct_device *smb_dev, *tmp;
+
+ write_lock(&smb_direct_device_lock);
+ list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
+ if (smb_dev->ib_dev == ib_dev) {
+ list_del(&smb_dev->list);
+ kfree(smb_dev);
+ break;
+ }
+ }
+ write_unlock(&smb_direct_device_lock);
+}
+
+static struct ib_client smb_direct_ib_client = {
+ .name = "ksmbd_smb_direct_ib",
+ .add = smb_direct_ib_client_add,
+ .remove = smb_direct_ib_client_remove,
+};
+
int ksmbd_rdma_init(void)
{
int ret;
smb_direct_listener.cm_id = NULL;
+ ret = ib_register_client(&smb_direct_ib_client);
+ if (ret) {
+ pr_err("failed to ib_register_client\n");
+ return ret;
+ }
+
/* When a client is running out of send credits, the credits are
* granted by the server's sending a packet using this queue.
* This avoids the situation that a clients cannot send packets
@@ -2023,7 +2137,7 @@ int ksmbd_rdma_init(void)
if (!smb_direct_wq)
return -ENOMEM;
- ret = smb_direct_listen(SMB_DIRECT_PORT);
+ ret = smb_direct_listen(smb_direct_port);
if (ret) {
destroy_workqueue(smb_direct_wq);
smb_direct_wq = NULL;
@@ -2036,36 +2150,67 @@ int ksmbd_rdma_init(void)
return 0;
}
-int ksmbd_rdma_destroy(void)
+void ksmbd_rdma_destroy(void)
{
- if (smb_direct_listener.cm_id)
- rdma_destroy_id(smb_direct_listener.cm_id);
+ if (!smb_direct_listener.cm_id)
+ return;
+
+ ib_unregister_client(&smb_direct_ib_client);
+ rdma_destroy_id(smb_direct_listener.cm_id);
+
smb_direct_listener.cm_id = NULL;
if (smb_direct_wq) {
destroy_workqueue(smb_direct_wq);
smb_direct_wq = NULL;
}
- return 0;
}
bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
- struct ib_device *ibdev;
+ struct smb_direct_device *smb_dev;
+ int i;
bool rdma_capable = false;
- ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
- if (ibdev) {
- if (rdma_frwr_is_supported(&ibdev->attrs))
- rdma_capable = true;
- ib_device_put(ibdev);
+ read_lock(&smb_direct_device_lock);
+ list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
+ for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
+ struct net_device *ndev;
+
+ ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
+ i + 1);
+ if (!ndev)
+ continue;
+
+ if (ndev == netdev) {
+ dev_put(ndev);
+ rdma_capable = true;
+ goto out;
+ }
+ dev_put(ndev);
+ }
+ }
+out:
+ read_unlock(&smb_direct_device_lock);
+
+ if (rdma_capable == false) {
+ struct ib_device *ibdev;
+
+ ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
+ if (ibdev) {
+ if (rdma_frwr_is_supported(&ibdev->attrs))
+ rdma_capable = true;
+ ib_device_put(ibdev);
+ }
}
+
return rdma_capable;
}
static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
.prepare = smb_direct_prepare,
.disconnect = smb_direct_disconnect,
+ .shutdown = smb_direct_shutdown,
.writev = smb_direct_writev,
.read = smb_direct_read,
.rdma_read = smb_direct_rdma_read,
diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h
index 0fa8adc0776f..5567d93a6f96 100644
--- a/fs/ksmbd/transport_rdma.h
+++ b/fs/ksmbd/transport_rdma.h
@@ -7,8 +7,6 @@
#ifndef __KSMBD_TRANSPORT_RDMA_H__
#define __KSMBD_TRANSPORT_RDMA_H__
-#define SMB_DIRECT_PORT 5445
-
/* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */
struct smb_direct_negotiate_req {
__le16 min_version;
@@ -52,7 +50,7 @@ struct smb_direct_data_transfer {
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
int ksmbd_rdma_init(void);
-int ksmbd_rdma_destroy(void);
+void ksmbd_rdma_destroy(void);
bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
#else
static inline int ksmbd_rdma_init(void) { return 0; }
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
index c14320e03b69..82a1429bbe12 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/ksmbd/transport_tcp.c
@@ -404,7 +404,7 @@ static int create_socket(struct interface *iface)
&ksmbd_socket);
if (ret) {
pr_err("Can't create socket for ipv4: %d\n", ret);
- goto out_error;
+ goto out_clear;
}
sin.sin_family = PF_INET;
@@ -462,6 +462,7 @@ static int create_socket(struct interface *iface)
out_error:
tcp_destroy_socket(ksmbd_socket);
+out_clear:
iface->ksmbd_socket = NULL;
return ret;
}
diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h
index 448576fbe4b7..36239ce31afd 100644
--- a/fs/ksmbd/vfs_cache.h
+++ b/fs/ksmbd/vfs_cache.h
@@ -96,16 +96,6 @@ struct ksmbd_file {
int durable_timeout;
- /* for SMB1 */
- int pid;
-
- /* conflict lock fail count for SMB1 */
- unsigned int cflock_cnt;
- /* last lock failure start offset for SMB1 */
- unsigned long long llock_fstart;
-
- int dirent_offset;
-
/* if ls is happening on directory, below is valid*/
struct ksmbd_readdir_data readdir_data;
int dot_dotdot[2];
diff --git a/fs/locks.c b/fs/locks.c
index 0fca9d680978..8c6df10cd9ed 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -62,6 +62,7 @@
#include <linux/pid_namespace.h>
#include <linux/hashtable.h>
#include <linux/percpu.h>
+#include <linux/sysctl.h>
#define CREATE_TRACE_POINTS
#include <trace/events/filelock.h>
@@ -88,8 +89,37 @@ static int target_leasetype(struct file_lock *fl)
return fl->fl_type;
}
-int leases_enable = 1;
-int lease_break_time = 45;
+static int leases_enable = 1;
+static int lease_break_time = 45;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table locks_sysctls[] = {
+ {
+ .procname = "leases-enable",
+ .data = &leases_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_MMU
+ {
+ .procname = "lease-break-time",
+ .data = &lease_break_time,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif /* CONFIG_MMU */
+ {}
+};
+
+static int __init init_fs_locks_sysctls(void)
+{
+ register_sysctl_init("fs", locks_sysctls);
+ return 0;
+}
+early_initcall(init_fs_locks_sysctls);
+#endif /* CONFIG_SYSCTL */
/*
* The global file_lock_list is only used for displaying /proc/locks, so we
diff --git a/fs/mpage.c b/fs/mpage.c
index 334e7d09aa65..87f5cfef6caa 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -29,7 +29,6 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
-#include <linux/cleancache.h>
#include "internal.h"
/*
@@ -284,12 +283,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
SetPageMappedToDisk(page);
}
- if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
- cleancache_get_page(page) == 0) {
- SetPageUptodate(page);
- goto confused;
- }
-
/*
* This page will go to BIO. Do we need to send this BIO off first?
*/
diff --git a/fs/namei.c b/fs/namei.c
index d81f04f8d818..3f1829b3ab5b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1020,10 +1020,60 @@ static inline void put_link(struct nameidata *nd)
path_put(&last->link);
}
-int sysctl_protected_symlinks __read_mostly = 0;
-int sysctl_protected_hardlinks __read_mostly = 0;
-int sysctl_protected_fifos __read_mostly;
-int sysctl_protected_regular __read_mostly;
+static int sysctl_protected_symlinks __read_mostly;
+static int sysctl_protected_hardlinks __read_mostly;
+static int sysctl_protected_fifos __read_mostly;
+static int sysctl_protected_regular __read_mostly;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table namei_sysctls[] = {
+ {
+ .procname = "protected_symlinks",
+ .data = &sysctl_protected_symlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "protected_hardlinks",
+ .data = &sysctl_protected_hardlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "protected_fifos",
+ .data = &sysctl_protected_fifos,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "protected_regular",
+ .data = &sysctl_protected_regular,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ { }
+};
+
+static int __init init_fs_namei_sysctls(void)
+{
+ register_sysctl_init("fs", namei_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_namei_sysctls);
+
+#endif /* CONFIG_SYSCTL */
/**
* may_follow_link - Check symlink following for unsafe situations
@@ -3974,13 +4024,12 @@ int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir,
dentry->d_inode->i_flags |= S_DEAD;
dont_mount(dentry);
detach_mounts(dentry);
- fsnotify_rmdir(dir, dentry);
out:
inode_unlock(dentry->d_inode);
dput(dentry);
if (!error)
- d_delete(dentry);
+ d_delete_notify(dir, dentry);
return error;
}
EXPORT_SYMBOL(vfs_rmdir);
@@ -4102,7 +4151,6 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir,
if (!error) {
dont_mount(dentry);
detach_mounts(dentry);
- fsnotify_unlink(dir, dentry);
}
}
}
@@ -4110,9 +4158,11 @@ out:
inode_unlock(target);
/* We don't d_delete() NFS sillyrenamed files--they still exist. */
- if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
+ if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+ fsnotify_unlink(dir, dentry);
+ } else if (!error) {
fsnotify_link_count(target);
- d_delete(dentry);
+ d_delete_notify(dir, dentry);
}
return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index dc31ad6b370f..40b994a29e90 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -37,7 +37,7 @@
#include "internal.h"
/* Maximum number of mounts in a mount namespace */
-unsigned int sysctl_mount_max __read_mostly = 100000;
+static unsigned int sysctl_mount_max __read_mostly = 100000;
static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
@@ -4620,3 +4620,25 @@ const struct proc_ns_operations mntns_operations = {
.install = mntns_install,
.owner = mntns_owner,
};
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table fs_namespace_sysctls[] = {
+ {
+ .procname = "mount-max",
+ .data = &sysctl_mount_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ { }
+};
+
+static int __init init_fs_namespace_sysctls(void)
+{
+ register_sysctl_init("fs", fs_namespace_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_namespace_sysctls);
+
+#endif /* CONFIG_SYSCTL */
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 6169659857b3..501da990c259 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -55,7 +55,8 @@ static struct netfs_read_request *netfs_alloc_read_request(
INIT_WORK(&rreq->work, netfs_rreq_work);
refcount_set(&rreq->usage, 1);
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
- ops->init_rreq(rreq, file);
+ if (ops->init_rreq)
+ ops->init_rreq(rreq, file);
netfs_stat(&netfs_n_rh_rreq);
}
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 6a2033131c06..ccd4f245cae2 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -170,7 +170,7 @@ struct cb_devicenotifyitem {
};
struct cb_devicenotifyargs {
- int ndevs;
+ uint32_t ndevs;
struct cb_devicenotifyitem *devs;
};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 09c5b1cb3e07..c343666d9a42 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -358,7 +358,7 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp,
struct cb_process_state *cps)
{
struct cb_devicenotifyargs *args = argp;
- int i;
+ uint32_t i;
__be32 res = 0;
struct nfs_client *clp = cps->clp;
struct nfs_server *server = NULL;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a67c41ec545f..f90de8043b0f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
void *argp)
{
struct cb_devicenotifyargs *args = argp;
+ uint32_t tmp, n, i;
__be32 *p;
__be32 status = 0;
- u32 tmp;
- int n, i;
- args->ndevs = 0;
/* Num of device notifications */
p = xdr_inline_decode(xdr, sizeof(uint32_t));
@@ -271,7 +269,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
goto out;
}
n = ntohl(*p++);
- if (n <= 0)
+ if (n == 0)
goto out;
if (n > ULONG_MAX / sizeof(*args->devs)) {
status = htonl(NFS4ERR_BADXDR);
@@ -330,19 +328,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
dev->cbd_immediate = 0;
}
- args->ndevs++;
-
dprintk("%s: type %d layout 0x%x immediate %d\n",
__func__, dev->cbd_notify_type, dev->cbd_layout_type,
dev->cbd_immediate);
}
+ args->ndevs = n;
+ dprintk("%s: ndevs %d\n", __func__, args->ndevs);
+ return 0;
+err:
+ kfree(args->devs);
out:
+ args->devs = NULL;
+ args->ndevs = 0;
dprintk("%s: status %d ndevs %d\n",
__func__, ntohl(status), args->ndevs);
return status;
-err:
- kfree(args->devs);
- goto out;
}
static __be32 decode_sessionid(struct xdr_stream *xdr,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8d8b85b5a641..f18e80fda9bf 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -856,6 +856,13 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
server->namelen = pathinfo.max_namelen;
}
+ if (clp->rpc_ops->discover_trunking != NULL &&
+ (server->caps & NFS_CAP_FS_LOCATIONS)) {
+ error = clp->rpc_ops->discover_trunking(server, mntfh);
+ if (error < 0)
+ return error;
+ }
+
return 0;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 347793626f19..848f3b8fb821 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1325,6 +1325,14 @@ void nfs_clear_verifier_delegated(struct inode *inode)
EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
#endif /* IS_ENABLED(CONFIG_NFS_V4) */
+static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry)
+{
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) &&
+ d_really_is_negative(dentry))
+ return dentry->d_time == inode_peek_iversion_raw(dir);
+ return nfs_verify_change_attribute(dir, dentry->d_time);
+}
+
/*
* A check for whether or not the parent directory has changed.
* In the case it has, we assume that the dentries are untrustworthy
@@ -1338,7 +1346,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
return 1;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
return 0;
- if (!nfs_verify_change_attribute(dir, dentry->d_time))
+ if (!nfs_dentry_verify_change(dir, dentry))
return 0;
/* Revalidate nfsi->cache_change_attribute before we declare a match */
if (nfs_mapping_need_revalidate_inode(dir)) {
@@ -1347,7 +1355,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
return 0;
}
- if (!nfs_verify_change_attribute(dir, dentry->d_time))
+ if (!nfs_dentry_verify_change(dir, dentry))
return 0;
return 1;
}
@@ -1437,6 +1445,9 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
return 0;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
return 1;
+ /* Case insensitive server? Revalidate negative dentries */
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ return 1;
return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}
@@ -1537,7 +1548,7 @@ out:
* If the lookup failed despite the dentry change attribute being
* a match, then we should revalidate the directory cache.
*/
- if (!ret && nfs_verify_change_attribute(dir, dentry->d_time))
+ if (!ret && nfs_dentry_verify_change(dir, dentry))
nfs_mark_dir_for_revalidate(dir);
return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
}
@@ -1776,8 +1787,11 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
dir_verifier = nfs_save_change_attribute(dir);
trace_nfs_lookup_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
- if (error == -ENOENT)
+ if (error == -ENOENT) {
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ dir_verifier = inode_peek_iversion_raw(dir);
goto no_entry;
+ }
if (error < 0) {
res = ERR_PTR(error);
goto out;
@@ -1806,6 +1820,14 @@ out:
}
EXPORT_SYMBOL_GPL(nfs_lookup);
+void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
+{
+ /* Case insensitive server? Revalidate dentries */
+ if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE))
+ d_prune_aliases(inode);
+}
+EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
+
#if IS_ENABLED(CONFIG_NFS_V4)
static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
@@ -1867,6 +1889,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct iattr attr = { .ia_valid = ATTR_OPEN };
struct inode *inode;
unsigned int lookup_flags = 0;
+ unsigned long dir_verifier;
bool switched = false;
int created = 0;
int err;
@@ -1940,7 +1963,11 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
switch (err) {
case -ENOENT:
d_splice_alias(NULL, dentry);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ dir_verifier = inode_peek_iversion_raw(dir);
+ else
+ dir_verifier = nfs_save_change_attribute(dir);
+ nfs_set_verifier(dentry, dir_verifier);
break;
case -EISDIR:
case -ENOTDIR:
@@ -1968,6 +1995,24 @@ out:
no_open:
res = nfs_lookup(dir, dentry, lookup_flags);
+ if (!res) {
+ inode = d_inode(dentry);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !S_ISDIR(inode->i_mode))
+ res = ERR_PTR(-ENOTDIR);
+ else if (inode && S_ISREG(inode->i_mode))
+ res = ERR_PTR(-EOPENSTALE);
+ } else if (!IS_ERR(res)) {
+ inode = d_inode(res);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !S_ISDIR(inode->i_mode)) {
+ dput(res);
+ res = ERR_PTR(-ENOTDIR);
+ } else if (inode && S_ISREG(inode->i_mode)) {
+ dput(res);
+ res = ERR_PTR(-EOPENSTALE);
+ }
+ }
if (switched) {
d_lookup_done(dentry);
if (!res)
@@ -2186,8 +2231,10 @@ static void nfs_dentry_remove_handle_error(struct inode *dir,
switch (error) {
case -ENOENT:
d_delete(dentry);
- fallthrough;
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ break;
case 0:
+ nfs_d_prune_case_insensitive_aliases(d_inode(dentry));
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
}
}
@@ -2380,6 +2427,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
trace_nfs_link_enter(inode, dir, dentry);
d_drop(dentry);
+ if (S_ISREG(inode->i_mode))
+ nfs_sync_inode(inode);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -2469,6 +2518,8 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
}
}
+ if (S_ISREG(old_inode->i_mode))
+ nfs_sync_inode(old_inode);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
if (IS_ERR(task)) {
error = PTR_ERR(task);
@@ -2529,7 +2580,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
- put_cred(entry->cred);
+ put_group_info(entry->group_info);
kfree_rcu(entry, rcu_head);
smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries);
@@ -2655,6 +2706,43 @@ void nfs_access_zap_cache(struct inode *inode)
}
EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
+/*
+ * Three-way comparison between the filesystem identity carried by
+ * credential @a and the identity recorded in access cache entry @b.
+ *
+ * Keys are compared in this order: fsuid, fsgid, then the supplementary
+ * group list (same pointer compares equal, NULL sorts before non-NULL,
+ * fewer groups sort before more groups, then element by element).
+ *
+ * Returns -1, 0 or 1.
+ */
+static int access_cmp(const struct cred *a, const struct nfs_access_entry *b)
+{
+	const struct group_info *lhs = a->group_info;
+	const struct group_info *rhs = b->group_info;
+	int order, idx;
+
+	order = uid_gt(a->fsuid, b->fsuid) - uid_lt(a->fsuid, b->fsuid);
+	if (order)
+		return order;
+
+	order = gid_gt(a->fsgid, b->fsgid) - gid_lt(a->fsgid, b->fsgid);
+	if (order)
+		return order;
+
+	/* Identical pointers (including both NULL) need no further work. */
+	if (lhs == rhs)
+		return 0;
+	if (!lhs)
+		return -1;
+	if (!rhs)
+		return 1;
+
+	if (lhs->ngroups != rhs->ngroups)
+		return lhs->ngroups < rhs->ngroups ? -1 : 1;
+
+	for (idx = 0; idx < lhs->ngroups; idx++) {
+		order = gid_gt(lhs->gid[idx], rhs->gid[idx]) -
+			gid_lt(lhs->gid[idx], rhs->gid[idx]);
+		if (order)
+			return order;
+	}
+	return 0;
+}
+
static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
{
struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
@@ -2662,7 +2750,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
while (n != NULL) {
struct nfs_access_entry *entry =
rb_entry(n, struct nfs_access_entry, rb_node);
- int cmp = cred_fscmp(cred, entry->cred);
+ int cmp = access_cmp(cred, entry);
if (cmp < 0)
n = n->rb_left;
@@ -2674,7 +2762,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
return NULL;
}
-static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
+static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_access_entry *cache;
@@ -2704,8 +2792,7 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *
spin_lock(&inode->i_lock);
retry = false;
}
- res->cred = cache->cred;
- res->mask = cache->mask;
+ *mask = cache->mask;
list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
err = 0;
out:
@@ -2717,7 +2804,7 @@ out_zap:
return -ENOENT;
}
-static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
{
/* Only check the most recently returned cache entry,
* but do it without locking.
@@ -2733,35 +2820,36 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
cache = list_entry(lh, struct nfs_access_entry, lru);
if (lh == &nfsi->access_cache_entry_lru ||
- cred_fscmp(cred, cache->cred) != 0)
+ access_cmp(cred, cache) != 0)
cache = NULL;
if (cache == NULL)
goto out;
if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
goto out;
- res->cred = cache->cred;
- res->mask = cache->mask;
+ *mask = cache->mask;
err = 0;
out:
rcu_read_unlock();
return err;
}
-int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct
-nfs_access_entry *res, bool may_block)
+int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
+ u32 *mask, bool may_block)
{
int status;
- status = nfs_access_get_cached_rcu(inode, cred, res);
+ status = nfs_access_get_cached_rcu(inode, cred, mask);
if (status != 0)
- status = nfs_access_get_cached_locked(inode, cred, res,
+ status = nfs_access_get_cached_locked(inode, cred, mask,
may_block);
return status;
}
EXPORT_SYMBOL_GPL(nfs_access_get_cached);
-static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
+static void nfs_access_add_rbtree(struct inode *inode,
+ struct nfs_access_entry *set,
+ const struct cred *cred)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct rb_root *root_node = &nfsi->access_cache;
@@ -2774,7 +2862,7 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *
while (*p != NULL) {
parent = *p;
entry = rb_entry(parent, struct nfs_access_entry, rb_node);
- cmp = cred_fscmp(set->cred, entry->cred);
+ cmp = access_cmp(cred, entry);
if (cmp < 0)
p = &parent->rb_left;
@@ -2796,13 +2884,16 @@ found:
nfs_access_free_entry(entry);
}
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
+ const struct cred *cred)
{
struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
if (cache == NULL)
return;
RB_CLEAR_NODE(&cache->rb_node);
- cache->cred = get_cred(set->cred);
+ cache->fsuid = cred->fsuid;
+ cache->fsgid = cred->fsgid;
+ cache->group_info = get_group_info(cred->group_info);
cache->mask = set->mask;
/* The above field assignments must be visible
@@ -2810,7 +2901,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
* use rcu_assign_pointer, so just force the memory barrier.
*/
smp_wmb();
- nfs_access_add_rbtree(inode, cache);
+ nfs_access_add_rbtree(inode, cache, cred);
/* Update accounting */
smp_mb__before_atomic();
@@ -2875,7 +2966,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
trace_nfs_access_enter(inode);
- status = nfs_access_get_cached(inode, cred, &cache, may_block);
+ status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
if (status == 0)
goto out_cached;
@@ -2895,8 +2986,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
else
cache.mask |= NFS_ACCESS_EXECUTE;
- cache.cred = cred;
- status = NFS_PROTO(inode)->access(inode, &cache);
+ status = NFS_PROTO(inode)->access(inode, &cache, cred);
if (status != 0) {
if (status == -ESTALE) {
if (!S_ISDIR(inode->i_mode))
@@ -2906,7 +2996,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
}
goto out;
}
- nfs_access_add_cache(inode, &cache);
+ nfs_access_add_cache(inode, &cache, cred);
out_cached:
cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h
index 79323b5dab0c..aed0748fd6ec 100644
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -51,7 +51,7 @@ struct nfs4_file_layout_dsaddr {
u32 stripe_count;
u8 *stripe_indices;
u32 ds_num;
- struct nfs4_pnfs_ds *ds_list[1];
+ struct nfs4_pnfs_ds *ds_list[];
};
struct nfs4_filelayout_segment {
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 86c3f7e69ec4..acf4b88889dc 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -136,9 +136,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_free_stripe_indices;
}
- dsaddr = kzalloc(sizeof(*dsaddr) +
- (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
- gfp_flags);
+ dsaddr = kzalloc(struct_size(dsaddr, ds_list, num), gfp_flags);
if (!dsaddr)
goto out_err_free_stripe_indices;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 12f6acb483bb..2de7c56a1fbe 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -373,6 +373,7 @@ extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
+void nfs_d_prune_case_insensitive_aliases(struct inode *inode);
int nfs_create(struct user_namespace *, struct inode *, struct dentry *,
umode_t, bool);
int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7100514d306b..1597eef40d54 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -220,7 +220,8 @@ static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
task_flags);
}
-static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+ const struct cred *cred)
{
struct nfs3_accessargs arg = {
.fh = NFS_FH(inode),
@@ -231,7 +232,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
.rpc_argp = &arg,
.rpc_resp = &res,
- .rpc_cred = entry->cred,
+ .rpc_cred = cred,
};
int status = -ENOMEM;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 8b21ff1be717..32129446beca 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -46,7 +46,7 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
{
struct inode *inode = file_inode(filep);
struct nfs_server *server = NFS_SERVER(inode);
- u32 bitmask[3];
+ u32 bitmask[NFS_BITMASK_SZ];
struct nfs42_falloc_args args = {
.falloc_fh = NFS_FH(inode),
.falloc_offset = offset,
@@ -69,9 +69,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
return status;
}
- memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
- if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
- bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+ nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask, inode,
+ NFS_INO_INVALID_BLOCKS);
res.falloc_fattr = nfs_alloc_fattr();
if (!res.falloc_fattr)
@@ -1044,13 +1043,14 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
struct inode *src_inode = file_inode(src_f);
struct inode *dst_inode = file_inode(dst_f);
struct nfs_server *server = NFS_SERVER(dst_inode);
+ __u32 dst_bitmask[NFS_BITMASK_SZ];
struct nfs42_clone_args args = {
.src_fh = NFS_FH(src_inode),
.dst_fh = NFS_FH(dst_inode),
.src_offset = src_offset,
.dst_offset = dst_offset,
.count = count,
- .dst_bitmask = server->cache_consistency_bitmask,
+ .dst_bitmask = dst_bitmask,
};
struct nfs42_clone_res res = {
.server = server,
@@ -1079,6 +1079,9 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
if (!res.dst_fattr)
return -ENOMEM;
+ nfs4_bitmask_set(dst_bitmask, server->cache_consistency_bitmask,
+ dst_inode, NFS_INO_INVALID_BLOCKS);
+
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
trace_nfs4_clone(src_inode, dst_inode, &args, status);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ed5eaca6801e..84f39b6f1b1e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -260,8 +260,8 @@ struct nfs4_state_maintenance_ops {
};
struct nfs4_mig_recovery_ops {
- int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
- struct page *, const struct cred *);
+ int (*get_locations)(struct nfs_server *, struct nfs_fh *,
+ struct nfs4_fs_locations *, struct page *, const struct cred *);
int (*fsid_present)(struct inode *, const struct cred *);
};
@@ -280,7 +280,8 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
int nfs4_submount(struct fs_context *, struct nfs_server *);
int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations);
-
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+ size_t salen, struct net *net, int port);
/* nfs4proc.c */
extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
extern int nfs4_async_handle_error(struct rpc_task *task,
@@ -302,8 +303,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
struct nfs4_fs_locations *, struct page *);
-extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
- struct page *page, const struct cred *);
+extern int nfs4_proc_get_locations(struct nfs_server *, struct nfs_fh *,
+ struct nfs4_fs_locations *,
+ struct page *page, const struct cred *);
extern int nfs4_proc_fsid_present(struct inode *, const struct cred *);
extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *,
struct dentry *,
@@ -315,6 +317,8 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
fmode_t fmode);
+extern void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
+ struct inode *inode, unsigned long cache_validity);
extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct inode *inode);
extern int update_open_stateid(struct nfs4_state *state,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d8b5a250ca05..47a6cf892c95 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1343,8 +1343,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
}
nfs_put_client(clp);
- if (server->nfs_client->cl_hostname == NULL)
+ if (server->nfs_client->cl_hostname == NULL) {
server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+ if (server->nfs_client->cl_hostname == NULL)
+ return -ENOMEM;
+ }
nfs_server_insert_lists(server);
return nfs_probe_server(server, NFS_FH(d_inode(server->super->s_root)));
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 873342308dc0..3680c8da510c 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -164,16 +164,21 @@ static int nfs4_validate_fspath(struct dentry *dentry,
return 0;
}
-static size_t nfs_parse_server_name(char *string, size_t len,
- struct sockaddr *sa, size_t salen, struct net *net)
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+ size_t salen, struct net *net, int port)
{
ssize_t ret;
ret = rpc_pton(net, string, len, sa, salen);
if (ret == 0) {
- ret = nfs_dns_resolve_name(net, string, len, sa, salen);
- if (ret < 0)
- ret = 0;
+ ret = rpc_uaddr2sockaddr(net, string, len, sa, salen);
+ if (ret == 0) {
+ ret = nfs_dns_resolve_name(net, string, len, sa, salen);
+ if (ret < 0)
+ ret = 0;
+ }
+ } else if (port) {
+ rpc_set_port(sa, port);
}
return ret;
}
@@ -328,7 +333,7 @@ static int try_location(struct fs_context *fc,
nfs_parse_server_name(buf->data, buf->len,
&ctx->nfs_server.address,
sizeof(ctx->nfs_server._address),
- fc->net_ns);
+ fc->net_ns, 0);
if (ctx->nfs_server.addrlen == 0)
continue;
@@ -496,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
continue;
salen = nfs_parse_server_name(buf->data, buf->len,
- sap, addr_bufsize, net);
+ sap, addr_bufsize, net, 0);
if (salen == 0)
continue;
rpc_set_port(sap, NFS_PORT);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ee3bc79f6ca3..b18f31b2c9e7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -108,10 +108,6 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *, bool);
#endif
-static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
- const __u32 *src, struct inode *inode,
- struct nfs_server *server,
- struct nfs4_label *label);
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static inline struct nfs4_label *
@@ -2653,9 +2649,8 @@ static int nfs4_opendata_access(const struct cred *cred,
} else if ((fmode & FMODE_READ) && !opendata->file_created)
mask = NFS4_ACCESS_READ;
- cache.cred = cred;
nfs_access_set_mask(&cache, opendata->o_res.access_result);
- nfs_access_add_cache(state->inode, &cache);
+ nfs_access_add_cache(state->inode, &cache, cred);
flags = NFS4_ACCESS_READ | NFS4_ACCESS_EXECUTE | NFS4_ACCESS_LOOKUP;
if ((mask & ~cache.mask & flags) == 0)
@@ -3670,7 +3665,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (!nfs4_have_delegation(inode, FMODE_READ)) {
nfs4_bitmask_set(calldata->arg.bitmask_store,
server->cache_consistency_bitmask,
- inode, server, NULL);
+ inode, 0);
calldata->arg.bitmask = calldata->arg.bitmask_store;
} else
calldata->arg.bitmask = NULL;
@@ -3841,7 +3836,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
FATTR4_WORD0_FH_EXPIRE_TYPE |
FATTR4_WORD0_LINK_SUPPORT |
FATTR4_WORD0_SYMLINK_SUPPORT |
- FATTR4_WORD0_ACLSUPPORT;
+ FATTR4_WORD0_ACLSUPPORT |
+ FATTR4_WORD0_CASE_INSENSITIVE |
+ FATTR4_WORD0_CASE_PRESERVING;
if (minorversion)
bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;
@@ -3870,10 +3867,16 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->caps |= NFS_CAP_HARDLINKS;
if (res.has_symlinks != 0)
server->caps |= NFS_CAP_SYMLINKS;
+ if (res.case_insensitive)
+ server->caps |= NFS_CAP_CASE_INSENSITIVE;
+ if (res.case_preserving)
+ server->caps |= NFS_CAP_CASE_PRESERVING;
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
server->caps |= NFS_CAP_SECURITY_LABEL;
#endif
+ if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS)
+ server->caps |= NFS_CAP_FS_LOCATIONS;
if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
@@ -3932,6 +3935,114 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
return err;
}
+/*
+ * Try every server named in one fs_locations entry as an additional
+ * transport for @clp.
+ *
+ * Each non-empty server string in @location is converted to a socket
+ * address with nfs_parse_server_name() (passing @server->port) and handed
+ * to rpc_clnt_add_xprt(), with the minor version's session_trunk callback
+ * as the acceptance test.  Processing stops at the first string that cannot
+ * be converted, or when the server name copy cannot be allocated.
+ */
+static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
+					  struct nfs_client *clp,
+					  struct nfs_server *server)
+{
+	int i;
+
+	for (i = 0; i < location->nservers; i++) {
+		struct nfs4_string *srv_loc = &location->servers[i];
+		/*
+		 * A bare struct sockaddr cannot hold an IPv6 address; use
+		 * sockaddr_storage so any address family fits.
+		 */
+		struct sockaddr_storage addr;
+		struct sockaddr *sap = (struct sockaddr *)&addr;
+		size_t addrlen;
+		struct xprt_create xprt_args = {
+			.ident = 0,
+			.net = clp->cl_net,
+		};
+		struct nfs4_add_xprt_data xprtdata = {
+			.clp = clp,
+		};
+		struct rpc_add_xprt_test rpcdata = {
+			.add_xprt_test = clp->cl_mvops->session_trunk,
+			.data = &xprtdata,
+		};
+		char *servername = NULL;
+
+		if (!srv_loc->len)
+			continue;
+
+		addrlen = nfs_parse_server_name(srv_loc->data, srv_loc->len,
+						sap, sizeof(addr),
+						clp->cl_net, server->port);
+		if (!addrlen)
+			return;
+		xprt_args.dstaddr = sap;
+		xprt_args.addrlen = addrlen;
+
+		/* srv_loc->data is length-counted; build a NUL-terminated copy. */
+		servername = kmalloc(srv_loc->len + 1, GFP_KERNEL);
+		if (!servername)
+			return;
+		memcpy(servername, srv_loc->data, srv_loc->len);
+		servername[srv_loc->len] = '\0';
+		xprt_args.servername = servername;
+
+		xprtdata.cred = nfs4_get_clid_cred(clp);
+		rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+				  rpc_clnt_setup_test_and_add_xprt,
+				  &rpcdata);
+		if (xprtdata.cred)
+			put_cred(xprtdata.cred);
+		kfree(servername);
+	}
+}
+
+/*
+ * Fetch the fs_locations data for @fhandle on @server and test every
+ * returned location for trunking.
+ *
+ * The state renewal credential is used, falling back to the client ID
+ * credential when none is available.
+ *
+ * Returns 0 on success, -ENOKEY if no credential could be obtained,
+ * -ENOMEM if the page or the locations buffer cannot be allocated, or the
+ * status returned by nfs4_proc_get_locations().
+ */
+static int _nfs4_discover_trunking(struct nfs_server *server,
+				   struct nfs_fh *fhandle)
+{
+	struct nfs4_fs_locations *locations = NULL;
+	struct page *page;
+	const struct cred *cred;
+	struct nfs_client *clp = server->nfs_client;
+	const struct nfs4_state_maintenance_ops *ops =
+		clp->cl_mvops->state_renewal_ops;
+	int status = -ENOMEM, i;
+
+	cred = ops->get_state_renewal_cred(clp);
+	if (cred == NULL) {
+		cred = nfs4_get_clid_cred(clp);
+		if (cred == NULL)
+			return -ENOKEY;
+	}
+
+	page = alloc_page(GFP_KERNEL);
+	locations = kmalloc(sizeof(*locations), GFP_KERNEL);
+	if (page == NULL || locations == NULL)
+		goto out;
+
+	status = nfs4_proc_get_locations(server, fhandle, locations, page,
+					 cred);
+	if (status)
+		goto out;
+
+	for (i = 0; i < locations->nlocations; i++)
+		test_fs_location_for_trunking(&locations->locations[i], clp,
+					      server);
+out:
+	if (page)
+		__free_page(page);
+	kfree(locations);
+	/* Drop the credential reference obtained above on every exit path. */
+	put_cred(cred);
+	return status;
+}
+
+/*
+ * Entry point for trunking discovery: a no-op returning 0 when the client
+ * has no session, otherwise runs _nfs4_discover_trunking() and feeds its
+ * result through nfs4_handle_exception() until no retry is requested.
+ */
+static int nfs4_discover_trunking(struct nfs_server *server,
+				  struct nfs_fh *fhandle)
+{
+	struct nfs4_exception exception = {
+		.interruptible = true,
+	};
+	int err;
+
+	if (!nfs4_has_session(server->nfs_client))
+		return 0;
+
+	do {
+		err = _nfs4_discover_trunking(server, fhandle);
+		err = nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+
+	return err;
+}
+
static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
@@ -4441,7 +4552,8 @@ static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
return err;
}
-static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+ const struct cred *cred)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_accessargs args = {
@@ -4455,7 +4567,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
.rpc_argp = &args,
.rpc_resp = &res,
- .rpc_cred = entry->cred,
+ .rpc_cred = cred,
};
int status = 0;
@@ -4475,14 +4587,15 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
return status;
}
-static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+ const struct cred *cred)
{
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
- err = _nfs4_proc_access(inode, entry);
+ err = _nfs4_proc_access(inode, entry, cred);
trace_nfs4_access(inode, err);
err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
@@ -4663,8 +4776,10 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg,
nfs_fattr_init(res->dir_attr);
- if (inode)
+ if (inode) {
nfs4_inode_return_delegation(inode);
+ nfs_d_prune_case_insensitive_aliases(inode);
+ }
}
static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
@@ -4730,6 +4845,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
return 0;
if (task->tk_status == 0) {
+ nfs_d_prune_case_insensitive_aliases(d_inode(data->old_dentry));
if (new_dir != old_dir) {
/* Note: If we moved a directory, nlink will change */
nfs4_update_changeattr(old_dir, &res->old_cinfo,
@@ -5422,14 +5538,14 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
- struct inode *inode, struct nfs_server *server,
- struct nfs4_label *label)
+void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
+ struct inode *inode, unsigned long cache_validity)
{
- unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ struct nfs_server *server = NFS_SERVER(inode);
unsigned int i;
memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
+ cache_validity |= READ_ONCE(NFS_I(inode)->cache_validity);
if (cache_validity & NFS_INO_INVALID_CHANGE)
bitmask[0] |= FATTR4_WORD0_CHANGE;
@@ -5441,8 +5557,6 @@ static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP;
if (cache_validity & NFS_INO_INVALID_NLINK)
bitmask[1] |= FATTR4_WORD1_NUMLINKS;
- if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
- bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
if (cache_validity & NFS_INO_INVALID_CTIME)
bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
if (cache_validity & NFS_INO_INVALID_MTIME)
@@ -5469,7 +5583,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
} else {
nfs4_bitmask_set(hdr->args.bitmask_store,
server->cache_consistency_bitmask,
- hdr->inode, server, NULL);
+ hdr->inode, NFS_INO_INVALID_BLOCKS);
hdr->args.bitmask = hdr->args.bitmask_store;
}
@@ -6507,8 +6621,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
nfs4_bitmask_set(data->args.bitmask_store,
- server->cache_consistency_bitmask, inode, server,
- NULL);
+ server->cache_consistency_bitmask, inode, 0);
data->args.bitmask = data->args.bitmask_store;
nfs_copy_fh(&data->fh, NFS_FH(inode));
nfs4_stateid_copy(&data->stateid, stateid);
@@ -7611,7 +7724,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
const char *key, const void *buf,
size_t buflen, int flags)
{
- struct nfs_access_entry cache;
+ u32 mask;
int ret;
if (!nfs_server_capable(inode, NFS_CAP_XATTR))
@@ -7626,8 +7739,8 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
* do a cached access check for the XA* flags to possibly avoid
* doing an RPC and getting EACCES back.
*/
- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
- if (!(cache.mask & NFS_ACCESS_XAWRITE))
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XAWRITE))
return -EACCES;
}
@@ -7648,14 +7761,14 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *key, void *buf, size_t buflen)
{
- struct nfs_access_entry cache;
+ u32 mask;
ssize_t ret;
if (!nfs_server_capable(inode, NFS_CAP_XATTR))
return -EOPNOTSUPP;
- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
- if (!(cache.mask & NFS_ACCESS_XAREAD))
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XAREAD))
return -EACCES;
}
@@ -7680,13 +7793,13 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
ssize_t ret, size;
char *buf;
size_t buflen;
- struct nfs_access_entry cache;
+ u32 mask;
if (!nfs_server_capable(inode, NFS_CAP_XATTR))
return 0;
- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
- if (!(cache.mask & NFS_ACCESS_XALIST))
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XALIST))
return 0;
}
@@ -7818,18 +7931,18 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
* appended to this compound to identify the client ID which is
* performing recovery.
*/
-static int _nfs40_proc_get_locations(struct inode *inode,
+static int _nfs40_proc_get_locations(struct nfs_server *server,
+ struct nfs_fh *fhandle,
struct nfs4_fs_locations *locations,
struct page *page, const struct cred *cred)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct rpc_clnt *clnt = server->client;
u32 bitmask[2] = {
[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
};
struct nfs4_fs_locations_arg args = {
.clientid = server->nfs_client->cl_clientid,
- .fh = NFS_FH(inode),
+ .fh = fhandle,
.page = page,
.bitmask = bitmask,
.migration = 1, /* skip LOOKUP */
@@ -7875,17 +7988,17 @@ static int _nfs40_proc_get_locations(struct inode *inode,
* When the client supports GETATTR(fs_locations_info), it can
* be plumbed in here.
*/
-static int _nfs41_proc_get_locations(struct inode *inode,
+static int _nfs41_proc_get_locations(struct nfs_server *server,
+ struct nfs_fh *fhandle,
struct nfs4_fs_locations *locations,
struct page *page, const struct cred *cred)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct rpc_clnt *clnt = server->client;
u32 bitmask[2] = {
[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
};
struct nfs4_fs_locations_arg args = {
- .fh = NFS_FH(inode),
+ .fh = fhandle,
.page = page,
.bitmask = bitmask,
.migration = 1, /* skip LOOKUP */
@@ -7934,11 +8047,11 @@ static int _nfs41_proc_get_locations(struct inode *inode,
* -NFS4ERR_LEASE_MOVED is returned if the server still has leases
* from this client that require migration recovery.
*/
-int nfs4_proc_get_locations(struct inode *inode,
+int nfs4_proc_get_locations(struct nfs_server *server,
+ struct nfs_fh *fhandle,
struct nfs4_fs_locations *locations,
struct page *page, const struct cred *cred)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
const struct nfs4_mig_recovery_ops *ops =
clp->cl_mvops->mig_recovery_ops;
@@ -7951,10 +8064,11 @@ int nfs4_proc_get_locations(struct inode *inode,
(unsigned long long)server->fsid.major,
(unsigned long long)server->fsid.minor,
clp->cl_hostname);
- nfs_display_fhandle(NFS_FH(inode), __func__);
+ nfs_display_fhandle(fhandle, __func__);
do {
- status = ops->get_locations(inode, locations, page, cred);
+ status = ops->get_locations(server, fhandle, locations, page,
+ cred);
if (status != -NFS4ERR_DELAY)
break;
nfs4_handle_exception(server, status, &exception);
@@ -10423,6 +10537,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.free_client = nfs4_free_client,
.create_server = nfs4_create_server,
.clone_server = nfs_clone_server,
+ .discover_trunking = nfs4_discover_trunking,
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index d88b779f9dd0..f5a62c0d999b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2098,7 +2098,8 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
}
inode = d_inode(server->super->s_root);
- result = nfs4_proc_get_locations(inode, locations, page, cred);
+ result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
+ page, cred);
if (result) {
dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
__func__, result);
@@ -2106,6 +2107,9 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
}
result = -NFS4ERR_NXIO;
+ if (!locations->nlocations)
+ goto out;
+
if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
dprintk("<-- %s: No fs_locations data, migration skipped\n",
__func__);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 69862bf6db00..8e70b92df4cc 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3533,6 +3533,42 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
return 0;
}
+/*
+ * Decode the case_insensitive attribute from @xdr.
+ *
+ * *res is set to 0 unless FATTR4_WORD0_CASE_INSENSITIVE is set in
+ * bitmap[0]; in that case it receives the decoded 32-bit value and the bit
+ * is cleared from the bitmap.
+ *
+ * Returns -EIO if a lower attribute bit is still set in bitmap[0] or the
+ * stream does not hold 4 more bytes, otherwise 0.
+ */
+static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	__be32 *p;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_INSENSITIVE - 1U)))
+		return -EIO;
+	if (unlikely(!(bitmap[0] & FATTR4_WORD0_CASE_INSENSITIVE)))
+		goto out;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		return -EIO;
+	*res = be32_to_cpup(p);
+	bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE;
+out:
+	dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true");
+	return 0;
+}
+
+/*
+ * Decode the case_preserving attribute from @xdr.
+ *
+ * *res is set to 0 unless FATTR4_WORD0_CASE_PRESERVING is set in
+ * bitmap[0]; in that case it receives the decoded 32-bit value and the bit
+ * is cleared from the bitmap.
+ *
+ * Returns -EIO if a lower attribute bit is still set in bitmap[0] or the
+ * stream does not hold 4 more bytes, otherwise 0.
+ */
+static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	__be32 *p;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_PRESERVING - 1U)))
+		return -EIO;
+	if (unlikely(!(bitmap[0] & FATTR4_WORD0_CASE_PRESERVING)))
+		goto out;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		return -EIO;
+	*res = be32_to_cpup(p);
+	bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING;
+out:
+	dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true");
+	return 0;
+}
+
static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
{
__be32 *p;
@@ -3696,8 +3732,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
if (unlikely(!p))
goto out_eio;
n = be32_to_cpup(p);
- if (n <= 0)
- goto out_eio;
for (res->nlocations = 0; res->nlocations < n; res->nlocations++) {
u32 m;
struct nfs4_fs_location *loc;
@@ -4200,10 +4234,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
} else
printk(KERN_WARNING "%s: label too long (%u)!\n",
__func__, len);
+ if (label && label->label)
+ dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n",
+ __func__, label->len, (char *)label->label,
+ label->len, label->pi, label->lfs);
}
- if (label && label->label)
- dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__,
- (char *)label->label, label->len, label->pi, label->lfs);
return status;
}
@@ -4412,6 +4447,10 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
goto xdr_error;
if ((status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask)) != 0)
goto xdr_error;
+ if ((status = decode_attr_case_insensitive(xdr, bitmap, &res->case_insensitive)) != 0)
+ goto xdr_error;
+ if ((status = decode_attr_case_preserving(xdr, bitmap, &res->case_preserving)) != 0)
+ goto xdr_error;
if ((status = decode_attr_exclcreat_supported(xdr, bitmap,
res->exclcreat_bitmask)) != 0)
goto xdr_error;
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 8cb70755e3c9..a6f740366963 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -142,10 +142,11 @@ static struct attribute *nfs_netns_client_attrs[] = {
&nfs_netns_client_id.attr,
NULL,
};
+ATTRIBUTE_GROUPS(nfs_netns_client);
static struct kobj_type nfs_netns_client_type = {
.release = nfs_netns_client_release,
- .default_attrs = nfs_netns_client_attrs,
+ .default_groups = nfs_netns_client_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = nfs_netns_client_namespace,
};
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b9f27fbcd768..68b020f2002b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1247,7 +1247,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
clear_ncl(d_inode(dentry));
dget(dentry);
ret = simple_unlink(dir, dentry);
- d_delete(dentry);
+ d_drop(dentry);
+ fsnotify_unlink(dir, dentry);
dput(dentry);
WARN_ON_ONCE(ret);
}
@@ -1338,8 +1339,8 @@ void nfsd_client_rmdir(struct dentry *dentry)
dget(dentry);
ret = simple_rmdir(dir, dentry);
WARN_ON_ONCE(ret);
+ d_drop(dentry);
fsnotify_rmdir(dir, dentry);
- d_delete(dentry);
dput(dentry);
inode_unlock(dir);
}
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index bc3e2cd4117f..063dd16d75b5 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -195,12 +195,12 @@ void nilfs_page_bug(struct page *page)
*/
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
- struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
+ struct buffer_head *dbh, *dbufs, *sbh;
unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
BUG_ON(PageWriteback(dst));
- sbh = sbufs = page_buffers(src);
+ sbh = page_buffers(src);
if (!page_has_buffers(dst))
create_empty_buffers(dst, sbh->b_size, 0);
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index d5ebebb034ff..829dd4a61b66 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -19,7 +19,25 @@
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
-int dir_notify_enable __read_mostly = 1;
+static int dir_notify_enable __read_mostly = 1;
+#ifdef CONFIG_SYSCTL
+static struct ctl_table dnotify_sysctls[] = {
+ {
+ .procname = "dir-notify-enable",
+ .data = &dir_notify_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {}
+};
+static void __init dnotify_sysctl_init(void)
+{
+ register_sysctl_init("fs", dnotify_sysctls);
+}
+#else
+#define dnotify_sysctl_init() do { } while (0)
+#endif
static struct kmem_cache *dnotify_struct_cache __read_mostly;
static struct kmem_cache *dnotify_mark_cache __read_mostly;
@@ -386,6 +404,7 @@ static int __init dnotify_init(void)
dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
+ dnotify_sysctl_init();
return 0;
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 73a3e939c921..1026f67b1d1e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -59,7 +59,7 @@ static int fanotify_max_queued_events __read_mostly;
static long ft_zero = 0;
static long ft_int_max = INT_MAX;
-struct ctl_table fanotify_table[] = {
+static struct ctl_table fanotify_table[] = {
{
.procname = "max_user_groups",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
@@ -88,6 +88,13 @@ struct ctl_table fanotify_table[] = {
},
{ }
};
+
+static void __init fanotify_sysctls_init(void)
+{
+ register_sysctl("fs/fanotify", fanotify_table);
+}
+#else
+#define fanotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
/*
@@ -151,7 +158,6 @@ static size_t fanotify_event_len(unsigned int info_mode,
struct fanotify_event *event)
{
size_t event_len = FAN_EVENT_METADATA_LEN;
- struct fanotify_info *info;
int fh_len;
int dot_len = 0;
@@ -161,8 +167,6 @@ static size_t fanotify_event_len(unsigned int info_mode,
if (fanotify_is_error_event(event->mask))
event_len += FANOTIFY_ERROR_INFO_LEN;
- info = fanotify_event_info(event);
-
if (fanotify_event_has_any_dir_fh(event)) {
event_len += fanotify_dir_name_info_len(event);
} else if ((info_mode & FAN_REPORT_NAME) &&
@@ -1743,6 +1747,7 @@ static int __init fanotify_user_setup(void)
init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
FANOTIFY_DEFAULT_MAX_GROUPS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks;
+ fanotify_sysctls_init();
return 0;
}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 29fca3284bb5..54583f62dc44 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -58,7 +58,7 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
static long it_zero = 0;
static long it_int_max = INT_MAX;
-struct ctl_table inotify_table[] = {
+static struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
@@ -87,6 +87,14 @@ struct ctl_table inotify_table[] = {
},
{ }
};
+
+static void __init inotify_sysctls_init(void)
+{
+ register_sysctl("fs/inotify", inotify_table);
+}
+
+#else
+#define inotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
@@ -849,6 +857,7 @@ static int __init inotify_user_setup(void)
inotify_max_queued_events = 16384;
init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;
+ inotify_sysctls_init();
return 0;
}
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 8aaec7e0804e..fb825059d488 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -11,7 +11,6 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
-#include <linux/cleancache.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f89ffcbd585f..a17be1618bf7 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -379,7 +379,7 @@ static void o2hb_nego_timeout(struct work_struct *work)
o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
/* lowest node as master node to make negotiate decision. */
- master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0);
+ master_node = find_first_bit(live_node_bitmap, O2NM_MAX_NODES);
if (master_node == o2nm_this_node()) {
if (!test_bit(master_node, reg->hr_nego_node_bitmap)) {
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 9f90fc9551e1..c4eccd499db8 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1045,7 +1045,7 @@ static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map)
int status, ret = 0, i;
char *p;
- if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
+ if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES)
goto bail;
qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL);
@@ -1217,7 +1217,7 @@ static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map)
struct o2nm_node *node;
int ret = 0, status, count, i;
- if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
+ if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES)
goto bail;
qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9b88219febb5..227da5b1b6ab 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -861,7 +861,7 @@ lookup:
* to see if there are any nodes that still need to be
* considered. these will not appear in the mle nodemap
* but they might own this lockres. wait on them. */
- bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES);
if (bit < O2NM_MAX_NODES) {
mlog(0, "%s: res %.*s, At least one node (%d) "
"to recover before lock mastery can begin\n",
@@ -912,7 +912,7 @@ redo_request:
dlm_wait_for_recovery(dlm);
spin_lock(&dlm->spinlock);
- bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES);
if (bit < O2NM_MAX_NODES) {
mlog(0, "%s: res %.*s, At least one node (%d) "
"to recover before lock mastery can begin\n",
@@ -1079,7 +1079,7 @@ recheck:
sleep = 1;
/* have all nodes responded? */
if (voting_done && !*blocked) {
- bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES);
if (dlm->node_num <= bit) {
/* my node number is lowest.
* now tell other nodes that I am
@@ -1234,8 +1234,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
} else {
mlog(ML_ERROR, "node down! %d\n", node);
if (blocked) {
- int lowest = find_next_bit(mle->maybe_map,
- O2NM_MAX_NODES, 0);
+ int lowest = find_first_bit(mle->maybe_map,
+ O2NM_MAX_NODES);
/* act like it was never there */
clear_bit(node, mle->maybe_map);
@@ -1795,7 +1795,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
"MLE for it! (%.*s)\n", assert->node_idx,
namelen, name);
} else {
- int bit = find_next_bit (mle->maybe_map, O2NM_MAX_NODES, 0);
+ int bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES);
if (bit >= O2NM_MAX_NODES) {
/* not necessarily an error, though less likely.
* could be master just re-asserting. */
@@ -2521,7 +2521,7 @@ static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm,
}
if (!nonlocal) {
- node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+ node_ref = find_first_bit(res->refmap, O2NM_MAX_NODES);
if (node_ref >= O2NM_MAX_NODES)
return 0;
}
@@ -3303,7 +3303,7 @@ static void dlm_clean_block_mle(struct dlm_ctxt *dlm,
BUG_ON(mle->type != DLM_MLE_BLOCK);
spin_lock(&mle->spinlock);
- bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES);
if (bit != dead_node) {
mlog(0, "mle found, but dead node %u would not have been "
"master\n", dead_node);
@@ -3542,7 +3542,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm)
spin_lock(&dlm->master_lock);
BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
- BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
+ BUG_ON((find_first_bit(dlm->domain_map, O2NM_MAX_NODES) < O2NM_MAX_NODES));
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
bucket = dlm_master_hash(dlm, i);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 5cd5f7511dac..52ad342fec3e 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -451,7 +451,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
int bit;
- bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES);
if (bit >= O2NM_MAX_NODES || bit < 0)
dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
else
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index c350bd4df770..eedf07ca23ca 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -92,7 +92,7 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
return 0;
/* Another node has this resource with this node as the master */
- bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(res->refmap, O2NM_MAX_NODES);
if (bit < O2NM_MAX_NODES)
return 0;
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 16f1bfc407f2..dd77b7aaabf5 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -661,42 +661,8 @@ static struct ctl_table ocfs2_nm_table[] = {
{ }
};
-static struct ctl_table ocfs2_mod_table[] = {
- {
- .procname = "nm",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_nm_table
- },
- { }
-};
-
-static struct ctl_table ocfs2_kern_table[] = {
- {
- .procname = "ocfs2",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_mod_table
- },
- { }
-};
-
-static struct ctl_table ocfs2_root_table[] = {
- {
- .procname = "fs",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_kern_table
- },
- { }
-};
-
static struct ctl_table_header *ocfs2_table_header;
-
/*
* Initialization
*/
@@ -705,7 +671,7 @@ static int __init ocfs2_stack_glue_init(void)
{
strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
- ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
+ ocfs2_table_header = register_sysctl("fs/ocfs2/nm", ocfs2_nm_table);
if (!ocfs2_table_header) {
printk(KERN_ERR
"ocfs2 stack glue: unable to register sysctl\n");
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 481017e1dac5..166c8918c825 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1251,26 +1251,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
{
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct journal_head *jh;
- int ret = 1;
+ int ret;
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
return 0;
- if (!buffer_jbd(bg_bh))
+ jh = jbd2_journal_grab_journal_head(bg_bh);
+ if (!jh)
return 1;
- jbd_lock_bh_journal_head(bg_bh);
- if (buffer_jbd(bg_bh)) {
- jh = bh2jh(bg_bh);
- spin_lock(&jh->b_state_lock);
- bg = (struct ocfs2_group_desc *) jh->b_committed_data;
- if (bg)
- ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
- else
- ret = 1;
- spin_unlock(&jh->b_state_lock);
- }
- jbd_unlock_bh_journal_head(bg_bh);
+ spin_lock(&jh->b_state_lock);
+ bg = (struct ocfs2_group_desc *) jh->b_committed_data;
+ if (bg)
+ ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+ else
+ ret = 1;
+ spin_unlock(&jh->b_state_lock);
+ jbd2_journal_put_journal_head(jh);
return ret;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 1286b88b6fa1..2772dec9dcea 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -25,7 +25,6 @@
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/quotaops.h>
-#include <linux/cleancache.h>
#include <linux/signal.h>
#define CREATE_TRACE_POINTS
@@ -2283,7 +2282,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog_errno(status);
goto bail;
}
- cleancache_init_shared_fs(sb);
osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM);
if (!osb->ocfs2_wq) {
diff --git a/fs/pipe.c b/fs/pipe.c
index 6d4342bad9f1..cc28623a67b6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -25,6 +25,7 @@
#include <linux/fcntl.h>
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>
+#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
@@ -50,13 +51,13 @@
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
*/
-unsigned int pipe_max_size = 1048576;
+static unsigned int pipe_max_size = 1048576;
/* Maximum allocatable pages per user. Hard limit is unset by default, soft
* matches default values.
*/
-unsigned long pipe_user_pages_hard;
-unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+static unsigned long pipe_user_pages_hard;
+static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
/*
* We use head and tail indices that aren't masked off, except at the point of
@@ -1428,6 +1429,60 @@ static struct file_system_type pipe_fs_type = {
.kill_sb = kill_anon_super,
};
+#ifdef CONFIG_SYSCTL
+static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data)
+{
+ if (write) {
+ unsigned int val;
+
+ val = round_pipe_size(*lvalp);
+ if (val == 0)
+ return -EINVAL;
+
+ *valp = val;
+ } else {
+ unsigned int val = *valp;
+ *lvalp = (unsigned long) val;
+ }
+
+ return 0;
+}
+
+static int proc_dopipe_max_size(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ return do_proc_douintvec(table, write, buffer, lenp, ppos,
+ do_proc_dopipe_max_size_conv, NULL);
+}
+
+static struct ctl_table fs_pipe_sysctls[] = {
+ {
+ .procname = "pipe-max-size",
+ .data = &pipe_max_size,
+ .maxlen = sizeof(pipe_max_size),
+ .mode = 0644,
+ .proc_handler = proc_dopipe_max_size,
+ },
+ {
+ .procname = "pipe-user-pages-hard",
+ .data = &pipe_user_pages_hard,
+ .maxlen = sizeof(pipe_user_pages_hard),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "pipe-user-pages-soft",
+ .data = &pipe_user_pages_soft,
+ .maxlen = sizeof(pipe_user_pages_soft),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ { }
+};
+#endif
+
static int __init init_pipe_fs(void)
{
int err = register_filesystem(&pipe_fs_type);
@@ -1439,6 +1494,9 @@ static int __init init_pipe_fs(void)
unregister_filesystem(&pipe_fs_type);
}
}
+#ifdef CONFIG_SYSCTL
+ register_sysctl_init("fs", fs_pipe_sysctls);
+#endif
return err;
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 43a7abde9e42..fd8b0c12b2cb 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -92,6 +92,7 @@
#include <linux/string_helpers.h>
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include "internal.h"
@@ -102,6 +103,8 @@ void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape)
if (p->flags & PF_WQ_WORKER)
wq_worker_comm(tcomm, sizeof(tcomm), p);
+ else if (p->flags & PF_KTHREAD)
+ get_kthread_comm(tcomm, sizeof(tcomm), p);
else
__get_task_comm(tcomm, sizeof(tcomm), p);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 13eda8de2998..d654ce7150fd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -670,10 +670,10 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
/************************************************************************/
/* permission checks */
-static int proc_fd_access_allowed(struct inode *inode)
+static bool proc_fd_access_allowed(struct inode *inode)
{
struct task_struct *task;
- int allowed = 0;
+ bool allowed = false;
/* Allow access to a task's file descriptors if it is us or we
* may use ptrace attach to the process and find out that
* information.
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5b78739e60e4..f2132407e133 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -791,12 +791,6 @@ void proc_remove(struct proc_dir_entry *de)
}
EXPORT_SYMBOL(proc_remove);
-void *PDE_DATA(const struct inode *inode)
-{
- return __PDE_DATA(inode);
-}
-EXPORT_SYMBOL(PDE_DATA);
-
/*
* Pull a user buffer into memory and pass it to the file's write handler if
* one is supplied. The ->write() method is permitted to modify the
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 599eb724ff2d..f84355c5a36d 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -650,6 +650,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
return NULL;
}
+ inode->i_private = de->data;
inode->i_ino = de->low_ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
PROC_I(inode)->pde = de;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 03415f3fb3a8..06a80f78433d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -115,11 +115,6 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
return PROC_I(inode)->pde;
}
-static inline void *__PDE_DATA(const struct inode *inode)
-{
- return PDE(inode)->data;
-}
-
static inline struct pid *proc_pid(const struct inode *inode)
{
return PROC_I(inode)->pid;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 39b823ab2564..e1cfeda397f3 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -138,7 +138,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data);
* @parent: The parent directory in which to create.
* @ops: The seq_file ops with which to read the file.
* @write: The write method with which to 'modify' the file.
- * @data: Data for retrieval by PDE_DATA().
+ * @data: Data for retrieval by pde_data().
*
* Create a network namespaced proc file in the @parent directory with the
* specified @name and @mode that allows reading of a file that displays a
@@ -153,7 +153,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data);
* modified by the @write function. @write should return 0 on success.
*
* The @data value is accessible from the @show and @write functions by calling
- * PDE_DATA() on the file inode. The network namespace must be accessed by
+ * pde_data() on the file inode. The network namespace must be accessed by
* calling seq_file_net() on the seq_file struct.
*/
struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode,
@@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_single);
* @parent: The parent directory in which to create.
* @show: The seqfile show method with which to read the file.
* @write: The write method with which to 'modify' the file.
- * @data: Data for retrieval by PDE_DATA().
+ * @data: Data for retrieval by pde_data().
*
* Create a network-namespaced proc file in the @parent directory with the
* specified @name and @mode that allows reading of a file that displays a
@@ -245,7 +245,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_single);
* modified by the @write function. @write should return 0 on success.
*
* The @data value is accessible from the @show and @write functions by calling
- * PDE_DATA() on the file inode. The network namespace must be accessed by
+ * pde_data() on the file inode. The network namespace must be accessed by
* calling seq_file_single_net() on the seq_file struct.
*/
struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mode,
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5d66faecd4ef..7d9cfc730bd4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
#include <linux/mount.h>
+#include <linux/kmemleak.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
@@ -25,15 +26,32 @@ static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
/* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, INT_MAX };
+const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX, 65535 };
EXPORT_SYMBOL(sysctl_vals);
+const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
+EXPORT_SYMBOL_GPL(sysctl_long_vals);
+
/* Support for permanently empty directories */
struct ctl_table sysctl_mount_point[] = {
{ }
};
+/**
+ * register_sysctl_mount_point() - registers a sysctl mount point
+ * @path: path for the mount point
+ *
+ * Used to create a permanently empty directory to serve as mount point.
+ * There are some subtle but important permission checks this allows in the
+ * case of unprivileged mounts.
+ */
+struct ctl_table_header *register_sysctl_mount_point(const char *path)
+{
+ return register_sysctl(path, sysctl_mount_point);
+}
+EXPORT_SYMBOL(register_sysctl_mount_point);
+
static bool is_empty_dir(struct ctl_table_header *head)
{
return head->ctl_table[0].child == sysctl_mount_point;
@@ -163,7 +181,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
else {
pr_err("sysctl duplicate entry: ");
sysctl_print_dir(head->parent);
- pr_cont("/%s\n", entry->procname);
+ pr_cont("%s\n", entry->procname);
return -EEXIST;
}
}
@@ -1020,8 +1038,8 @@ failed:
if (IS_ERR(subdir)) {
pr_err("sysctl could not get directory: ");
sysctl_print_dir(dir);
- pr_cont("/%*.*s %ld\n",
- namelen, namelen, name, PTR_ERR(subdir));
+ pr_cont("%*.*s %ld\n", namelen, namelen, name,
+ PTR_ERR(subdir));
}
drop_sysctl_table(&dir->header);
if (new)
@@ -1053,7 +1071,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
struct ctl_dir *dir;
int ret;
- ret = 0;
spin_lock(&sysctl_lock);
root = (*pentry)->data;
set = lookup_header_set(root);
@@ -1384,6 +1401,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab
}
EXPORT_SYMBOL(register_sysctl);
+/**
+ * __register_sysctl_init() - register sysctl table to path
+ * @path: path name for sysctl base
+ * @table: This is the sysctl table that needs to be registered to the path
+ * @table_name: The name of sysctl table, only used for log printing when
+ * registration fails
+ *
+ * The sysctl interface is used by userspace to query or modify at runtime
+ * a predefined value set on a variable. These variables however have default
+ * values pre-set. Code which depends on these variables will always work even
+ * if register_sysctl() fails. If register_sysctl() fails you'd just lose the
+ * ability to query or modify the sysctls dynamically at run time. Chances of
+ * register_sysctl() failing on init are extremely low, and so for both reasons
+ * this function does not return any error as it is used by initialization code.
+ *
+ * Context: Can only be called after your respective sysctl base path has been
+ * registered. So for instance, most base directories are registered early on
+ * init before init levels are processed through proc_sys_init() and
+ * sysctl_init_bases().
+ */
+void __init __register_sysctl_init(const char *path, struct ctl_table *table,
+ const char *table_name)
+{
+ struct ctl_table_header *hdr = register_sysctl(path, table);
+
+ if (unlikely(!hdr)) {
+ pr_err("failed when register_sysctl %s to %s\n", table_name, path);
+ return;
+ }
+ kmemleak_not_leak(hdr);
+}
+
static char *append_path(const char *path, char *pos, const char *name)
{
int namelen;
@@ -1597,6 +1646,15 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
}
EXPORT_SYMBOL(register_sysctl_table);
+int __register_sysctl_base(struct ctl_table *base_table)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_sysctl_table(base_table);
+ kmemleak_not_leak(hdr);
+ return 0;
+}
+
static void put_links(struct ctl_table_header *header)
{
struct ctl_table_set *root_set = &sysctl_table_root.default_set;
@@ -1626,7 +1684,7 @@ static void put_links(struct ctl_table_header *header)
else {
pr_err("sysctl link missing during unregister: ");
sysctl_print_dir(parent);
- pr_cont("/%s\n", name);
+ pr_cont("%s\n", name);
}
}
}
@@ -1710,7 +1768,7 @@ int __init proc_sys_init(void)
proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations;
proc_sys_root->nlink = 0;
- return sysctl_init();
+ return sysctl_init_bases();
}
struct sysctl_alias {
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 509f85148fee..702754dd1daf 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -65,8 +65,6 @@ static size_t vmcoredd_orig_sz;
static DECLARE_RWSEM(vmcore_cb_rwsem);
/* List of registered vmcore callbacks. */
static LIST_HEAD(vmcore_cb_list);
-/* Whether we had a surprise unregistration of a callback. */
-static bool vmcore_cb_unstable;
/* Whether the vmcore has been opened once. */
static bool vmcore_opened;
@@ -94,10 +92,8 @@ void unregister_vmcore_cb(struct vmcore_cb *cb)
* very unusual (e.g., forced driver removal), but we cannot stop
* unregistering.
*/
- if (vmcore_opened) {
+ if (vmcore_opened)
pr_warn_once("Unexpected vmcore callback unregistration\n");
- vmcore_cb_unstable = true;
- }
up_write(&vmcore_cb_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
@@ -108,8 +104,6 @@ static bool pfn_is_ram(unsigned long pfn)
bool ret = true;
lockdep_assert_held_read(&vmcore_cb_rwsem);
- if (unlikely(vmcore_cb_unstable))
- return false;
list_for_each_entry(cb, &vmcore_cb_list, next) {
if (unlikely(!cb->pfn_is_ram))
@@ -581,7 +575,7 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
* looping over all pages without a reason.
*/
down_read(&vmcore_cb_rwsem);
- if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
+ if (!list_empty(&vmcore_cb_list))
ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
else
ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
diff --git a/fs/smbfs_common/smb2pdu.h b/fs/smbfs_common/smb2pdu.h
index 7ccadcbe684b..38b8fc514860 100644
--- a/fs/smbfs_common/smb2pdu.h
+++ b/fs/smbfs_common/smb2pdu.h
@@ -449,7 +449,7 @@ struct smb2_netname_neg_context {
*/
/* Flags */
-#define SMB2_ACCEPT_TRANSFORM_LEVEL_SECURITY 0x00000001
+#define SMB2_ACCEPT_TRANSPORT_LEVEL_SECURITY 0x00000001
struct smb2_transport_capabilities_context {
__le16 ContextType; /* 6 */
diff --git a/fs/smbfs_common/smbfsctl.h b/fs/smbfs_common/smbfsctl.h
index 926f87cd6af0..d51939c43ad7 100644
--- a/fs/smbfs_common/smbfsctl.h
+++ b/fs/smbfs_common/smbfsctl.h
@@ -95,8 +95,10 @@
#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
#define FSCTL_GET_REFS_VOLUME_DATA 0x000902D8 /* See MS-FSCC 2.3.24 */
+#define FSCTL_SET_INTEGRITY_INFORMATION_EXT 0x00090380
#define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3
#define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b
+#define FSCTL_REFS_STREAM_SNAPSHOT_MANAGEMENT 0x00090440
#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF
#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */
diff --git a/fs/super.c b/fs/super.c
index a6405d44d4ca..7af820ba5ad5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -31,7 +31,6 @@
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
-#include <linux/cleancache.h>
#include <linux/fscrypt.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
@@ -260,7 +259,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_time_gran = 1000000000;
s->s_time_min = TIME64_MIN;
s->s_time_max = TIME64_MAX;
- s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS;
s->s_shrink.scan_objects = super_cache_scan;
@@ -330,7 +328,6 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
- cleancache_invalidate_fs(s);
unregister_shrinker(&s->s_shrink);
fs->kill_sb(s);
diff --git a/fs/sysctls.c b/fs/sysctls.c
new file mode 100644
index 000000000000..c701273c9432
--- /dev/null
+++ b/fs/sysctls.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * /proc/sys/fs shared sysctls
+ *
+ * These sysctls are shared between different filesystems.
+ */
+#include <linux/init.h>
+#include <linux/sysctl.h>
+
+static struct ctl_table fs_shared_sysctls[] = {
+ {
+ .procname = "overflowuid",
+ .data = &fs_overflowuid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
+ },
+ {
+ .procname = "overflowgid",
+ .data = &fs_overflowgid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
+ },
+ { }
+};
+
+DECLARE_SYSCTL_BASE(fs, fs_shared_sysctls);
+
+static int __init init_fs_sysctls(void)
+{
+ return register_sysctl_base(fs);
+}
+
+early_initcall(init_fs_sysctls);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1d6b7a50736b..ea8f6cd01f50 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode *inode)
char *kaddr;
struct udf_inode_info *iinfo = UDF_I(inode);
int err;
- struct writeback_control udf_wbc = {
- .sync_mode = WB_SYNC_NONE,
- .nr_to_write = 1,
- };
WARN_ON_ONCE(!inode_is_locked(inode));
if (!iinfo->i_lenAlloc) {
@@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode *inode)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
/* from now on we have normal address_space methods */
inode->i_data.a_ops = &udf_aops;
+ set_page_dirty(page);
+ unlock_page(page);
up_write(&iinfo->i_data_sem);
- err = inode->i_data.a_ops->writepage(page, &udf_wbc);
+ err = filemap_fdatawrite(inode->i_mapping);
if (err) {
/* Restore everything back so that we don't lose data... */
lock_page(page);
@@ -317,6 +315,7 @@ int udf_expand_file_adinicb(struct inode *inode)
unlock_page(page);
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
inode->i_data.a_ops = &udf_adinicb_aops;
+ iinfo->i_lenAlloc = inode->i_size;
up_write(&iinfo->i_data_sem);
}
put_page(page);
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index c43877c8a279..505533c43a92 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -93,21 +93,6 @@ struct getbmapx {
#define XFS_FMR_OWN_DEFECTIVE FMR_OWNER('X', 8) /* bad blocks */
/*
- * Structure for XFS_IOC_FSSETDM.
- * For use by backup and restore programs to set the XFS on-disk inode
- * fields di_dmevmask and di_dmstate. These must be set to exactly and
- * only values previously obtained via xfs_bulkstat! (Specifically the
- * struct xfs_bstat fields bs_dmevmask and bs_dmstate.)
- */
-#ifndef HAVE_FSDMIDATA
-struct fsdmidata {
- __u32 fsd_dmevmask; /* corresponds to di_dmevmask */
- __u16 fsd_padding;
- __u16 fsd_dmstate; /* corresponds to di_dmstate */
-};
-#endif
-
-/*
* File segment locking set data type for 64 bit access.
* Also used for all the RESV/FREE interfaces.
*/
@@ -562,16 +547,10 @@ typedef struct xfs_fsop_handlereq {
/*
* Compound structures for passing args through Handle Request interfaces
- * xfs_fssetdm_by_handle, xfs_attrlist_by_handle, xfs_attrmulti_by_handle
- * - ioctls: XFS_IOC_FSSETDM_BY_HANDLE, XFS_IOC_ATTRLIST_BY_HANDLE, and
- * XFS_IOC_ATTRMULTI_BY_HANDLE
+ * xfs_attrlist_by_handle, xfs_attrmulti_by_handle
+ * - ioctls: XFS_IOC_ATTRLIST_BY_HANDLE, and XFS_IOC_ATTRMULTI_BY_HANDLE
*/
-typedef struct xfs_fsop_setdm_handlereq {
- struct xfs_fsop_handlereq hreq; /* handle information */
- struct fsdmidata __user *data; /* DMAPI data */
-} xfs_fsop_setdm_handlereq_t;
-
/*
* Flags passed in xfs_attr_multiop.am_flags for the attr ioctl interface.
*
@@ -781,15 +760,15 @@ struct xfs_scrub_metadata {
* For 'documentation' purposed more than anything else,
* the "cmd #" field reflects the IRIX fcntl number.
*/
-#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
-#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
+/* XFS_IOC_ALLOCSP ------- deprecated 10 */
+/* XFS_IOC_FREESP -------- deprecated 11 */
#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr)
#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
-#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
-#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
+/* XFS_IOC_ALLOCSP64 ----- deprecated 36 */
+/* XFS_IOC_FREESP64 ------ deprecated 37 */
#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap)
-#define XFS_IOC_FSSETDM _IOW ('X', 39, struct fsdmidata)
+/* XFS_IOC_FSSETDM ------- deprecated 39 */
#define XFS_IOC_RESVSP _IOW ('X', 40, struct xfs_flock64)
#define XFS_IOC_UNRESVSP _IOW ('X', 41, struct xfs_flock64)
#define XFS_IOC_RESVSP64 _IOW ('X', 42, struct xfs_flock64)
@@ -831,7 +810,7 @@ struct xfs_scrub_metadata {
#define XFS_IOC_FREEZE _IOWR('X', 119, int) /* aka FIFREEZE */
#define XFS_IOC_THAW _IOWR('X', 120, int) /* aka FITHAW */
-#define XFS_IOC_FSSETDM_BY_HANDLE _IOW ('X', 121, struct xfs_fsop_setdm_handlereq)
+/* XFS_IOC_FSSETDM_BY_HANDLE -- deprecated 121 */
#define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
#define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
#define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 797ea0c8b14e..d4a387d3d0ce 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -771,8 +771,7 @@ int
xfs_alloc_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t len,
- int alloc_type)
+ xfs_off_t len)
{
xfs_mount_t *mp = ip->i_mount;
xfs_off_t count;
@@ -865,8 +864,8 @@ xfs_alloc_file_space(
goto error;
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
- allocatesize_fsb, alloc_type, 0, imapp,
- &nimaps);
+ allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
+ &nimaps);
if (error)
goto error;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 9f993168b55b..24b37d211f1d 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -54,7 +54,7 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
/* preallocation and hole punch interface */
int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len, int alloc_type);
+ xfs_off_t len);
int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8d4c5ca261bd..22ad207bedf4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1051,8 +1051,7 @@ xfs_file_fallocate(
}
if (!xfs_is_always_cow_inode(ip)) {
- error = xfs_alloc_file_space(ip, offset, len,
- XFS_BMAPI_PREALLOC);
+ error = xfs_alloc_file_space(ip, offset, len);
if (error)
goto out_unlock;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 2e718728986f..9644f938990c 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1854,28 +1854,20 @@ xfs_inodegc_worker(
}
/*
- * Force all currently queued inode inactivation work to run immediately, and
- * wait for the work to finish. Two pass - queue all the work first pass, wait
- * for it in a second pass.
+ * Force all currently queued inode inactivation work to run immediately and
+ * wait for the work to finish.
*/
void
xfs_inodegc_flush(
struct xfs_mount *mp)
{
- struct xfs_inodegc *gc;
- int cpu;
-
if (!xfs_is_inodegc_enabled(mp))
return;
trace_xfs_inodegc_flush(mp, __return_address);
xfs_inodegc_queue_all(mp);
-
- for_each_online_cpu(cpu) {
- gc = per_cpu_ptr(mp->m_inodegc, cpu);
- flush_work(&gc->work);
- }
+ flush_workqueue(mp->m_inodegc_wq);
}
/*
@@ -1886,18 +1878,12 @@ void
xfs_inodegc_stop(
struct xfs_mount *mp)
{
- struct xfs_inodegc *gc;
- int cpu;
-
if (!xfs_clear_inodegc_enabled(mp))
return;
xfs_inodegc_queue_all(mp);
+ drain_workqueue(mp->m_inodegc_wq);
- for_each_online_cpu(cpu) {
- gc = per_cpu_ptr(mp->m_inodegc, cpu);
- cancel_work_sync(&gc->work);
- }
trace_xfs_inodegc_stop(mp, __return_address);
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8ea47a9d5aad..03a6198c97f6 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -627,87 +627,6 @@ xfs_attrmulti_by_handle(
return error;
}
-int
-xfs_ioc_space(
- struct file *filp,
- xfs_flock64_t *bf)
-{
- struct inode *inode = file_inode(filp);
- struct xfs_inode *ip = XFS_I(inode);
- struct iattr iattr;
- enum xfs_prealloc_flags flags = XFS_PREALLOC_CLEAR;
- uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
- int error;
-
- if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
- return -EPERM;
-
- if (!(filp->f_mode & FMODE_WRITE))
- return -EBADF;
-
- if (!S_ISREG(inode->i_mode))
- return -EINVAL;
-
- if (xfs_is_always_cow_inode(ip))
- return -EOPNOTSUPP;
-
- if (filp->f_flags & O_DSYNC)
- flags |= XFS_PREALLOC_SYNC;
- if (filp->f_mode & FMODE_NOCMTIME)
- flags |= XFS_PREALLOC_INVISIBLE;
-
- error = mnt_want_write_file(filp);
- if (error)
- return error;
-
- xfs_ilock(ip, iolock);
- error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
- if (error)
- goto out_unlock;
- inode_dio_wait(inode);
-
- switch (bf->l_whence) {
- case 0: /*SEEK_SET*/
- break;
- case 1: /*SEEK_CUR*/
- bf->l_start += filp->f_pos;
- break;
- case 2: /*SEEK_END*/
- bf->l_start += XFS_ISIZE(ip);
- break;
- default:
- error = -EINVAL;
- goto out_unlock;
- }
-
- if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes) {
- error = -EINVAL;
- goto out_unlock;
- }
-
- if (bf->l_start > XFS_ISIZE(ip)) {
- error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
- bf->l_start - XFS_ISIZE(ip),
- XFS_BMAPI_PREALLOC);
- if (error)
- goto out_unlock;
- }
-
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = bf->l_start;
- error = xfs_vn_setattr_size(file_mnt_user_ns(filp), file_dentry(filp),
- &iattr);
- if (error)
- goto out_unlock;
-
- error = xfs_update_prealloc_flags(ip, flags);
-
-out_unlock:
- xfs_iunlock(ip, iolock);
- mnt_drop_write_file(filp);
- return error;
-}
-
/* Return 0 on success or positive error */
int
xfs_fsbulkstat_one_fmt(
@@ -1936,6 +1855,15 @@ xfs_fs_eofblocks_from_user(
}
/*
+ * These long-unused ioctls were removed from the official ioctl API in 5.17,
+ * but retain these definitions so that we can log warnings about them.
+ */
+#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
+#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
+#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
+#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
+
+/*
* Note: some of the ioctl's return positive numbers as a
* byte count indicating success, such as readlink_by_handle.
* So we don't "sign flip" like most other routines. This means
@@ -1965,13 +1893,11 @@ xfs_file_ioctl(
case XFS_IOC_ALLOCSP:
case XFS_IOC_FREESP:
case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64: {
- xfs_flock64_t bf;
-
- if (copy_from_user(&bf, arg, sizeof(bf)))
- return -EFAULT;
- return xfs_ioc_space(filp, &bf);
- }
+ case XFS_IOC_FREESP64:
+ xfs_warn_once(mp,
+ "%s should use fallocate; XFS_IOC_{ALLOC,FREE}SP ioctl unsupported",
+ current->comm);
+ return -ENOTTY;
case XFS_IOC_DIOINFO: {
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
struct dioattr da;
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 845d3bcab74b..d4abba2c13c1 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -10,12 +10,6 @@ struct xfs_bstat;
struct xfs_ibulk;
struct xfs_inogrp;
-
-extern int
-xfs_ioc_space(
- struct file *filp,
- xfs_flock64_t *bf);
-
int
xfs_ioc_swapext(
xfs_swapext_t *sxp);
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 8783af203cfc..004ed2a251e8 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -28,22 +28,6 @@
#ifdef BROKEN_X86_ALIGNMENT
STATIC int
-xfs_compat_flock64_copyin(
- xfs_flock64_t *bf,
- compat_xfs_flock64_t __user *arg32)
-{
- if (get_user(bf->l_type, &arg32->l_type) ||
- get_user(bf->l_whence, &arg32->l_whence) ||
- get_user(bf->l_start, &arg32->l_start) ||
- get_user(bf->l_len, &arg32->l_len) ||
- get_user(bf->l_sysid, &arg32->l_sysid) ||
- get_user(bf->l_pid, &arg32->l_pid) ||
- copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
- return -EFAULT;
- return 0;
-}
-
-STATIC int
xfs_compat_ioc_fsgeometry_v1(
struct xfs_mount *mp,
compat_xfs_fsop_geom_v1_t __user *arg32)
@@ -445,17 +429,6 @@ xfs_file_compat_ioctl(
switch (cmd) {
#if defined(BROKEN_X86_ALIGNMENT)
- case XFS_IOC_ALLOCSP_32:
- case XFS_IOC_FREESP_32:
- case XFS_IOC_ALLOCSP64_32:
- case XFS_IOC_FREESP64_32: {
- struct xfs_flock64 bf;
-
- if (xfs_compat_flock64_copyin(&bf, arg))
- return -EFAULT;
- cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
- return xfs_ioc_space(filp, &bf);
- }
case XFS_IOC_FSGEOMETRY_V1_32:
return xfs_compat_ioc_fsgeometry_v1(ip->i_mount, arg);
case XFS_IOC_FSGROWFSDATA_32: {
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index 9929482bf358..c14852362fce 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -142,28 +142,6 @@ typedef struct compat_xfs_fsop_attrmulti_handlereq {
_IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
#ifdef BROKEN_X86_ALIGNMENT
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct compat_xfs_flock64 {
- __s16 l_type;
- __s16 l_whence;
- __s64 l_start __attribute__((packed));
- /* len == 0 means until end of file */
- __s64 l_len __attribute__((packed));
- __s32 l_sysid;
- __u32 l_pid;
- __s32 l_pad[4]; /* reserve area */
-} compat_xfs_flock64_t;
-
-#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64)
-#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
-#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
-
typedef struct compat_xfs_fsop_geom_v1 {
__u32 blocksize; /* filesystem (data) block size */
__u32 rtextsize; /* realtime extent size */
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 3d503e74037f..fd7e8fbaeef1 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -285,7 +285,7 @@ do { \
* write-combining memory accesses before this macro with those after it.
*/
#ifndef io_stop_wc
-#define io_stop_wc do { } while (0)
+#define io_stop_wc() do { } while (0)
#endif
#endif /* !__ASSEMBLY__ */
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index df9b5bc3d282..a47b8a71d6fe 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -20,7 +20,6 @@
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h
index 5a28629cbf4d..d51beff60375 100644
--- a/include/asm-generic/bitops/le.h
+++ b/include/asm-generic/bitops/le.h
@@ -2,83 +2,19 @@
#ifndef _ASM_GENERIC_BITOPS_LE_H_
#define _ASM_GENERIC_BITOPS_LE_H_
-#include <asm-generic/bitops/find.h>
#include <asm/types.h>
#include <asm/byteorder.h>
-#include <linux/swab.h>
#if defined(__LITTLE_ENDIAN)
#define BITOP_LE_SWIZZLE 0
-static inline unsigned long find_next_zero_bit_le(const void *addr,
- unsigned long size, unsigned long offset)
-{
- return find_next_zero_bit(addr, size, offset);
-}
-
-static inline unsigned long find_next_bit_le(const void *addr,
- unsigned long size, unsigned long offset)
-{
- return find_next_bit(addr, size, offset);
-}
-
-static inline unsigned long find_first_zero_bit_le(const void *addr,
- unsigned long size)
-{
- return find_first_zero_bit(addr, size);
-}
-
#elif defined(__BIG_ENDIAN)
#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
-#ifndef find_next_zero_bit_le
-static inline
-unsigned long find_next_zero_bit_le(const void *addr, unsigned
- long size, unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val = *(const unsigned long *)addr;
-
- if (unlikely(offset >= size))
- return size;
-
- val = swab(val) | ~GENMASK(size - 1, offset);
- return val == ~0UL ? size : ffz(val);
- }
-
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
-}
-#endif
-
-#ifndef find_next_bit_le
-static inline
-unsigned long find_next_bit_le(const void *addr, unsigned
- long size, unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val = *(const unsigned long *)addr;
-
- if (unlikely(offset >= size))
- return size;
-
- val = swab(val) & GENMASK(size - 1, offset);
- return val ? __ffs(val) : size;
- }
-
- return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
-}
#endif
-#ifndef find_first_zero_bit_le
-#define find_first_zero_bit_le(addr, size) \
- find_next_zero_bit_le((addr), (size), 0)
-#endif
-
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
static inline int test_bit_le(int nr, const void *addr)
{
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 02932efad3ab..977bea16cf1b 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -147,6 +147,15 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#if CONFIG_PGTABLE_LEVELS > 3
+static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ gfp_t gfp = GFP_PGTABLE_USER;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ return (pud_t *)get_zeroed_page(gfp);
+}
+
#ifndef __HAVE_ARCH_PUD_ALLOC_ONE
/**
* pud_alloc_one - allocate a page for PUD-level page table
@@ -159,20 +168,23 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
*/
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- gfp_t gfp = GFP_PGTABLE_USER;
-
- if (mm == &init_mm)
- gfp = GFP_PGTABLE_KERNEL;
- return (pud_t *)get_zeroed_page(gfp);
+ return __pud_alloc_one(mm, addr);
}
#endif
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
{
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
free_page((unsigned long)pud);
}
+#ifndef __HAVE_ARCH_PUD_FREE
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ __pud_free(mm, pud);
+}
+#endif
+
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#ifndef __HAVE_ARCH_PGD_FREE
diff --git a/include/kunit/assert.h b/include/kunit/assert.h
index ad889b539ab3..ccbc36c0b02f 100644
--- a/include/kunit/assert.h
+++ b/include/kunit/assert.h
@@ -10,7 +10,7 @@
#define _KUNIT_ASSERT_H
#include <linux/err.h>
-#include <linux/kernel.h>
+#include <linux/printk.h>
struct kunit;
struct string_stream;
diff --git a/include/linux/aio.h b/include/linux/aio.h
index b83e68dd006f..86892a4fe7c8 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -20,8 +20,4 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req,
kiocb_cancel_fn *cancel) { }
#endif /* CONFIG_AIO */
-/* for sysctl: */
-extern unsigned long aio_nr;
-extern unsigned long aio_max_nr;
-
#endif /* __LINUX__AIO_H */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index a241dcf50f39..7dba0847510c 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -6,6 +6,7 @@
#include <linux/align.h>
#include <linux/bitops.h>
+#include <linux/find.h>
#include <linux/limits.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -54,12 +55,6 @@ struct device;
* bitmap_clear(dst, pos, nbits) Clear specified bit area
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
* bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above
- * bitmap_next_clear_region(map, &start, &end, nbits) Find next clear region
- * bitmap_next_set_region(map, &start, &end, nbits) Find next set region
- * bitmap_for_each_clear_region(map, rs, re, start, end)
- * Iterate over all clear regions
- * bitmap_for_each_set_region(map, rs, re, start, end)
- * Iterate over all set regions
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
* bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest
@@ -466,14 +461,6 @@ static inline void bitmap_replace(unsigned long *dst,
__bitmap_replace(dst, old, new, mask, nbits);
}
-static inline void bitmap_next_clear_region(unsigned long *bitmap,
- unsigned int *rs, unsigned int *re,
- unsigned int end)
-{
- *rs = find_next_zero_bit(bitmap, end, *rs);
- *re = find_next_bit(bitmap, end, *rs + 1);
-}
-
static inline void bitmap_next_set_region(unsigned long *bitmap,
unsigned int *rs, unsigned int *re,
unsigned int end)
@@ -482,25 +469,6 @@ static inline void bitmap_next_set_region(unsigned long *bitmap,
*re = find_next_zero_bit(bitmap, end, *rs + 1);
}
-/*
- * Bitmap region iterators. Iterates over the bitmap between [@start, @end).
- * @rs and @re should be integer variables and will be set to start and end
- * index of the current clear or set region.
- */
-#define bitmap_for_each_clear_region(bitmap, rs, re, start, end) \
- for ((rs) = (start), \
- bitmap_next_clear_region((bitmap), &(rs), &(re), (end)); \
- (rs) < (re); \
- (rs) = (re) + 1, \
- bitmap_next_clear_region((bitmap), &(rs), &(re), (end)))
-
-#define bitmap_for_each_set_region(bitmap, rs, re, start, end) \
- for ((rs) = (start), \
- bitmap_next_set_region((bitmap), &(rs), &(re), (end)); \
- (rs) < (re); \
- (rs) = (re) + 1, \
- bitmap_next_set_region((bitmap), &(rs), &(re), (end)))
-
/**
* BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap.
* @n: u64 value
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5e62e2383b7f..7aaed501f768 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -32,40 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w);
*/
#include <asm/bitops.h>
-#define for_each_set_bit(bit, addr, size) \
- for ((bit) = find_first_bit((addr), (size)); \
- (bit) < (size); \
- (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-/* same as for_each_set_bit() but use bit as value to start with */
-#define for_each_set_bit_from(bit, addr, size) \
- for ((bit) = find_next_bit((addr), (size), (bit)); \
- (bit) < (size); \
- (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-#define for_each_clear_bit(bit, addr, size) \
- for ((bit) = find_first_zero_bit((addr), (size)); \
- (bit) < (size); \
- (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
-
-/* same as for_each_clear_bit() but use bit as value to start with */
-#define for_each_clear_bit_from(bit, addr, size) \
- for ((bit) = find_next_zero_bit((addr), (size), (bit)); \
- (bit) < (size); \
- (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
-
-/**
- * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
- * @start: bit offset to start search and to store the current iteration offset
- * @clump: location to store copy of current 8-bit clump
- * @bits: bitmap address to base the search on
- * @size: bitmap size in number of bits
- */
-#define for_each_set_clump8(start, clump, bits, size) \
- for ((start) = find_first_clump8(&(clump), (bits), (size)); \
- (start) < (size); \
- (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
-
static inline int get_bitmask_order(unsigned int count)
{
int order;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9c95df26fc26..f35aea98bc35 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1258,6 +1258,7 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
unsigned long start_time);
+void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
unsigned long bio_start_io_acct(struct bio *bio);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
struct block_device *orig_bdev);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6e947cd91152..fa517ae604ad 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -316,7 +316,12 @@ enum bpf_type_flag {
*/
MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS),
- __BPF_TYPE_LAST_FLAG = MEM_RDONLY,
+ /* MEM was "allocated" from a different helper, and cannot be mixed
+ * with regular non-MEM_ALLOC'ed MEM types.
+ */
+ MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS),
+
+ __BPF_TYPE_LAST_FLAG = MEM_ALLOC,
};
/* Max number of base types. */
@@ -400,7 +405,7 @@ enum bpf_return_type {
RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
- RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
+ RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
/* This must be the last entry. Its purpose is to ensure the enum is
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 143401d4c9d9..e9993172f892 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -519,8 +519,8 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
void
bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
-int check_ctx_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno);
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 309acbcb5a8a..6a89ea410e43 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -295,12 +295,13 @@ extern bool libceph_compatible(void *data);
extern const char *ceph_msg_type_name(int type);
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
+extern int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid);
struct fs_parameter;
struct fc_log;
struct ceph_options *ceph_alloc_options(void);
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
- struct fc_log *l);
+ struct fc_log *l, char delim);
int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
struct fc_log *l);
int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 0e6e9ad3c3bf..ff99ce094cfa 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -532,7 +532,7 @@ extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
- int max_count, int *count);
+ int max_count, int *count, char delim);
extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
deleted file mode 100644
index 5f5730c1d324..000000000000
--- a/include/linux/cleancache.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_CLEANCACHE_H
-#define _LINUX_CLEANCACHE_H
-
-#include <linux/fs.h>
-#include <linux/exportfs.h>
-#include <linux/mm.h>
-
-#define CLEANCACHE_NO_POOL -1
-#define CLEANCACHE_NO_BACKEND -2
-#define CLEANCACHE_NO_BACKEND_SHARED -3
-
-#define CLEANCACHE_KEY_MAX 6
-
-/*
- * cleancache requires every file with a page in cleancache to have a
- * unique key unless/until the file is removed/truncated. For some
- * filesystems, the inode number is unique, but for "modern" filesystems
- * an exportable filehandle is required (see exportfs.h)
- */
-struct cleancache_filekey {
- union {
- ino_t ino;
- __u32 fh[CLEANCACHE_KEY_MAX];
- u32 key[CLEANCACHE_KEY_MAX];
- } u;
-};
-
-struct cleancache_ops {
- int (*init_fs)(size_t);
- int (*init_shared_fs)(uuid_t *uuid, size_t);
- int (*get_page)(int, struct cleancache_filekey,
- pgoff_t, struct page *);
- void (*put_page)(int, struct cleancache_filekey,
- pgoff_t, struct page *);
- void (*invalidate_page)(int, struct cleancache_filekey, pgoff_t);
- void (*invalidate_inode)(int, struct cleancache_filekey);
- void (*invalidate_fs)(int);
-};
-
-extern int cleancache_register_ops(const struct cleancache_ops *ops);
-extern void __cleancache_init_fs(struct super_block *);
-extern void __cleancache_init_shared_fs(struct super_block *);
-extern int __cleancache_get_page(struct page *);
-extern void __cleancache_put_page(struct page *);
-extern void __cleancache_invalidate_page(struct address_space *, struct page *);
-extern void __cleancache_invalidate_inode(struct address_space *);
-extern void __cleancache_invalidate_fs(struct super_block *);
-
-#ifdef CONFIG_CLEANCACHE
-#define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
-{
- return mapping->host->i_sb->cleancache_poolid >= 0;
-}
-static inline bool cleancache_fs_enabled(struct page *page)
-{
- return cleancache_fs_enabled_mapping(page->mapping);
-}
-#else
-#define cleancache_enabled (0)
-#define cleancache_fs_enabled(_page) (0)
-#define cleancache_fs_enabled_mapping(_page) (0)
-#endif
-
-/*
- * The shim layer provided by these inline functions allows the compiler
- * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE
- * is disabled, to a single global variable check if CONFIG_CLEANCACHE
- * is enabled but no cleancache "backend" has dynamically enabled it,
- * and, for the most frequent cleancache ops, to a single global variable
- * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled
- * and a cleancache backend has dynamically enabled cleancache, but the
- * filesystem referenced by that cleancache op has not enabled cleancache.
- * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially
- * no measurable performance impact.
- */
-
-static inline void cleancache_init_fs(struct super_block *sb)
-{
- if (cleancache_enabled)
- __cleancache_init_fs(sb);
-}
-
-static inline void cleancache_init_shared_fs(struct super_block *sb)
-{
- if (cleancache_enabled)
- __cleancache_init_shared_fs(sb);
-}
-
-static inline int cleancache_get_page(struct page *page)
-{
- if (cleancache_enabled && cleancache_fs_enabled(page))
- return __cleancache_get_page(page);
- return -1;
-}
-
-static inline void cleancache_put_page(struct page *page)
-{
- if (cleancache_enabled && cleancache_fs_enabled(page))
- __cleancache_put_page(page);
-}
-
-static inline void cleancache_invalidate_page(struct address_space *mapping,
- struct page *page)
-{
- /* careful... page->mapping is NULL sometimes when this is called */
- if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
- __cleancache_invalidate_page(mapping, page);
-}
-
-static inline void cleancache_invalidate_inode(struct address_space *mapping)
-{
- if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
- __cleancache_invalidate_inode(mapping);
-}
-
-static inline void cleancache_invalidate_fs(struct super_block *sb)
-{
- if (cleancache_enabled)
- __cleancache_invalidate_fs(sb);
-}
-
-#endif /* _LINUX_CLEANCACHE_H */
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 78fcd776b185..248a68c668b4 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -14,10 +14,6 @@ struct core_vma_metadata {
unsigned long dump_size;
};
-extern int core_uses_pid;
-extern char core_pattern[];
-extern unsigned int core_pipe_limit;
-
/*
* These are the only things you should do on a core-file: use only these
* functions to write out all the necessary info.
@@ -37,4 +33,10 @@ extern void do_coredump(const kernel_siginfo_t *siginfo);
static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
#endif
+#if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL)
+extern void validate_coredump_safety(void);
+#else
+static inline void validate_coredump_safety(void) {}
+#endif
+
#endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 1e7399fc69c0..64dae70d31f5 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -123,6 +123,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
return 0;
}
+static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
+{
+ return 0;
+}
+
+static inline unsigned int cpumask_first_and(const struct cpumask *srcp1,
+ const struct cpumask *srcp2)
+{
+ return 0;
+}
+
static inline unsigned int cpumask_last(const struct cpumask *srcp)
{
return 0;
@@ -167,7 +178,7 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
const struct cpumask *src2p) {
- return cpumask_next_and(-1, src1p, src2p);
+ return cpumask_first_and(src1p, src2p);
}
static inline int cpumask_any_distribute(const struct cpumask *srcp)
@@ -196,6 +207,30 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
}
/**
+ * cpumask_first_zero - get the first unset cpu in a cpumask
+ * @srcp: the cpumask pointer
+ *
+ * Returns >= nr_cpu_ids if all cpus are set.
+ */
+static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
+{
+ return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
+ * @srcp1: the first input
+ * @srcp2: the second input
+ *
+ * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
+ */
+static inline
+unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
+{
+ return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
+}
+
+/**
* cpumask_last - get the last CPU in a cpumask
* @srcp: - the cpumask pointer
*
@@ -586,15 +621,6 @@ static inline void cpumask_copy(struct cpumask *dstp,
#define cpumask_any(srcp) cpumask_first(srcp)
/**
- * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
- * @src1p: the first input
- * @src2p: the second input
- *
- * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
- */
-#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p))
-
-/**
* cpumask_any_and - pick a "random" cpu from *mask1 & *mask2
* @mask1: the first input cpumask
* @mask2: the second input cpumask
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 9e23d33bb6f1..f5bba51480b2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -61,16 +61,6 @@ extern const struct qstr empty_name;
extern const struct qstr slash_name;
extern const struct qstr dotdot_name;
-struct dentry_stat_t {
- long nr_dentry;
- long nr_unused;
- long age_limit; /* age in seconds */
- long want_pages; /* pages requested by system */
- long nr_negative; /* # of unused negative dentries */
- long dummy; /* Reserved for future use */
-};
-extern struct dentry_stat_t dentry_stat;
-
/*
* Try to keep struct dentry aligned on 64 byte cachelines (this will
* give reasonable cacheline footprint with larger lines without the
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index af7e6eb50283..3e03d010bd2e 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -9,18 +9,9 @@
#include <uapi/linux/taskstats.h>
-/*
- * Per-task flags relevant to delay accounting
- * maintained privately to avoid exhausting similar flags in sched.h:PF_*
- * Used to set current->delays->flags
- */
-#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */
-#define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */
-
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
raw_spinlock_t lock;
- unsigned int flags; /* Private per-task flags */
/* For each stat XXX, add following, aligned appropriately
*
@@ -37,13 +28,13 @@ struct task_delay_info {
* associated with the operation is added to XXX_delay.
* XXX_delay contains the accumulated delay time in nanoseconds.
*/
- u64 blkio_start; /* Shared by blkio, swapin */
+ u64 blkio_start;
u64 blkio_delay; /* wait for sync block io completion */
- u64 swapin_delay; /* wait for swapin block io completion */
+ u64 swapin_start;
+ u64 swapin_delay; /* wait for swapin */
u32 blkio_count; /* total count of the number of sync block */
/* io operations performed */
- u32 swapin_count; /* total count of the number of swapin block */
- /* io operations performed */
+ u32 swapin_count; /* total count of swapin */
u64 freepages_start;
u64 freepages_delay; /* wait for memory reclaim */
@@ -51,8 +42,12 @@ struct task_delay_info {
u64 thrashing_start;
u64 thrashing_delay; /* wait for thrashing page */
+ u64 compact_start;
+ u64 compact_delay; /* wait for memory compact */
+
u32 freepages_count; /* total count of memory reclaim */
u32 thrashing_count; /* total count of thrash waits */
+ u32 compact_count; /* total count of memory compact */
};
#endif
@@ -79,26 +74,10 @@ extern void __delayacct_freepages_start(void);
extern void __delayacct_freepages_end(void);
extern void __delayacct_thrashing_start(void);
extern void __delayacct_thrashing_end(void);
-
-static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
-{
- if (p->delays)
- return (p->delays->flags & DELAYACCT_PF_BLKIO);
- else
- return 0;
-}
-
-static inline void delayacct_set_flag(struct task_struct *p, int flag)
-{
- if (p->delays)
- p->delays->flags |= flag;
-}
-
-static inline void delayacct_clear_flag(struct task_struct *p, int flag)
-{
- if (p->delays)
- p->delays->flags &= ~flag;
-}
+extern void __delayacct_swapin_start(void);
+extern void __delayacct_swapin_end(void);
+extern void __delayacct_compact_start(void);
+extern void __delayacct_compact_end(void);
static inline void delayacct_tsk_init(struct task_struct *tsk)
{
@@ -123,7 +102,6 @@ static inline void delayacct_blkio_start(void)
if (!static_branch_unlikely(&delayacct_key))
return;
- delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
if (current->delays)
__delayacct_blkio_start();
}
@@ -135,7 +113,6 @@ static inline void delayacct_blkio_end(struct task_struct *p)
if (p->delays)
__delayacct_blkio_end(p);
- delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
}
static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
@@ -147,33 +124,77 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
static inline void delayacct_freepages_start(void)
{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
if (current->delays)
__delayacct_freepages_start();
}
static inline void delayacct_freepages_end(void)
{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
if (current->delays)
__delayacct_freepages_end();
}
static inline void delayacct_thrashing_start(void)
{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
if (current->delays)
__delayacct_thrashing_start();
}
static inline void delayacct_thrashing_end(void)
{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
if (current->delays)
__delayacct_thrashing_end();
}
+static inline void delayacct_swapin_start(void)
+{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
+ if (current->delays)
+ __delayacct_swapin_start();
+}
+
+static inline void delayacct_swapin_end(void)
+{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
+ if (current->delays)
+ __delayacct_swapin_end();
+}
+
+static inline void delayacct_compact_start(void)
+{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
+ if (current->delays)
+ __delayacct_compact_start();
+}
+
+static inline void delayacct_compact_end(void)
+{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
+ if (current->delays)
+ __delayacct_compact_end();
+}
+
#else
-static inline void delayacct_set_flag(struct task_struct *p, int flag)
-{}
-static inline void delayacct_clear_flag(struct task_struct *p, int flag)
-{}
static inline void delayacct_init(void)
{}
static inline void delayacct_tsk_init(struct task_struct *tsk)
@@ -199,6 +220,14 @@ static inline void delayacct_thrashing_start(void)
{}
static inline void delayacct_thrashing_end(void)
{}
+static inline void delayacct_swapin_start(void)
+{}
+static inline void delayacct_swapin_end(void)
+{}
+static inline void delayacct_compact_start(void)
+{}
+static inline void delayacct_compact_end(void)
+{}
#endif /* CONFIG_TASK_DELAY_ACCT */
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index b87c3b85a166..b1d26f9f1c9f 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -29,7 +29,6 @@ struct dnotify_struct {
FS_CREATE | FS_RENAME |\
FS_MOVED_FROM | FS_MOVED_TO)
-extern int dir_notify_enable;
extern void dnotify_flush(struct file *, fl_owner_t);
extern int fcntl_dirnotify(int, struct file *, unsigned long);
diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h
index e272c3d452ce..54feb64e9b5d 100644
--- a/include/linux/elfcore-compat.h
+++ b/include/linux/elfcore-compat.h
@@ -43,6 +43,11 @@ struct compat_elf_prpsinfo
__compat_uid_t pr_uid;
__compat_gid_t pr_gid;
compat_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
+ /*
+ * The hard-coded 16 is derived from TASK_COMM_LEN, but it can't be
+ * changed as it is exposed to userspace. We'd better make it hard-coded
+ * here.
+ */
char pr_fname[16];
char pr_psargs[ELF_PRARGSZ];
};
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 957ebec35aad..746e081879a5 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -65,6 +65,11 @@ struct elf_prpsinfo
__kernel_gid_t pr_gid;
pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
/* Lots missing */
+ /*
+ * The hard-coded 16 is derived from TASK_COMM_LEN, but it can't be
+ * changed as it is exposed to userspace. We'd better make it hard-coded
+ * here.
+ */
char pr_fname[16]; /* filename of executable */
char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
};
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index a26f37a27167..11efc45de66a 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -111,7 +111,7 @@ struct ethtool_link_ext_state_info {
enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity;
enum ethtool_link_ext_substate_cable_issue cable_issue;
enum ethtool_link_ext_substate_module module;
- u8 __link_ext_substate;
+ u32 __link_ext_substate;
};
};
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 3afdf339d53c..419cadcd7ff5 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -5,8 +5,6 @@
#include <linux/sysctl.h>
#include <uapi/linux/fanotify.h>
-extern struct ctl_table fanotify_table[]; /* for sysctl */
-
#define FAN_GROUP_FLAG(group, flag) \
((group)->fanotify_data.flags & (flag))
diff --git a/include/linux/find.h b/include/linux/find.h
new file mode 100644
index 000000000000..5bb6db213bcb
--- /dev/null
+++ b/include/linux/find.h
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_FIND_H_
+#define __LINUX_FIND_H_
+
+#ifndef __LINUX_BITMAP_H
+#error only <linux/bitmap.h> can be included directly
+#endif
+
+#include <linux/bitops.h>
+
+extern unsigned long _find_next_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long nbits,
+ unsigned long start, unsigned long invert, unsigned long le);
+extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size);
+extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
+
+#ifndef find_next_bit
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
+}
+#endif
+
+#ifndef find_next_and_bit
+/**
+ * find_next_and_bit - find the next set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr1 & *addr2 & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
+}
+#endif
+
+#ifndef find_next_zero_bit
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next zero bit
+ * If no bits are zero, returns @size.
+ */
+static inline
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr | ~GENMASK(size - 1, offset);
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
+}
+#endif
+
+#ifndef find_first_bit
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first set bit.
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr & GENMASK(size - 1, 0);
+
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_first_bit(addr, size);
+}
+#endif
+
+#ifndef find_first_and_bit
+/**
+ * find_first_and_bit - find the first set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the first set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
+
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_first_and_bit(addr1, addr2, size);
+}
+#endif
+
+#ifndef find_first_zero_bit
+/**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first cleared bit.
+ * If no bits are zero, returns @size.
+ */
+static inline
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr | ~GENMASK(size - 1, 0);
+
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_first_zero_bit(addr, size);
+}
+#endif
+
+#ifndef find_last_bit
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The number of bits to search
+ *
+ * Returns the bit number of the last set bit, or @size if no bits are set.
+ */
+static inline
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr & GENMASK(size - 1, 0);
+
+ return val ? __fls(val) : size;
+ }
+
+ return _find_last_bit(addr, size);
+}
+#endif
+
+/**
+ * find_next_clump8 - find next 8-bit clump with set bits in a memory region
+ * @clump: location to store copy of found clump
+ * @addr: address to base the search on
+ * @size: bitmap size in number of bits
+ * @offset: bit offset at which to start searching
+ *
+ * Returns the bit offset for the next set clump; the found clump value is
+ * copied to the location pointed by @clump. If no bits are set, returns @size.
+ */
+extern unsigned long find_next_clump8(unsigned long *clump,
+ const unsigned long *addr,
+ unsigned long size, unsigned long offset);
+
+#define find_first_clump8(clump, bits, size) \
+ find_next_clump8((clump), (bits), (size), 0)
+
+#if defined(__LITTLE_ENDIAN)
+
+static inline unsigned long find_next_zero_bit_le(const void *addr,
+ unsigned long size, unsigned long offset)
+{
+ return find_next_zero_bit(addr, size, offset);
+}
+
+static inline unsigned long find_next_bit_le(const void *addr,
+ unsigned long size, unsigned long offset)
+{
+ return find_next_bit(addr, size, offset);
+}
+
+static inline unsigned long find_first_zero_bit_le(const void *addr,
+ unsigned long size)
+{
+ return find_first_zero_bit(addr, size);
+}
+
+#elif defined(__BIG_ENDIAN)
+
+#ifndef find_next_zero_bit_le
+static inline
+unsigned long find_next_zero_bit_le(const void *addr, unsigned
+ long size, unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *(const unsigned long *)addr;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = swab(val) | ~GENMASK(size - 1, offset);
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
+}
+#endif
+
+#ifndef find_next_bit_le
+static inline
+unsigned long find_next_bit_le(const void *addr, unsigned
+ long size, unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *(const unsigned long *)addr;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = swab(val) & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
+}
+#endif
+
+#ifndef find_first_zero_bit_le
+#define find_first_zero_bit_le(addr, size) \
+ find_next_zero_bit_le((addr), (size), 0)
+#endif
+
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), 0); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), (bit)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+#define for_each_clear_bit(bit, addr, size) \
+ for ((bit) = find_next_zero_bit((addr), (size), 0); \
+ (bit) < (size); \
+ (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_clear_bit() but use bit as value to start with */
+#define for_each_clear_bit_from(bit, addr, size) \
+ for ((bit) = find_next_zero_bit((addr), (size), (bit)); \
+ (bit) < (size); \
+ (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
+/**
+ * for_each_set_bitrange - iterate over all set bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit)
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_bitrange(b, e, addr, size) \
+ for ((b) = find_next_bit((addr), (size), 0), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1); \
+ (b) < (size); \
+ (b) = find_next_bit((addr), (size), (e) + 1), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_set_bitrange_from - iterate over all set bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit); must be initialized
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_bitrange_from(b, e, addr, size) \
+ for ((b) = find_next_bit((addr), (size), (b)), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1); \
+ (b) < (size); \
+ (b) = find_next_bit((addr), (size), (e) + 1), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_clear_bitrange - iterate over all unset bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first unset bit)
+ * @e: bit offset of end of current bitrange (first set bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_clear_bitrange(b, e, addr, size) \
+ for ((b) = find_next_zero_bit((addr), (size), 0), \
+ (e) = find_next_bit((addr), (size), (b) + 1); \
+ (b) < (size); \
+ (b) = find_next_zero_bit((addr), (size), (e) + 1), \
+ (e) = find_next_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first unset bit); must be initialized
+ * @e: bit offset of end of current bitrange (first set bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_clear_bitrange_from(b, e, addr, size) \
+ for ((b) = find_next_zero_bit((addr), (size), (b)), \
+ (e) = find_next_bit((addr), (size), (b) + 1); \
+ (b) < (size); \
+ (b) = find_next_zero_bit((addr), (size), (e) + 1), \
+ (e) = find_next_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
+ * @start: bit offset to start search and to store the current iteration offset
+ * @clump: location to store copy of current 8-bit clump
+ * @bits: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_clump8(start, clump, bits, size) \
+ for ((start) = find_first_clump8(&(clump), (bits), (size)); \
+ (start) < (size); \
+ (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
+
+#endif /*__LINUX_FIND_H_ */
diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index b07d88c92bb2..a631bac12220 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -7,31 +7,17 @@
#include <linux/bitops.h>
#include <linux/jump_label.h>
-/*
- * Return code to denote that requested number of
- * frontswap pages are unused(moved to page cache).
- * Used in shmem_unuse and try_to_unuse.
- */
-#define FRONTSWAP_PAGES_UNUSED 2
-
struct frontswap_ops {
void (*init)(unsigned); /* this swap type was just swapon'ed */
int (*store)(unsigned, pgoff_t, struct page *); /* store a page */
int (*load)(unsigned, pgoff_t, struct page *); /* load a page */
void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */
void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */
- struct frontswap_ops *next; /* private pointer to next ops */
};
-extern void frontswap_register_ops(struct frontswap_ops *ops);
-extern void frontswap_shrink(unsigned long);
-extern unsigned long frontswap_curr_pages(void);
-extern void frontswap_writethrough(bool);
-#define FRONTSWAP_HAS_EXCLUSIVE_GETS
-extern void frontswap_tmem_exclusive_gets(bool);
+int frontswap_register_ops(const struct frontswap_ops *ops);
-extern bool __frontswap_test(struct swap_info_struct *, pgoff_t);
-extern void __frontswap_init(unsigned type, unsigned long *map);
+extern void frontswap_init(unsigned type, unsigned long *map);
extern int __frontswap_store(struct page *page);
extern int __frontswap_load(struct page *page);
extern void __frontswap_invalidate_page(unsigned, pgoff_t);
@@ -45,11 +31,6 @@ static inline bool frontswap_enabled(void)
return static_branch_unlikely(&frontswap_enabled_key);
}
-static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
-{
- return __frontswap_test(sis, offset);
-}
-
static inline void frontswap_map_set(struct swap_info_struct *p,
unsigned long *map)
{
@@ -68,11 +49,6 @@ static inline bool frontswap_enabled(void)
return false;
}
-static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
-{
- return false;
-}
-
static inline void frontswap_map_set(struct swap_info_struct *p,
unsigned long *map)
{
@@ -112,11 +88,4 @@ static inline void frontswap_invalidate_area(unsigned type)
__frontswap_invalidate_area(type);
}
-static inline void frontswap_init(unsigned type, unsigned long *map)
-{
-#ifdef CONFIG_FRONTSWAP
- __frontswap_init(type, map);
-#endif
-}
-
#endif /* _LINUX_FRONTSWAP_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c8510da6cc6d..f3daaea16554 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -79,15 +79,8 @@ extern void __init inode_init_early(void);
extern void __init files_init(void);
extern void __init files_maxfiles_init(void);
-extern struct files_stat_struct files_stat;
extern unsigned long get_max_files(void);
extern unsigned int sysctl_nr_open;
-extern struct inodes_stat_t inodes_stat;
-extern int leases_enable, lease_break_time;
-extern int sysctl_protected_symlinks;
-extern int sysctl_protected_hardlinks;
-extern int sysctl_protected_fifos;
-extern int sysctl_protected_regular;
typedef __kernel_rwf_t rwf_t;
@@ -1542,11 +1535,6 @@ struct super_block {
const struct dentry_operations *s_d_op; /* default d_op for dentries */
- /*
- * Saved pool identifier for cleancache (-1 means none)
- */
- int cleancache_poolid;
-
struct shrinker s_shrink; /* per-sb shrinker handle */
/* Number of inodes with nlink == 0 but still referenced */
@@ -3533,12 +3521,6 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos);
struct ctl_table;
-int proc_nr_files(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-int proc_nr_dentry(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-int proc_nr_inodes(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
int __init list_bdev_fs_names(char *buf, size_t size);
#define __FMODE_EXEC ((__force int) FMODE_EXEC)
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index ede50406bcb0..296c5f1d9f35 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -665,6 +665,11 @@ static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
static inline
void fscache_note_page_release(struct fscache_cookie *cookie)
{
+ /* If we've written data to the cache (HAVE_DATA) and there wasn't any
+ * data in the cache when we started (NO_DATA_TO_READ), it may no
+ * longer be true that we can skip reading from the cache - so clear
+ * the flag that causes reads to be skipped.
+ */
if (cookie &&
test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) &&
test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags))
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 3a2d7dc3c607..bb8467cd11ae 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -225,16 +225,53 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode,
}
/*
+ * fsnotify_delete - @dentry was unlinked and unhashed
+ *
+ * Caller must make sure that dentry->d_name is stable.
+ *
+ * Note: unlike fsnotify_unlink(), we must also pass the unlinked inode,
+ * as this may be called after d_delete(), when @dentry may be negative.
+ */
+static inline void fsnotify_delete(struct inode *dir, struct inode *inode,
+ struct dentry *dentry)
+{
+ __u32 mask = FS_DELETE;
+
+ if (S_ISDIR(inode->i_mode))
+ mask |= FS_ISDIR;
+
+ fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name,
+ 0);
+}
+
+/**
+ * d_delete_notify - delete a dentry and call fsnotify_delete()
+ * @dentry: The dentry to delete
+ *
+ * This helper is used to guarantee that the unlinked inode cannot be found
+ * by lookup of this name after fsnotify_delete() event has been delivered.
+ */
+static inline void d_delete_notify(struct inode *dir, struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+
+ ihold(inode);
+ d_delete(dentry);
+ fsnotify_delete(dir, inode, dentry);
+ iput(inode);
+}
+
+/*
* fsnotify_unlink - 'name' was unlinked
*
* Caller must make sure that dentry->d_name is stable.
*/
static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry)
{
- /* Expected to be called before d_delete() */
- WARN_ON_ONCE(d_is_negative(dentry));
+ if (WARN_ON_ONCE(d_is_negative(dentry)))
+ return;
- fsnotify_dirent(dir, dentry, FS_DELETE);
+ fsnotify_delete(dir, d_inode(dentry), dentry);
}
/*
@@ -258,10 +295,10 @@ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry)
*/
static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry)
{
- /* Expected to be called before d_delete() */
- WARN_ON_ONCE(d_is_negative(dentry));
+ if (WARN_ON_ONCE(d_is_negative(dentry)))
+ return;
- fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR);
+ fsnotify_delete(dir, d_inode(dentry), dentry);
}
/*
diff --git a/include/linux/hash.h b/include/linux/hash.h
index ad6fa21d977b..38edaa08f862 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val)
return val * GOLDEN_RATIO_32;
}
-#ifndef HAVE_ARCH_HASH_32
-#define hash_32 hash_32_generic
-#endif
-static inline u32 hash_32_generic(u32 val, unsigned int bits)
+static inline u32 hash_32(u32 val, unsigned int bits)
{
/* High bits are more random, so use them. */
return __hash_32(val) >> (32 - bits);
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 6a24905f6e1e..8d20caa1b268 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -7,11 +7,8 @@
#ifndef _LINUX_INOTIFY_H
#define _LINUX_INOTIFY_H
-#include <linux/sysctl.h>
#include <uapi/linux/inotify.h>
-extern struct ctl_table inotify_table[]; /* for sysctl */
-
#define ALL_INOTIFY_BITS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
IN_MOVED_TO | IN_CREATE | IN_DELETE | \
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 055eb203c00e..33f47a996513 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -1,4 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NOTE:
+ *
+ * This header has accumulated a lot of mutually unrelated stuff.
+ * The process of splitting its content is in progress while keeping
+ * backward compatibility. That's why it's highly recommended NOT to
+ * include this header inside another header file, especially under
+ * generic or architectural include/ directory.
+ */
#ifndef _LINUX_KERNEL_H
#define _LINUX_KERNEL_H
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 8c8f7a4d93af..19b884353b15 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -348,12 +348,6 @@ extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
DEFINE_INSN_CACHE_OPS(optinsn);
-#ifdef CONFIG_SYSCTL
-extern int sysctl_kprobes_optimization;
-extern int proc_kprobes_optimization_handler(struct ctl_table *table,
- int write, void *buffer,
- size_t *length, loff_t *ppos);
-#endif /* CONFIG_SYSCTL */
extern void wait_for_kprobe_optimizer(void);
#else /* !CONFIG_OPTPROBES */
static inline void wait_for_kprobe_optimizer(void) { }
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index b6c8aafa8db5..3df4ea04716f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -33,6 +33,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
unsigned int cpu,
const char *namefmt);
+void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk);
bool set_kthread_struct(struct task_struct *p);
void kthread_set_per_cpu(struct task_struct *k, int cpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d89d564f7c19..06912d6b39d0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -309,9 +309,6 @@ struct kvm_vcpu {
u64 requests;
unsigned long guest_debug;
- int pre_pcpu;
- struct list_head blocked_vcpu_list;
-
struct mutex mutex;
struct kvm_run *run;
diff --git a/include/linux/list.h b/include/linux/list.h
index 6636fc07f918..dd6c2041d09c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -258,8 +258,7 @@ static inline void list_bulk_move_tail(struct list_head *head,
* @list: the entry to test
* @head: the head of the list
*/
-static inline int list_is_first(const struct list_head *list,
- const struct list_head *head)
+static inline int list_is_first(const struct list_head *list, const struct list_head *head)
{
return list->prev == head;
}
@@ -269,13 +268,22 @@ static inline int list_is_first(const struct list_head *list,
* @list: the entry to test
* @head: the head of the list
*/
-static inline int list_is_last(const struct list_head *list,
- const struct list_head *head)
+static inline int list_is_last(const struct list_head *list, const struct list_head *head)
{
return list->next == head;
}
/**
+ * list_is_head - tests whether @list is the list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_head(const struct list_head *list, const struct list_head *head)
+{
+ return list == head;
+}
+
+/**
* list_empty - tests whether a list is empty
* @head: the list to test.
*/
@@ -318,7 +326,7 @@ static inline void list_del_init_careful(struct list_head *entry)
static inline int list_empty_careful(const struct list_head *head)
{
struct list_head *next = smp_load_acquire(&head->next);
- return (next == head) && (next == head->prev);
+ return list_is_head(next, head) && (next == head->prev);
}
/**
@@ -393,10 +401,9 @@ static inline void list_cut_position(struct list_head *list,
{
if (list_empty(head))
return;
- if (list_is_singular(head) &&
- (head->next != entry && head != entry))
+ if (list_is_singular(head) && !list_is_head(entry, head) && (entry != head->next))
return;
- if (entry == head)
+ if (list_is_head(entry, head))
INIT_LIST_HEAD(list);
else
__list_cut_position(list, head, entry);
@@ -570,7 +577,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* @head: the head for your list.
*/
#define list_for_each(pos, head) \
- for (pos = (head)->next; pos != (head); pos = pos->next)
+ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)
/**
* list_for_each_continue - continue iteration over a list
@@ -580,7 +587,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* Continue to iterate over a list, continuing after the current position.
*/
#define list_for_each_continue(pos, head) \
- for (pos = pos->next; pos != (head); pos = pos->next)
+ for (pos = pos->next; !list_is_head(pos, (head)); pos = pos->next)
/**
* list_for_each_prev - iterate over a list backwards
@@ -588,7 +595,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* @head: the head for your list.
*/
#define list_for_each_prev(pos, head) \
- for (pos = (head)->prev; pos != (head); pos = pos->prev)
+ for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev)
/**
* list_for_each_safe - iterate over a list safe against removal of list entry
@@ -597,8 +604,9 @@ static inline void list_splice_tail_init(struct list_head *list,
* @head: the head for your list.
*/
#define list_for_each_safe(pos, n, head) \
- for (pos = (head)->next, n = pos->next; pos != (head); \
- pos = n, n = pos->next)
+ for (pos = (head)->next, n = pos->next; \
+ !list_is_head(pos, (head)); \
+ pos = n, n = pos->next)
/**
* list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
@@ -608,7 +616,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_prev_safe(pos, n, head) \
for (pos = (head)->prev, n = pos->prev; \
- pos != (head); \
+ !list_is_head(pos, (head)); \
pos = n, n = pos->prev)
/**
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index a5a724c308d8..819ec92dc2a8 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -80,7 +80,7 @@ LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb,
unsigned long *set_kern_flags)
LSM_HOOK(int, 0, move_mount, const struct path *from_path,
const struct path *to_path)
-LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry,
+LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry,
int mode, const struct qstr *name, const char **xattr_name,
void **ctx, u32 *ctxlen)
LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode,
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index 0661af17a758..808bb4cee230 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -123,7 +123,11 @@ struct cmos_rtc_board_info {
#define RTC_IO_EXTENT_USED RTC_IO_EXTENT
#endif /* ARCH_RTC_LOCATION */
-unsigned int mc146818_get_time(struct rtc_time *time);
+bool mc146818_does_rtc_work(void);
+int mc146818_get_time(struct rtc_time *time);
int mc146818_set_time(struct rtc_time *time);
+bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
+ void *param);
+
#endif /* _MC146818RTC_H */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 4850cc5bf813..db96e10eb8da 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -40,6 +40,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page, int extra_count);
+void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
+ spinlock_t *ptl);
void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
int folio_migrate_mapping(struct address_space *mapping,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index aa47705191bc..213cc569b192 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1199,6 +1199,26 @@ static inline void folio_put(struct folio *folio)
__put_page(&folio->page);
}
+/**
+ * folio_put_refs - Reduce the reference count on a folio.
+ * @folio: The folio.
+ * @refs: The amount to subtract from the folio's reference count.
+ *
+ * If the folio's reference count reaches zero, the memory will be
+ * released back to the page allocator and may be used by another
+ * allocation immediately. Do not access the memory or the struct folio
+ * after calling folio_put_refs() unless you can be sure that these weren't
+ * the last references.
+ *
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context. May be called while holding a spinlock.
+ */
+static inline void folio_put_refs(struct folio *folio, int refs)
+{
+ if (folio_ref_sub_and_test(folio, refs))
+ __put_page(&folio->page);
+}
+
static inline void put_page(struct page *page)
{
struct folio *folio = page_folio(page);
@@ -1486,11 +1506,18 @@ static inline u8 page_kasan_tag(const struct page *page)
static inline void page_kasan_tag_set(struct page *page, u8 tag)
{
- if (kasan_enabled()) {
- tag ^= 0xff;
- page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
- page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
- }
+ unsigned long old_flags, flags;
+
+ if (!kasan_enabled())
+ return;
+
+ tag ^= 0xff;
+ old_flags = READ_ONCE(page->flags);
+ do {
+ flags = old_flags;
+ flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
+ flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
+ } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags)));
}
static inline void page_kasan_tag_reset(struct page *page)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9db36dc5d4cf..5140e5feb486 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -261,6 +261,7 @@ static_assert(sizeof(struct page) == sizeof(struct folio));
static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
FOLIO_MATCH(flags, flags);
FOLIO_MATCH(lru, lru);
+FOLIO_MATCH(mapping, mapping);
FOLIO_MATCH(compound_head, lru);
FOLIO_MATCH(index, index);
FOLIO_MATCH(private, private);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5d92a7e1a742..7f18a7555dff 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -113,9 +113,6 @@ extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);
extern dev_t name_to_dev_t(const char *name);
-
-extern unsigned int sysctl_mount_max;
-
extern bool path_is_mountpoint(const struct path *path);
extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3213c7227b59..e490b84732d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2548,6 +2548,7 @@ struct packet_type {
struct net_device *);
bool (*id_match)(struct packet_type *ptype,
struct sock *sk);
+ struct net *af_packet_net;
void *af_packet_priv;
struct list_head list;
};
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 00835bacd236..02aa49323d1d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -61,7 +61,9 @@
struct nfs_access_entry {
struct rb_node rb_node;
struct list_head lru;
- const struct cred * cred;
+ kuid_t fsuid;
+ kgid_t fsgid;
+ struct group_info *group_info;
__u32 mask;
struct rcu_head rcu_head;
};
@@ -395,7 +397,7 @@ extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fa
extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_getattr(struct user_namespace *, const struct path *,
struct kstat *, u32, unsigned int);
-extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
+extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *, const struct cred *);
extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
extern int nfs_permission(struct user_namespace *, struct inode *, int);
extern int nfs_open(struct inode *, struct file *);
@@ -532,8 +534,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
struct nfs_fattr *fattr);
extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
extern void nfs_access_zap_cache(struct inode *inode);
-extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res,
- bool may_block);
+extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
+ u32 *mask, bool may_block);
/*
* linux/fs/nfs/symlink.c
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 77b2dba27bbb..ca0959e51e81 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -266,6 +266,8 @@ struct nfs_server {
#define NFS_CAP_ACLS (1U << 3)
#define NFS_CAP_ATOMIC_OPEN (1U << 4)
#define NFS_CAP_LGOPEN (1U << 5)
+#define NFS_CAP_CASE_INSENSITIVE (1U << 6)
+#define NFS_CAP_CASE_PRESERVING (1U << 7)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
@@ -282,5 +284,5 @@ struct nfs_server {
#define NFS_CAP_COPY_NOTIFY (1U << 27)
#define NFS_CAP_XATTR (1U << 28)
#define NFS_CAP_READ_PLUS (1U << 29)
-
+#define NFS_CAP_FS_LOCATIONS (1U << 30)
#endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 967a0098f0a9..728cb0c1f0b6 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1194,6 +1194,8 @@ struct nfs4_server_caps_res {
u32 has_links;
u32 has_symlinks;
u32 fh_expire_type;
+ u32 case_insensitive;
+ u32 case_preserving;
};
#define NFS4_PATHNAME_MAXCOMPONENTS 512
@@ -1737,7 +1739,7 @@ struct nfs_rpc_ops {
struct nfs_fh *, struct nfs_fattr *);
int (*lookupp) (struct inode *, struct nfs_fh *,
struct nfs_fattr *);
- int (*access) (struct inode *, struct nfs_access_entry *);
+ int (*access) (struct inode *, struct nfs_access_entry *, const struct cred *);
int (*readlink)(struct inode *, struct page *, unsigned int,
unsigned int);
int (*create) (struct inode *, struct dentry *,
@@ -1795,6 +1797,7 @@ struct nfs_rpc_ops {
struct nfs_server *(*create_server)(struct fs_context *);
struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *,
struct nfs_fattr *, rpc_authflavor_t);
+ int (*discover_trunking)(struct nfs_server *, struct nfs_fh *);
};
/*
diff --git a/include/linux/of.h b/include/linux/of.h
index ff143a027abc..2dc77430a91a 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -364,18 +364,12 @@ extern const struct of_device_id *of_match_node(
const struct of_device_id *matches, const struct device_node *node);
extern int of_modalias_node(struct device_node *node, char *modalias, int len);
extern void of_print_phandle_args(const char *msg, const struct of_phandle_args *args);
-extern struct device_node *of_parse_phandle(const struct device_node *np,
- const char *phandle_name,
- int index);
-extern int of_parse_phandle_with_args(const struct device_node *np,
- const char *list_name, const char *cells_name, int index,
- struct of_phandle_args *out_args);
+extern int __of_parse_phandle_with_args(const struct device_node *np,
+ const char *list_name, const char *cells_name, int cell_count,
+ int index, struct of_phandle_args *out_args);
extern int of_parse_phandle_with_args_map(const struct device_node *np,
const char *list_name, const char *stem_name, int index,
struct of_phandle_args *out_args);
-extern int of_parse_phandle_with_fixed_args(const struct device_node *np,
- const char *list_name, int cells_count, int index,
- struct of_phandle_args *out_args);
extern int of_count_phandle_with_args(const struct device_node *np,
const char *list_name, const char *cells_name);
@@ -416,130 +410,6 @@ extern int of_detach_node(struct device_node *);
#define of_match_ptr(_ptr) (_ptr)
-/**
- * of_property_read_u8_array - Find and read an array of u8 from a property.
- *
- * @np: device node from which the property value is to be read.
- * @propname: name of the property to be searched.
- * @out_values: pointer to return value, modified only if return value is 0.
- * @sz: number of array elements to read
- *
- * Search for a property in a device node and read 8-bit value(s) from
- * it.
- *
- * dts entry of array should be like:
- * ``property = /bits/ 8 <0x50 0x60 0x70>;``
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u8 value can be decoded.
- */
-static inline int of_property_read_u8_array(const struct device_node *np,
- const char *propname,
- u8 *out_values, size_t sz)
-{
- int ret = of_property_read_variable_u8_array(np, propname, out_values,
- sz, 0);
- if (ret >= 0)
- return 0;
- else
- return ret;
-}
-
-/**
- * of_property_read_u16_array - Find and read an array of u16 from a property.
- *
- * @np: device node from which the property value is to be read.
- * @propname: name of the property to be searched.
- * @out_values: pointer to return value, modified only if return value is 0.
- * @sz: number of array elements to read
- *
- * Search for a property in a device node and read 16-bit value(s) from
- * it.
- *
- * dts entry of array should be like:
- * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;``
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u16 value can be decoded.
- */
-static inline int of_property_read_u16_array(const struct device_node *np,
- const char *propname,
- u16 *out_values, size_t sz)
-{
- int ret = of_property_read_variable_u16_array(np, propname, out_values,
- sz, 0);
- if (ret >= 0)
- return 0;
- else
- return ret;
-}
-
-/**
- * of_property_read_u32_array - Find and read an array of 32 bit integers
- * from a property.
- *
- * @np: device node from which the property value is to be read.
- * @propname: name of the property to be searched.
- * @out_values: pointer to return value, modified only if return value is 0.
- * @sz: number of array elements to read
- *
- * Search for a property in a device node and read 32-bit value(s) from
- * it.
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u32 value can be decoded.
- */
-static inline int of_property_read_u32_array(const struct device_node *np,
- const char *propname,
- u32 *out_values, size_t sz)
-{
- int ret = of_property_read_variable_u32_array(np, propname, out_values,
- sz, 0);
- if (ret >= 0)
- return 0;
- else
- return ret;
-}
-
-/**
- * of_property_read_u64_array - Find and read an array of 64 bit integers
- * from a property.
- *
- * @np: device node from which the property value is to be read.
- * @propname: name of the property to be searched.
- * @out_values: pointer to return value, modified only if return value is 0.
- * @sz: number of array elements to read
- *
- * Search for a property in a device node and read 64-bit value(s) from
- * it.
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u64 value can be decoded.
- */
-static inline int of_property_read_u64_array(const struct device_node *np,
- const char *propname,
- u64 *out_values, size_t sz)
-{
- int ret = of_property_read_variable_u64_array(np, propname, out_values,
- sz, 0);
- if (ret >= 0)
- return 0;
- else
- return ret;
-}
-
/*
* struct property *prop;
* const __be32 *p;
@@ -734,32 +604,6 @@ static inline int of_property_count_elems_of_size(const struct device_node *np,
return -ENOSYS;
}
-static inline int of_property_read_u8_array(const struct device_node *np,
- const char *propname, u8 *out_values, size_t sz)
-{
- return -ENOSYS;
-}
-
-static inline int of_property_read_u16_array(const struct device_node *np,
- const char *propname, u16 *out_values, size_t sz)
-{
- return -ENOSYS;
-}
-
-static inline int of_property_read_u32_array(const struct device_node *np,
- const char *propname,
- u32 *out_values, size_t sz)
-{
- return -ENOSYS;
-}
-
-static inline int of_property_read_u64_array(const struct device_node *np,
- const char *propname,
- u64 *out_values, size_t sz)
-{
- return -ENOSYS;
-}
-
static inline int of_property_read_u32_index(const struct device_node *np,
const char *propname, u32 index, u32 *out_value)
{
@@ -865,18 +709,12 @@ static inline int of_property_read_string_helper(const struct device_node *np,
return -ENOSYS;
}
-static inline struct device_node *of_parse_phandle(const struct device_node *np,
- const char *phandle_name,
- int index)
-{
- return NULL;
-}
-
-static inline int of_parse_phandle_with_args(const struct device_node *np,
- const char *list_name,
- const char *cells_name,
- int index,
- struct of_phandle_args *out_args)
+static inline int __of_parse_phandle_with_args(const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int cell_count,
+ int index,
+ struct of_phandle_args *out_args)
{
return -ENOSYS;
}
@@ -890,13 +728,6 @@ static inline int of_parse_phandle_with_args_map(const struct device_node *np,
return -ENOSYS;
}
-static inline int of_parse_phandle_with_fixed_args(const struct device_node *np,
- const char *list_name, int cells_count, int index,
- struct of_phandle_args *out_args)
-{
- return -ENOSYS;
-}
-
static inline int of_count_phandle_with_args(const struct device_node *np,
const char *list_name,
const char *cells_name)
@@ -1078,6 +909,117 @@ static inline bool of_node_is_type(const struct device_node *np, const char *typ
}
/**
+ * of_parse_phandle - Resolve a phandle property to a device_node pointer
+ * @np: Pointer to device node holding phandle property
+ * @phandle_name: Name of property holding a phandle value
+ * @index: For properties holding a table of phandles, this is the index into
+ * the table
+ *
+ * Return: The device_node pointer with refcount incremented. Use
+ * of_node_put() on it when done.
+ */
+static inline struct device_node *of_parse_phandle(const struct device_node *np,
+ const char *phandle_name,
+ int index)
+{
+ struct of_phandle_args args;
+
+ if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0,
+ index, &args))
+ return NULL;
+
+ return args.np;
+}
+
+/**
+ * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
+ * @np: pointer to a device tree node containing a list
+ * @list_name: property name that contains a list
+ * @cells_name: property name that specifies phandles' arguments count
+ * @index: index of a phandle to parse out
+ * @out_args: optional pointer to output arguments structure (will be filled)
+ *
+ * This function is useful to parse lists of phandles and their arguments.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->np
+ * pointer.
+ *
+ * Example::
+ *
+ * phandle1: node1 {
+ * #list-cells = <2>;
+ * };
+ *
+ * phandle2: node2 {
+ * #list-cells = <1>;
+ * };
+ *
+ * node3 {
+ * list = <&phandle1 1 2 &phandle2 3>;
+ * };
+ *
+ * To get a device_node of the ``node2`` node you may call this:
+ * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
+ */
+static inline int of_parse_phandle_with_args(const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int index,
+ struct of_phandle_args *out_args)
+{
+ int cell_count = -1;
+
+ /* If cells_name is NULL we assume a cell count of 0 */
+ if (!cells_name)
+ cell_count = 0;
+
+ return __of_parse_phandle_with_args(np, list_name, cells_name,
+ cell_count, index, out_args);
+}
+
+/**
+ * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list
+ * @np: pointer to a device tree node containing a list
+ * @list_name: property name that contains a list
+ * @cell_count: number of argument cells following the phandle
+ * @index: index of a phandle to parse out
+ * @out_args: optional pointer to output arguments structure (will be filled)
+ *
+ * This function is useful to parse lists of phandles and their arguments.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->np
+ * pointer.
+ *
+ * Example::
+ *
+ * phandle1: node1 {
+ * };
+ *
+ * phandle2: node2 {
+ * };
+ *
+ * node3 {
+ * list = <&phandle1 0 2 &phandle2 2 3>;
+ * };
+ *
+ * To get a device_node of the ``node2`` node you may call this:
+ * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args);
+ */
+static inline int of_parse_phandle_with_fixed_args(const struct device_node *np,
+ const char *list_name,
+ int cell_count,
+ int index,
+ struct of_phandle_args *out_args)
+{
+ return __of_parse_phandle_with_args(np, list_name, NULL, cell_count,
+ index, out_args);
+}
+
+/**
* of_property_count_u8_elems - Count the number of u8 elements in a property
*
* @np: device node from which the property value is to be read.
@@ -1236,6 +1178,130 @@ static inline bool of_property_read_bool(const struct device_node *np,
return prop ? true : false;
}
+/**
+ * of_property_read_u8_array - Find and read an array of u8 from a property.
+ *
+ * @np: device node from which the property value is to be read.
+ * @propname: name of the property to be searched.
+ * @out_values: pointer to return value, modified only if return value is 0.
+ * @sz: number of array elements to read
+ *
+ * Search for a property in a device node and read 8-bit value(s) from
+ * it.
+ *
+ * dts entry of array should be like:
+ * ``property = /bits/ 8 <0x50 0x60 0x70>;``
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u8 value can be decoded.
+ */
+static inline int of_property_read_u8_array(const struct device_node *np,
+ const char *propname,
+ u8 *out_values, size_t sz)
+{
+ int ret = of_property_read_variable_u8_array(np, propname, out_values,
+ sz, 0);
+ if (ret >= 0)
+ return 0;
+ else
+ return ret;
+}
+
+/**
+ * of_property_read_u16_array - Find and read an array of u16 from a property.
+ *
+ * @np: device node from which the property value is to be read.
+ * @propname: name of the property to be searched.
+ * @out_values: pointer to return value, modified only if return value is 0.
+ * @sz: number of array elements to read
+ *
+ * Search for a property in a device node and read 16-bit value(s) from
+ * it.
+ *
+ * dts entry of array should be like:
+ * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;``
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u16 value can be decoded.
+ */
+static inline int of_property_read_u16_array(const struct device_node *np,
+ const char *propname,
+ u16 *out_values, size_t sz)
+{
+ int ret = of_property_read_variable_u16_array(np, propname, out_values,
+ sz, 0);
+ if (ret >= 0)
+ return 0;
+ else
+ return ret;
+}
+
+/**
+ * of_property_read_u32_array - Find and read an array of 32 bit integers
+ * from a property.
+ *
+ * @np: device node from which the property value is to be read.
+ * @propname: name of the property to be searched.
+ * @out_values: pointer to return value, modified only if return value is 0.
+ * @sz: number of array elements to read
+ *
+ * Search for a property in a device node and read 32-bit value(s) from
+ * it.
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u32 value can be decoded.
+ */
+static inline int of_property_read_u32_array(const struct device_node *np,
+ const char *propname,
+ u32 *out_values, size_t sz)
+{
+ int ret = of_property_read_variable_u32_array(np, propname, out_values,
+ sz, 0);
+ if (ret >= 0)
+ return 0;
+ else
+ return ret;
+}
+
+/**
+ * of_property_read_u64_array - Find and read an array of 64 bit integers
+ * from a property.
+ *
+ * @np: device node from which the property value is to be read.
+ * @propname: name of the property to be searched.
+ * @out_values: pointer to return value, modified only if return value is 0.
+ * @sz: number of array elements to read
+ *
+ * Search for a property in a device node and read 64-bit value(s) from
+ * it.
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u64 value can be decoded.
+ */
+static inline int of_property_read_u64_array(const struct device_node *np,
+ const char *propname,
+ u64 *out_values, size_t sz)
+{
+ int ret = of_property_read_variable_u64_array(np, propname, out_values,
+ sz, 0);
+ if (ret >= 0)
+ return 0;
+ else
+ return ret;
+}
+
static inline int of_property_read_u8(const struct device_node *np,
const char *propname,
u8 *out_value)
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index dda8d5868c81..67b1246f136b 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -111,6 +111,7 @@ static_assert(offsetof(struct pagevec, pages) ==
static inline void folio_batch_init(struct folio_batch *fbatch)
{
fbatch->nr = 0;
+ fbatch->percpu_pvec_drained = false;
}
static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index ae4004e7957e..f1ec5ad1351c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -94,10 +94,7 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
extern enum pcpu_fc pcpu_chosen_fc;
-typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
- size_t align);
-typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
-typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
+typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
@@ -111,15 +108,13 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn);
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
#endif
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+void __init pcpu_populate_pte(unsigned long addr);
extern int __init pcpu_page_first_chunk(size_t reserved_size,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn,
- pcpu_fc_populate_pte_fn_t populate_pte_fn);
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
#endif
extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 117f230bcdfd..733649184b27 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -693,18 +693,6 @@ struct perf_event {
u64 total_time_running;
u64 tstamp;
- /*
- * timestamp shadows the actual context timing but it can
- * be safely used in NMI interrupt context. It reflects the
- * context time as it was when the event was last scheduled in,
- * or when ctx_sched_in failed to schedule the event because we
- * run out of PMC.
- *
- * ctx_time already accounts for ctx->timestamp. Therefore to
- * compute ctx_time for a sample, simply add perf_clock().
- */
- u64 shadow_ctx_time;
-
struct perf_event_attr attr;
u16 header_size;
u16 id_header_size;
@@ -852,6 +840,7 @@ struct perf_event_context {
*/
u64 time;
u64 timestamp;
+ u64 timeoffset;
/*
* These fields let us detect when two contexts have both
@@ -934,6 +923,8 @@ struct bpf_perf_event_data_kern {
struct perf_cgroup_info {
u64 time;
u64 timestamp;
+ u64 timeoffset;
+ int active;
};
struct perf_cgroup {
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 7c7e627503d2..07481bb87d4e 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -86,4 +86,9 @@ extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
void pidhash_init(void);
void pid_idr_init(void);
+static inline bool task_is_in_init_pid_ns(struct task_struct *tsk)
+{
+ return task_active_pid_ns(tsk) == &init_pid_ns;
+}
+
#endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index fc5642431b92..c00c618ef290 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -238,10 +238,6 @@ void pipe_lock(struct pipe_inode_info *);
void pipe_unlock(struct pipe_inode_info *);
void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
-extern unsigned int pipe_max_size;
-extern unsigned long pipe_user_pages_hard;
-extern unsigned long pipe_user_pages_soft;
-
/* Wait for a pipe to be readable/writable while dropping the pipe lock */
void pipe_wait_readable(struct pipe_inode_info *);
void pipe_wait_writable(struct pipe_inode_info *);
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 1cdc32b1f1b0..a9e0e1c2d1f2 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -8,12 +8,10 @@
#include <linux/wait.h>
#include <linux/string.h>
#include <linux/fs.h>
-#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <uapi/linux/poll.h>
#include <uapi/linux/eventpoll.h>
-extern struct ctl_table epoll_table[]; /* for sysctl */
/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
additional memory. */
#ifdef __clang__
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9497f6b98339..1522df223c0f 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -183,10 +183,6 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
extern int printk_delay_msec;
extern int dmesg_restrict;
-extern int
-devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf,
- size_t *lenp, loff_t *ppos);
-
extern void wake_up_klogd(void);
char *log_buf_addr_get(void);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 069c7fd95396..81d6e4ec2294 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -110,7 +110,16 @@ extern struct proc_dir_entry *proc_create_data(const char *, umode_t,
struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops);
extern void proc_set_size(struct proc_dir_entry *, loff_t);
extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t);
-extern void *PDE_DATA(const struct inode *);
+
+/*
+ * Obtain the private data passed by user through proc_create_data() or
+ * related.
+ */
+static inline void *pde_data(const struct inode *inode)
+{
+ return inode->i_private;
+}
+
extern void *proc_get_parent_data(const struct inode *);
extern void proc_remove(struct proc_dir_entry *);
extern void remove_proc_entry(const char *, struct proc_dir_entry *);
@@ -178,12 +187,20 @@ static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
#define proc_create_seq(name, mode, parent, ops) ({NULL;})
#define proc_create_single(name, mode, parent, show) ({NULL;})
#define proc_create_single_data(name, mode, parent, show, data) ({NULL;})
-#define proc_create(name, mode, parent, proc_ops) ({NULL;})
-#define proc_create_data(name, mode, parent, proc_ops, data) ({NULL;})
+
+static inline struct proc_dir_entry *
+proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent,
+ const struct proc_ops *proc_ops)
+{ return NULL; }
+
+static inline struct proc_dir_entry *
+proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent,
+ const struct proc_ops *proc_ops, void *data)
+{ return NULL; }
static inline void proc_set_size(struct proc_dir_entry *de, loff_t size) {}
static inline void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) {}
-static inline void *PDE_DATA(const struct inode *inode) {BUG(); return NULL;}
+static inline void *pde_data(const struct inode *inode) {BUG(); return NULL;}
static inline void *proc_get_parent_data(const struct inode *inode) { BUG(); return NULL; }
static inline void proc_remove(struct proc_dir_entry *de) {}
diff --git a/include/linux/psi.h b/include/linux/psi.h
index a70ca833c6d7..7f7d1d88c3bb 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -25,18 +25,17 @@ void psi_memstall_enter(unsigned long *flags);
void psi_memstall_leave(unsigned long *flags);
int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
-
-#ifdef CONFIG_CGROUPS
-int psi_cgroup_alloc(struct cgroup *cgrp);
-void psi_cgroup_free(struct cgroup *cgrp);
-void cgroup_move_task(struct task_struct *p, struct css_set *to);
-
struct psi_trigger *psi_trigger_create(struct psi_group *group,
char *buf, size_t nbytes, enum psi_res res);
-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
+void psi_trigger_destroy(struct psi_trigger *t);
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
poll_table *wait);
+
+#ifdef CONFIG_CGROUPS
+int psi_cgroup_alloc(struct cgroup *cgrp);
+void psi_cgroup_free(struct cgroup *cgrp);
+void cgroup_move_task(struct task_struct *p, struct css_set *to);
#endif
#else /* CONFIG_PSI */
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index 516c0fe836fd..1a3cef26d129 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -141,9 +141,6 @@ struct psi_trigger {
* events to one per window
*/
u64 last_event_time;
-
- /* Refcounting to prevent premature destruction */
- struct kref refcount;
};
struct psi_group {
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 18ebd39c9487..fd692b4a41d5 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -91,7 +91,7 @@ extern bool qid_valid(struct kqid qid);
*
* When there is no mapping defined for the user-namespace, type,
* qid tuple an invalid kqid is returned. Callers are expected to
- * test for and handle handle invalid kqids being returned.
+ * test for and handle invalid kqids being returned.
* Invalid kqids may be tested for using qid_valid().
*/
static inline struct kqid make_kqid(struct user_namespace *from,
diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
index c11c9db5825c..60f3453be23e 100644
--- a/include/linux/ref_tracker.h
+++ b/include/linux/ref_tracker.h
@@ -4,6 +4,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/spinlock.h>
+#include <linux/stackdepot.h>
struct ref_tracker;
@@ -26,6 +27,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
spin_lock_init(&dir->lock);
dir->quarantine_avail = quarantine_count;
refcount_set(&dir->untracked, 1);
+ stack_depot_init();
}
void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 2c0ad417ce3c..8f416c5e929e 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -55,6 +55,12 @@ do { \
#define write_lock(lock) _raw_write_lock(lock)
#define read_lock(lock) _raw_read_lock(lock)
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define write_lock_nested(lock, subclass) _raw_write_lock_nested(lock, subclass)
+#else
+#define write_lock_nested(lock, subclass) _raw_write_lock(lock)
+#endif
+
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
#define read_lock_irqsave(lock, flags) \
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index f1db6f17c4fb..dceb0a59b692 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -17,6 +17,7 @@
void __lockfunc _raw_read_lock(rwlock_t *lock) __acquires(lock);
void __lockfunc _raw_write_lock(rwlock_t *lock) __acquires(lock);
+void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass) __acquires(lock);
void __lockfunc _raw_read_lock_bh(rwlock_t *lock) __acquires(lock);
void __lockfunc _raw_write_lock_bh(rwlock_t *lock) __acquires(lock);
void __lockfunc _raw_read_lock_irq(rwlock_t *lock) __acquires(lock);
@@ -209,6 +210,13 @@ static inline void __raw_write_lock(rwlock_t *lock)
LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
}
+static inline void __raw_write_lock_nested(rwlock_t *lock, int subclass)
+{
+ preempt_disable();
+ rwlock_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
+}
+
#endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */
static inline void __raw_write_unlock(rwlock_t *lock)
diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
index 49c1f3842ed5..8544ff05e594 100644
--- a/include/linux/rwlock_rt.h
+++ b/include/linux/rwlock_rt.h
@@ -28,6 +28,7 @@ extern void rt_read_lock(rwlock_t *rwlock);
extern int rt_read_trylock(rwlock_t *rwlock);
extern void rt_read_unlock(rwlock_t *rwlock);
extern void rt_write_lock(rwlock_t *rwlock);
+extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass);
extern int rt_write_trylock(rwlock_t *rwlock);
extern void rt_write_unlock(rwlock_t *rwlock);
@@ -83,6 +84,15 @@ static __always_inline void write_lock(rwlock_t *rwlock)
rt_write_lock(rwlock);
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static __always_inline void write_lock_nested(rwlock_t *rwlock, int subclass)
+{
+ rt_write_lock_nested(rwlock, subclass);
+}
+#else
+#define write_lock_nested(lock, subclass) rt_write_lock(((void)(subclass), (lock)))
+#endif
+
static __always_inline void write_lock_bh(rwlock_t *rwlock)
{
local_bh_disable();
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index fc0357a6e19b..95df357ec009 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -416,6 +416,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
}
/**
+ * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
+ * @sbq: Bitmap queue to recalculate wake batch.
+ * @users: Number of shares.
+ *
+ * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
+ * by depth. This interface is for HCTX shared tags or queue shared tags.
+ */
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users);
+
+/**
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
* @sbq: Bitmap queue to resize.
* @depth: New number of bits to resize to.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a6a2db5f85ac..f5b2be39a78c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -274,8 +274,13 @@ struct task_group;
#define get_current_state() READ_ONCE(current->__state)
-/* Task command name length: */
-#define TASK_COMM_LEN 16
+/*
+ * Define the task command name length as an enum so that it is visible to
+ * BPF programs.
+ */
+enum {
+ TASK_COMM_LEN = 16,
+};
extern void scheduler_tick(void);
@@ -614,10 +619,6 @@ struct sched_dl_entity {
* task has to wait for a replenishment to be performed at the
* next firing of dl_timer.
*
- * @dl_boosted tells if we are boosted due to DI. If so we are
- * outside bandwidth enforcement mechanism (but only until we
- * exit the critical section);
- *
* @dl_yielded tells if task gave up the CPU before consuming
* all its available runtime during the last job.
*
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 304f431178fd..c19dd5a2c05c 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -7,20 +7,8 @@
struct ctl_table;
#ifdef CONFIG_DETECT_HUNG_TASK
-
-#ifdef CONFIG_SMP
-extern unsigned int sysctl_hung_task_all_cpu_backtrace;
-#else
-#define sysctl_hung_task_all_cpu_backtrace 0
-#endif /* CONFIG_SMP */
-
-extern int sysctl_hung_task_check_count;
-extern unsigned int sysctl_hung_task_panic;
+/* used for hung_task and block/ */
extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
-extern int sysctl_hung_task_warnings;
-int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
#else
/* Avoid need for ifdefs elsewhere in the code */
enum { sysctl_hung_task_timeout_secs = 0 };
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 72dbb44a4573..88cc16444b43 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -209,7 +209,7 @@ static const struct file_operations __name ## _fops = { \
#define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
- return single_open(file, __name ## _show, PDE_DATA(inode)); \
+ return single_open(file, __name ## _show, pde_data(inode)); \
} \
\
static const struct proc_ops __name ## _proc_ops = { \
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 166158b6e917..e65b80ed09e7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -83,8 +83,7 @@ extern void shmem_unlock_mapping(struct address_space *mapping);
extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
-extern int shmem_unuse(unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse);
+int shmem_unuse(unsigned int type);
extern bool shmem_is_huge(struct vm_area_struct *vma,
struct inode *inode, pgoff_t index);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bf11e1fbd69b..8a636e678902 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -318,7 +318,7 @@ enum skb_drop_reason {
SKB_DROP_REASON_NO_SOCKET,
SKB_DROP_REASON_PKT_TOO_SMALL,
SKB_DROP_REASON_TCP_CSUM,
- SKB_DROP_REASON_TCP_FILTER,
+ SKB_DROP_REASON_SOCKET_FILTER,
SKB_DROP_REASON_UDP_CSUM,
SKB_DROP_REASON_MAX,
};
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index d0d188861ad6..b8ba00ccccde 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -59,6 +59,7 @@
#define _raw_spin_lock_nested(lock, subclass) __LOCK(lock)
#define _raw_read_lock(lock) __LOCK(lock)
#define _raw_write_lock(lock) __LOCK(lock)
+#define _raw_write_lock_nested(lock, subclass) __LOCK(lock)
#define _raw_spin_lock_bh(lock) __LOCK_BH(lock)
#define _raw_read_lock_bh(lock) __LOCK_BH(lock)
#define _raw_write_lock_bh(lock) __LOCK_BH(lock)
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index c34b55a6e554..17f992fe6355 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -19,6 +19,22 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
unsigned int nr_entries,
gfp_t gfp_flags, bool can_alloc);
+/*
+ * Every user of stack depot has to call this during its own init when it's
+ * decided that it will be calling stack_depot_save() later.
+ *
+ * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
+ * enabled as part of mm_init(), for subsystems where it's known at compile time
+ * that stack depot will be used.
+ */
+int stack_depot_init(void);
+
+#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
+static inline int stack_depot_early_init(void) { return stack_depot_init(); }
+#else
+static inline int stack_depot_early_init(void) { return 0; }
+#endif
+
depot_stack_handle_t stack_depot_save(unsigned long *entries,
unsigned int nr_entries, gfp_t gfp_flags);
@@ -30,13 +46,4 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
void stack_depot_print(depot_stack_handle_t stack);
-#ifdef CONFIG_STACKDEPOT
-int stack_depot_init(void);
-#else
-static inline int stack_depot_init(void)
-{
- return 0;
-}
-#endif /* CONFIG_STACKDEPOT */
-
#endif
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index a59db2f08e76..ccaab2043fcd 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -23,11 +23,6 @@ static inline void stackleak_task_init(struct task_struct *t)
# endif
}
-#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
-int stack_erasing_sysctl(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
static inline void stackleak_task_init(struct task_struct *t) { }
#endif
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 5785d909c321..3e8ecdebe601 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -430,15 +430,7 @@ struct platform_hibernation_ops {
#ifdef CONFIG_HIBERNATION
/* kernel/power/snapshot.c */
-extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
-static inline void __init register_nosave_region(unsigned long b, unsigned long e)
-{
- __register_nosave_region(b, e, 0);
-}
-static inline void __init register_nosave_region_late(unsigned long b, unsigned long e)
-{
- __register_nosave_region(b, e, 1);
-}
+extern void register_nosave_region(unsigned long b, unsigned long e);
extern int swsusp_page_is_forbidden(struct page *);
extern void swsusp_set_page_free(struct page *);
extern void swsusp_unset_page_free(struct page *);
@@ -458,7 +450,6 @@ int pfn_is_nosave(unsigned long pfn);
int hibernate_quiet_exec(int (*func)(void *data), void *data);
#else /* CONFIG_HIBERNATION */
static inline void register_nosave_region(unsigned long b, unsigned long e) {}
-static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
static inline void swsusp_set_page_free(struct page *p) {}
static inline void swsusp_unset_page_free(struct page *p) {}
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index e06febf62978..54078542134c 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -6,10 +6,7 @@
* these were static in swapfile.c but frontswap.c needs them and we don't
* want to expose them to the dozens of source files that include swap.h
*/
-extern spinlock_t swap_lock;
-extern struct plist_head swap_active_head;
extern struct swap_info_struct *swap_info[];
-extern int try_to_unuse(unsigned int, bool, unsigned long);
extern unsigned long generic_max_swapfile_size(void);
extern unsigned long max_swapfile_size(void);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1fa2b69c6fc3..6353d6db69b2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -38,12 +38,28 @@ struct ctl_table_header;
struct ctl_dir;
/* Keep the same order as in fs/proc/proc_sysctl.c */
-#define SYSCTL_ZERO ((void *)&sysctl_vals[0])
-#define SYSCTL_ONE ((void *)&sysctl_vals[1])
-#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
+#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0])
+#define SYSCTL_ZERO ((void *)&sysctl_vals[1])
+#define SYSCTL_ONE ((void *)&sysctl_vals[2])
+#define SYSCTL_TWO ((void *)&sysctl_vals[3])
+#define SYSCTL_FOUR ((void *)&sysctl_vals[4])
+#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5])
+#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6])
+#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7])
+#define SYSCTL_THREE_THOUSAND ((void *)&sysctl_vals[8])
+#define SYSCTL_INT_MAX ((void *)&sysctl_vals[9])
+
+/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
+#define SYSCTL_MAXOLDUID ((void *)&sysctl_vals[10])
extern const int sysctl_vals[];
+#define SYSCTL_LONG_ZERO ((void *)&sysctl_long_vals[0])
+#define SYSCTL_LONG_ONE ((void *)&sysctl_long_vals[1])
+#define SYSCTL_LONG_MAX ((void *)&sysctl_long_vals[2])
+
+extern const unsigned long sysctl_long_vals[];
+
typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos);
@@ -178,6 +194,20 @@ struct ctl_path {
#ifdef CONFIG_SYSCTL
+#define DECLARE_SYSCTL_BASE(_name, _table) \
+static struct ctl_table _name##_base_table[] = { \
+ { \
+ .procname = #_name, \
+ .mode = 0555, \
+ .child = _table, \
+ }, \
+ { }, \
+}
+
+extern int __register_sysctl_base(struct ctl_table *base_table);
+
+#define register_sysctl_base(_name) __register_sysctl_base(_name##_base_table)
+
void proc_sys_poll_notify(struct ctl_table_poll *poll);
extern void setup_sysctl_set(struct ctl_table_set *p,
@@ -198,8 +228,19 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
void unregister_sysctl_table(struct ctl_table_header * table);
-extern int sysctl_init(void);
+extern int sysctl_init_bases(void);
+extern void __register_sysctl_init(const char *path, struct ctl_table *table,
+ const char *table_name);
+#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
+extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
+
void do_sysctl_args(void);
+int do_proc_douintvec(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data);
extern int pwrsw_enabled;
extern int unaligned_enabled;
@@ -207,16 +248,28 @@ extern int unaligned_dump_stack;
extern int no_unaligned_warning;
extern struct ctl_table sysctl_mount_point[];
-extern struct ctl_table random_table[];
-extern struct ctl_table firmware_config_table[];
-extern struct ctl_table epoll_table[];
#else /* CONFIG_SYSCTL */
+
+#define DECLARE_SYSCTL_BASE(_name, _table)
+
+static inline int __register_sysctl_base(struct ctl_table *base_table)
+{
+ return 0;
+}
+
+#define register_sysctl_base(table) __register_sysctl_base(table)
+
static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{
return NULL;
}
+static inline struct ctl_table_header *register_sysctl_mount_point(const char *path)
+{
+ return NULL;
+}
+
static inline struct ctl_table_header *register_sysctl_paths(
const struct ctl_path *path, struct ctl_table *table)
{
diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h
index c0d817de4df2..f4c8eaf4d012 100644
--- a/include/linux/unaligned/packed_struct.h
+++ b/include/linux/unaligned/packed_struct.h
@@ -1,7 +1,7 @@
#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H
#define _LINUX_UNALIGNED_PACKED_STRUCT_H
-#include <linux/kernel.h>
+#include <linux/types.h>
struct __una_u16 { u16 x; } __packed;
struct __una_u32 { u32 x; } __packed;
diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h
index 031f148ab373..b5deafd91f67 100644
--- a/include/linux/usb/role.h
+++ b/include/linux/usb/role.h
@@ -92,6 +92,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node)
static inline void usb_role_switch_put(struct usb_role_switch *sw) { }
static inline struct usb_role_switch *
+usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode)
+{
+ return NULL;
+}
+
+static inline struct usb_role_switch *
usb_role_switch_register(struct device *parent,
const struct usb_role_switch_desc *desc)
{
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 78ea3e332688..e7ce719838b5 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -6,6 +6,8 @@
#define RTR_SOLICITATION_INTERVAL (4*HZ)
#define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */
+#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */
+
#define TEMP_VALID_LIFETIME (7*86400)
#define TEMP_PREFERRED_LIFETIME (86400)
#define REGEN_MAX_RETRY (3)
diff --git a/include/net/bonding.h b/include/net/bonding.h
index f6ae3a4baea4..83cfd2d70247 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -346,7 +346,7 @@ static inline bool bond_uses_primary(struct bonding *bond)
static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond)
{
- struct slave *slave = rcu_dereference(bond->curr_active_slave);
+ struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave);
return bond_uses_primary(bond) && slave ? slave->dev : NULL;
}
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 48cc5795ceda..63540be0fc34 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
static inline void fqdir_pre_exit(struct fqdir *fqdir)
{
- fqdir->high_thresh = 0; /* prevent creation of new frags */
- fqdir->dead = true;
+ /* Prevent creation of new frags.
+ * Pairs with READ_ONCE() in inet_frag_find().
+ */
+ WRITE_ONCE(fqdir->high_thresh, 0);
+
+ /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
+ * and ip6frag_expire_frag_queue().
+ */
+ WRITE_ONCE(fqdir->dead, true);
}
void fqdir_exit(struct fqdir *fqdir);
diff --git a/include/net/ip.h b/include/net/ip.h
index 81e23a102a0d..b51bae43b0dd 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -525,19 +525,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
{
struct iphdr *iph = ip_hdr(skb);
+ /* We had many attacks based on IPID; use the private
+ * generator as much as we can.
+ */
+ if (sk && inet_sk(sk)->inet_daddr) {
+ iph->id = htons(inet_sk(sk)->inet_id);
+ inet_sk(sk)->inet_id += segs;
+ return;
+ }
if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
- /* This is only to work around buggy Windows95/2000
- * VJ compression implementations. If the ID field
- * does not change, they drop every other packet in
- * a TCP stream using header compression.
- */
- if (sk && inet_sk(sk)->inet_daddr) {
- iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += segs;
- } else {
- iph->id = 0;
- }
+ iph->id = 0;
} else {
+ /* Unfortunately we need the big hammer to get a suitable IPID */
__ip_select_ident(net, iph, segs);
}
}
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index a9a4ccc0cdb5..40ae8f1b18e5 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -282,7 +282,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
fn = rcu_dereference(f6i->fib6_node);
if (fn) {
- *cookie = fn->fn_sernum;
+ *cookie = READ_ONCE(fn->fn_sernum);
/* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */
smp_rmb();
status = true;
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 851029ecff13..0a4779175a52 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
struct sk_buff *head;
rcu_read_lock();
- if (fq->q.fqdir->dead)
+ /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(fq->q.fqdir->dead))
goto out_rcu_unlock;
spin_lock(&fq->q.lock);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ebef45e821af..676cb8ea9e15 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -218,8 +218,10 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
#ifdef CONFIG_NET_CLS_ACT
exts->type = 0;
exts->nr_actions = 0;
+ /* Note: we do not yet own a reference on net.
+ * This reference might be taken later from tcf_exts_get_net().
+ */
exts->net = net;
- netns_tracker_alloc(net, &exts->ns_tracker, GFP_KERNEL);
exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
GFP_KERNEL);
if (!exts->actions)
diff --git a/include/net/route.h b/include/net/route.h
index 4c858dcf1aa8..25404fc2b483 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -370,7 +370,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
{
struct neighbour *neigh;
- neigh = __ipv4_neigh_lookup_noref(dev, daddr);
+ neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &daddr, dev, false);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c11dbac5abb2..472843eedbae 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1244,6 +1244,7 @@ struct psched_ratecfg {
u64 rate_bytes_ps; /* bytes per second */
u32 mult;
u16 overhead;
+ u16 mpu;
u8 linklayer;
u8 shift;
};
@@ -1253,6 +1254,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
{
len += r->overhead;
+ if (len < r->mpu)
+ len = r->mpu;
+
if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
@@ -1275,6 +1279,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
res->rate = min_t(u64, r->rate_bytes_ps, ~0U);
res->overhead = r->overhead;
+ res->mpu = r->mpu;
res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 44e442bf23f9..b9fc978fb2ca 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1369,6 +1369,7 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
+#ifdef CONFIG_INET
void __sk_defer_free_flush(struct sock *sk);
static inline void sk_defer_free_flush(struct sock *sk)
@@ -1377,6 +1378,9 @@ static inline void sk_defer_free_flush(struct sock *sk)
return;
__sk_defer_free_flush(sk);
}
+#else
+static inline void sk_defer_free_flush(struct sock *sk) {}
+#endif
int tcp_filter(struct sock *sk, struct sk_buff *skb);
void tcp_set_state(struct sock *sk, int state);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index ab7557d84f75..647c53b26105 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -415,9 +415,8 @@ extern int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
int retries, struct scsi_mode_data *data,
struct scsi_sense_hdr *);
extern int scsi_mode_select(struct scsi_device *sdev, int pf, int sp,
- int modepage, unsigned char *buffer, int len,
- int timeout, int retries,
- struct scsi_mode_data *data,
+ unsigned char *buffer, int len, int timeout,
+ int retries, struct scsi_mode_data *data,
struct scsi_sense_hdr *);
extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout,
int retries, struct scsi_sense_hdr *sshdr);
diff --git a/include/scsi/sg.h b/include/scsi/sg.h
index 843cefb8efce..068e35d36557 100644
--- a/include/scsi/sg.h
+++ b/include/scsi/sg.h
@@ -29,10 +29,6 @@
* For utility and test programs see: http://sg.danny.cz/sg/sg3_utils.html
*/
-#ifdef __KERNEL__
-extern int sg_big_buff; /* for sysctl */
-#endif
-
typedef struct sg_iovec /* same structure as used by readv() Linux system */
{ /* call. It defines one scatter-gather element. */
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 1172529b5b49..c6f5aa74db89 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -233,25 +233,48 @@ TRACE_EVENT(cachefiles_ref,
TRACE_EVENT(cachefiles_lookup,
TP_PROTO(struct cachefiles_object *obj,
+ struct dentry *dir,
struct dentry *de),
- TP_ARGS(obj, de),
+ TP_ARGS(obj, dir, de),
TP_STRUCT__entry(
__field(unsigned int, obj )
__field(short, error )
+ __field(unsigned long, dino )
__field(unsigned long, ino )
),
TP_fast_assign(
- __entry->obj = obj->debug_id;
+ __entry->obj = obj ? obj->debug_id : 0;
+ __entry->dino = d_backing_inode(dir)->i_ino;
__entry->ino = (!IS_ERR(de) && d_backing_inode(de) ?
d_backing_inode(de)->i_ino : 0);
__entry->error = IS_ERR(de) ? PTR_ERR(de) : 0;
),
- TP_printk("o=%08x i=%lx e=%d",
- __entry->obj, __entry->ino, __entry->error)
+ TP_printk("o=%08x dB=%lx B=%lx e=%d",
+ __entry->obj, __entry->dino, __entry->ino, __entry->error)
+ );
+
+TRACE_EVENT(cachefiles_mkdir,
+ TP_PROTO(struct dentry *dir, struct dentry *subdir),
+
+ TP_ARGS(dir, subdir),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, dir )
+ __field(unsigned int, subdir )
+ ),
+
+ TP_fast_assign(
+ __entry->dir = d_backing_inode(dir)->i_ino;
+ __entry->subdir = d_backing_inode(subdir)->i_ino;
+ ),
+
+ TP_printk("dB=%x sB=%x",
+ __entry->dir,
+ __entry->subdir)
);
TRACE_EVENT(cachefiles_tmpfile,
@@ -269,7 +292,7 @@ TRACE_EVENT(cachefiles_tmpfile,
__entry->backer = backer->i_ino;
),
- TP_printk("o=%08x b=%08x",
+ TP_printk("o=%08x B=%x",
__entry->obj,
__entry->backer)
);
@@ -289,61 +312,58 @@ TRACE_EVENT(cachefiles_link,
__entry->backer = backer->i_ino;
),
- TP_printk("o=%08x b=%08x",
+ TP_printk("o=%08x B=%x",
__entry->obj,
__entry->backer)
);
TRACE_EVENT(cachefiles_unlink,
TP_PROTO(struct cachefiles_object *obj,
- struct dentry *de,
+ ino_t ino,
enum fscache_why_object_killed why),
- TP_ARGS(obj, de, why),
+ TP_ARGS(obj, ino, why),
/* Note that obj may be NULL */
TP_STRUCT__entry(
__field(unsigned int, obj )
- __field(struct dentry *, de )
+ __field(unsigned int, ino )
__field(enum fscache_why_object_killed, why )
),
TP_fast_assign(
__entry->obj = obj ? obj->debug_id : UINT_MAX;
- __entry->de = de;
+ __entry->ino = ino;
__entry->why = why;
),
- TP_printk("o=%08x d=%p w=%s",
- __entry->obj, __entry->de,
+ TP_printk("o=%08x B=%x w=%s",
+ __entry->obj, __entry->ino,
__print_symbolic(__entry->why, cachefiles_obj_kill_traces))
);
TRACE_EVENT(cachefiles_rename,
TP_PROTO(struct cachefiles_object *obj,
- struct dentry *de,
- struct dentry *to,
+ ino_t ino,
enum fscache_why_object_killed why),
- TP_ARGS(obj, de, to, why),
+ TP_ARGS(obj, ino, why),
/* Note that obj may be NULL */
TP_STRUCT__entry(
__field(unsigned int, obj )
- __field(struct dentry *, de )
- __field(struct dentry *, to )
+ __field(unsigned int, ino )
__field(enum fscache_why_object_killed, why )
),
TP_fast_assign(
__entry->obj = obj ? obj->debug_id : UINT_MAX;
- __entry->de = de;
- __entry->to = to;
+ __entry->ino = ino;
__entry->why = why;
),
- TP_printk("o=%08x d=%p t=%p w=%s",
- __entry->obj, __entry->de, __entry->to,
+ TP_printk("o=%08x B=%x w=%s",
+ __entry->obj, __entry->ino,
__print_symbolic(__entry->why, cachefiles_obj_kill_traces))
);
@@ -370,7 +390,7 @@ TRACE_EVENT(cachefiles_coherency,
__entry->ino = ino;
),
- TP_printk("o=%08x %s i=%llx c=%u",
+ TP_printk("o=%08x %s B=%llx c=%u",
__entry->obj,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino,
@@ -397,7 +417,7 @@ TRACE_EVENT(cachefiles_vol_coherency,
__entry->ino = ino;
),
- TP_printk("V=%08x %s i=%llx",
+ TP_printk("V=%08x %s B=%llx",
__entry->vol,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino)
@@ -435,7 +455,7 @@ TRACE_EVENT(cachefiles_prep_read,
__entry->cache_inode = cache_inode;
),
- TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x b=%x",
+ TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x B=%x",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->why, cachefiles_prepare_read_traces),
@@ -466,7 +486,7 @@ TRACE_EVENT(cachefiles_read,
__entry->len = len;
),
- TP_printk("o=%08x b=%08x s=%llx l=%zx",
+ TP_printk("o=%08x B=%x s=%llx l=%zx",
__entry->obj,
__entry->backer,
__entry->start,
@@ -495,7 +515,7 @@ TRACE_EVENT(cachefiles_write,
__entry->len = len;
),
- TP_printk("o=%08x b=%08x s=%llx l=%zx",
+ TP_printk("o=%08x B=%x s=%llx l=%zx",
__entry->obj,
__entry->backer,
__entry->start,
@@ -524,7 +544,7 @@ TRACE_EVENT(cachefiles_trunc,
__entry->why = why;
),
- TP_printk("o=%08x b=%08x %s l=%llx->%llx",
+ TP_printk("o=%08x B=%x %s l=%llx->%llx",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->why, cachefiles_trunc_traces),
@@ -549,7 +569,28 @@ TRACE_EVENT(cachefiles_mark_active,
__entry->inode = inode->i_ino;
),
- TP_printk("o=%08x i=%lx",
+ TP_printk("o=%08x B=%lx",
+ __entry->obj, __entry->inode)
+ );
+
+TRACE_EVENT(cachefiles_mark_failed,
+ TP_PROTO(struct cachefiles_object *obj,
+ struct inode *inode),
+
+ TP_ARGS(obj, inode),
+
+ /* Note that obj may be NULL */
+ TP_STRUCT__entry(
+ __field(unsigned int, obj )
+ __field(ino_t, inode )
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj ? obj->debug_id : 0;
+ __entry->inode = inode->i_ino;
+ ),
+
+ TP_printk("o=%08x B=%lx",
__entry->obj, __entry->inode)
);
@@ -570,7 +611,7 @@ TRACE_EVENT(cachefiles_mark_inactive,
__entry->inode = inode->i_ino;
),
- TP_printk("o=%08x i=%lx",
+ TP_printk("o=%08x B=%lx",
__entry->obj, __entry->inode)
);
@@ -594,7 +635,7 @@ TRACE_EVENT(cachefiles_vfs_error,
__entry->where = where;
),
- TP_printk("o=%08x b=%08x %s e=%d",
+ TP_printk("o=%08x B=%x %s e=%d",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->where, cachefiles_error_traces),
@@ -621,7 +662,7 @@ TRACE_EVENT(cachefiles_io_error,
__entry->where = where;
),
- TP_printk("o=%08x b=%08x %s e=%d",
+ TP_printk("o=%08x B=%x %s e=%d",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->where, cachefiles_error_traces),
diff --git a/include/trace/events/error_report.h b/include/trace/events/error_report.h
index 96f64bf218b2..a1922a800e6f 100644
--- a/include/trace/events/error_report.h
+++ b/include/trace/events/error_report.h
@@ -17,14 +17,16 @@
enum error_detector {
ERROR_DETECTOR_KFENCE,
- ERROR_DETECTOR_KASAN
+ ERROR_DETECTOR_KASAN,
+ ERROR_DETECTOR_WARN,
};
#endif /* __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY */
-#define error_detector_list \
+#define error_detector_list \
EM(ERROR_DETECTOR_KFENCE, "kfence") \
- EMe(ERROR_DETECTOR_KASAN, "kasan")
+ EM(ERROR_DETECTOR_KASAN, "kasan") \
+ EMe(ERROR_DETECTOR_WARN, "warning")
/* Always end the list with an EMe. */
#undef EM
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 3e042ca2cedb..a8a64b97504d 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -14,7 +14,7 @@
EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \
EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \
EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \
- EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \
+ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \
EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \
EMe(SKB_DROP_REASON_MAX, MAX)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 1e566ac4b812..29982d60b68a 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -794,6 +794,9 @@ RPC_SHOW_SOCKET
RPC_SHOW_SOCK
+
+#include <trace/events/net_probe_common.h>
+
/*
* Now redefine the EM() and EMe() macros to map the enums to the strings
* that will be printed in the output.
@@ -816,27 +819,32 @@ DECLARE_EVENT_CLASS(xs_socket_event,
__field(unsigned int, socket_state)
__field(unsigned int, sock_state)
__field(unsigned long long, ino)
- __string(dstaddr,
- xprt->address_strings[RPC_DISPLAY_ADDR])
- __string(dstport,
- xprt->address_strings[RPC_DISPLAY_PORT])
+ __array(__u8, saddr, sizeof(struct sockaddr_in6))
+ __array(__u8, daddr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
struct inode *inode = SOCK_INODE(socket);
+ const struct sock *sk = socket->sk;
+ const struct inet_sock *inet = inet_sk(sk);
+
+ memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+ memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+ TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
__entry->socket_state = socket->state;
__entry->sock_state = socket->sk->sk_state;
__entry->ino = (unsigned long long)inode->i_ino;
- __assign_str(dstaddr,
- xprt->address_strings[RPC_DISPLAY_ADDR]);
- __assign_str(dstport,
- xprt->address_strings[RPC_DISPLAY_PORT]);
+
),
TP_printk(
- "socket:[%llu] dstaddr=%s/%s "
+ "socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc "
"state=%u (%s) sk_state=%u (%s)",
- __entry->ino, __get_str(dstaddr), __get_str(dstport),
+ __entry->ino,
+ __entry->saddr,
+ __entry->daddr,
__entry->socket_state,
rpc_show_socket_state(__entry->socket_state),
__entry->sock_state,
@@ -866,29 +874,33 @@ DECLARE_EVENT_CLASS(xs_socket_event_done,
__field(unsigned int, socket_state)
__field(unsigned int, sock_state)
__field(unsigned long long, ino)
- __string(dstaddr,
- xprt->address_strings[RPC_DISPLAY_ADDR])
- __string(dstport,
- xprt->address_strings[RPC_DISPLAY_PORT])
+ __array(__u8, saddr, sizeof(struct sockaddr_in6))
+ __array(__u8, daddr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
struct inode *inode = SOCK_INODE(socket);
+ const struct sock *sk = socket->sk;
+ const struct inet_sock *inet = inet_sk(sk);
+
+ memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+ memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+ TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
__entry->socket_state = socket->state;
__entry->sock_state = socket->sk->sk_state;
__entry->ino = (unsigned long long)inode->i_ino;
__entry->error = error;
- __assign_str(dstaddr,
- xprt->address_strings[RPC_DISPLAY_ADDR]);
- __assign_str(dstport,
- xprt->address_strings[RPC_DISPLAY_PORT]);
),
TP_printk(
- "error=%d socket:[%llu] dstaddr=%s/%s "
+ "error=%d socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc "
"state=%u (%s) sk_state=%u (%s)",
__entry->error,
- __entry->ino, __get_str(dstaddr), __get_str(dstport),
+ __entry->ino,
+ __entry->saddr,
+ __entry->daddr,
__entry->socket_state,
rpc_show_socket_state(__entry->socket_state),
__entry->sock_state,
@@ -953,7 +965,8 @@ TRACE_EVENT(rpc_socket_nospace,
{ BIT(XPRT_REMOVE), "REMOVE" }, \
{ BIT(XPRT_CONGESTED), "CONGESTED" }, \
{ BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \
- { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" })
+ { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }, \
+ { BIT(XPRT_SND_IS_COOKIE), "SND_IS_COOKIE" })
DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class,
TP_PROTO(
@@ -1150,8 +1163,11 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,
__entry->task_id = -1;
__entry->client_id = -1;
}
- __entry->snd_task_id = xprt->snd_task ?
- xprt->snd_task->tk_pid : -1;
+ if (xprt->snd_task &&
+ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
+ __entry->snd_task_id = xprt->snd_task->tk_pid;
+ else
+ __entry->snd_task_id = -1;
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
@@ -1196,8 +1212,12 @@ DECLARE_EVENT_CLASS(xprt_cong_event,
__entry->task_id = -1;
__entry->client_id = -1;
}
- __entry->snd_task_id = xprt->snd_task ?
- xprt->snd_task->tk_pid : -1;
+ if (xprt->snd_task &&
+ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
+ __entry->snd_task_id = xprt->snd_task->tk_pid;
+ else
+ __entry->snd_task_id = -1;
+
__entry->cong = xprt->cong;
__entry->cwnd = xprt->cwnd;
__entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
diff --git a/include/trace/perf.h b/include/trace/perf.h
index ea4405de175a..5d48c46a3008 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -23,8 +23,9 @@
#undef __get_rel_dynamic_array
#define __get_rel_dynamic_array(field) \
- ((void *)(&__entry->__rel_loc_##field) + \
- sizeof(__entry->__rel_loc_##field) + \
+ ((void *)__entry + \
+ offsetof(typeof(*__entry), __rel_loc_##field) + \
+ sizeof(__entry->__rel_loc_##field) + \
(__entry->__rel_loc_##field & 0xffff))
#undef __get_rel_dynamic_array_len
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 8c6f7c433518..3d29919045af 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -128,7 +128,7 @@ TRACE_MAKE_SYSTEM_STR();
struct trace_event_raw_##name { \
struct trace_entry ent; \
tstruct \
- char __data[0]; \
+ char __data[]; \
}; \
\
static struct trace_event_class event_class_##name;
@@ -318,9 +318,10 @@ TRACE_MAKE_SYSTEM_STR();
#define __get_str(field) ((char *)__get_dynamic_array(field))
#undef __get_rel_dynamic_array
-#define __get_rel_dynamic_array(field) \
- ((void *)(&__entry->__rel_loc_##field) + \
- sizeof(__entry->__rel_loc_##field) + \
+#define __get_rel_dynamic_array(field) \
+ ((void *)__entry + \
+ offsetof(typeof(*__entry), __rel_loc_##field) + \
+ sizeof(__entry->__rel_loc_##field) + \
(__entry->__rel_loc_##field & 0xffff))
#undef __get_rel_dynamic_array_len
diff --git a/include/uapi/linux/cyclades.h b/include/uapi/linux/cyclades.h
new file mode 100644
index 000000000000..6225c5aebe06
--- /dev/null
+++ b/include/uapi/linux/cyclades.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_CYCLADES_H
+#define _UAPI_LINUX_CYCLADES_H
+
+#warning "Support for features provided by this header has been removed"
+#warning "Please consider updating your code"
+
+struct cyclades_monitor {
+ unsigned long int_count;
+ unsigned long char_count;
+ unsigned long char_max;
+ unsigned long char_last;
+};
+
+#define CYGETMON 0x435901
+#define CYGETTHRESH 0x435902
+#define CYSETTHRESH 0x435903
+#define CYGETDEFTHRESH 0x435904
+#define CYSETDEFTHRESH 0x435905
+#define CYGETTIMEOUT 0x435906
+#define CYSETTIMEOUT 0x435907
+#define CYGETDEFTIMEOUT 0x435908
+#define CYSETDEFTIMEOUT 0x435909
+#define CYSETRFLOW 0x43590a
+#define CYGETRFLOW 0x43590b
+#define CYSETRTSDTR_INV 0x43590c
+#define CYGETRTSDTR_INV 0x43590d
+#define CYZSETPOLLCYCLE 0x43590e
+#define CYZGETPOLLCYCLE 0x43590f
+#define CYGETCD1400VER 0x435910
+#define CYSETWAIT 0x435912
+#define CYGETWAIT 0x435913
+
+#endif /* _UAPI_LINUX_CYCLADES_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 9563d294f181..b46bcdb0cab1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
#define KVM_CAP_VM_GPA_BITS 207
#define KVM_CAP_XSAVE2 208
+#define KVM_CAP_SYS_ATTRIBUTES 209
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index dae5df88ab10..0425cd79af9a 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -6,6 +6,7 @@
#define AFFS_SUPER_MAGIC 0xadff
#define AFS_SUPER_MAGIC 0x5346414F
#define AUTOFS_SUPER_MAGIC 0x0187
+#define CEPH_SUPER_MAGIC 0x00c36400
#define CODA_SUPER_MAGIC 0x73757245
#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */
#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index ccbd08709321..12327d32378f 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@
*/
-#define TASKSTATS_VERSION 10
+#define TASKSTATS_VERSION 11
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -172,6 +172,10 @@ struct taskstats {
/* v10: 64-bit btime to avoid overflow */
__u64 ac_btime64; /* 64-bit begin time */
+
+ /* Delay waiting for memory compact */
+ __u64 compact_count;
+ __u64 compact_delay_total;
};
diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h
index e5a7eecef7c3..c0f4bd9b040e 100644
--- a/include/uapi/linux/uuid.h
+++ b/include/uapi/linux/uuid.h
@@ -1,18 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* DO NOT USE in new code! This is solely for MEI due to legacy reasons */
/*
* UUID/GUID definition
*
* Copyright (C) 2010, Intel Corp.
* Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation;
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#ifndef _UAPI_LINUX_UUID_H_
diff --git a/init/main.c b/init/main.c
index bb984ed79de0..65fa2e41a9c0 100644
--- a/init/main.c
+++ b/init/main.c
@@ -834,12 +834,15 @@ static void __init mm_init(void)
init_mem_debugging_and_hardening();
kfence_alloc_pool();
report_meminit();
- stack_depot_init();
+ stack_depot_early_init();
mem_init();
mem_init_print_info();
- /* page_owner must be initialized after buddy is ready */
- page_ext_init_flatmem_late();
kmem_cache_init();
+ /*
+ * page_owner must be initialized after buddy is ready, and also after
+ * slab is ready so that stack_depot_init() works properly
+ */
+ page_ext_init_flatmem_late();
kmemleak_init();
pgtable_init();
debug_objects_mem_init();
diff --git a/ipc/util.c b/ipc/util.c
index fa2d86ef3fb8..a2208d0f26b2 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -894,7 +894,7 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file)
if (!iter)
return -ENOMEM;
- iter->iface = PDE_DATA(inode);
+ iter->iface = pde_data(inode);
iter->ns = get_ipc_ns(current->nsproxy->ipc_ns);
iter->pid_ns = get_pid_ns(task_active_pid_ns(current));
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 33bb8ae4a804..e16dafeb2450 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5686,7 +5686,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
i, btf_type_str(t));
return -EINVAL;
}
- if (check_ctx_reg(env, reg, regno))
+ if (check_ptr_off_reg(env, reg, regno))
return -EINVAL;
} else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) {
const struct btf_type *reg_ref_t;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 80da1db47c68..5a8d9f7467bf 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -648,12 +648,22 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
int opt;
opt = fs_parse(fc, bpf_fs_parameters, param, &result);
- if (opt < 0)
+ if (opt < 0) {
/* We might like to report bad mount options here, but
* traditionally we've ignored all mount options, so we'd
* better continue to ignore non-existing options for bpf.
*/
- return opt == -ENOPARAM ? 0 : opt;
+ if (opt == -ENOPARAM) {
+ opt = vfs_parse_fs_param_source(fc, param);
+ if (opt != -ENOPARAM)
+ return opt;
+
+ return 0;
+ }
+
+ if (opt < 0)
+ return opt;
+ }
switch (opt) {
case OPT_MODE:
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 49e567209c6b..22c8ae94e4c1 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -472,13 +472,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
u32, size, u64, flags)
{
struct pt_regs *regs;
- long res;
+ long res = -EINVAL;
if (!try_get_task_stack(task))
return -EFAULT;
regs = task_pt_regs(task);
- res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
+ if (regs)
+ res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
put_task_stack(task);
return res;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bfb45381fb3f..a39eedecc93a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -570,6 +570,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
if (type & MEM_RDONLY)
strncpy(prefix, "rdonly_", 16);
+ if (type & MEM_ALLOC)
+ strncpy(prefix, "alloc_", 16);
snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
prefix, str[base_type(type)], postfix);
@@ -616,7 +618,7 @@ static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
{
- env->scratched_stack_slots |= 1UL << spi;
+ env->scratched_stack_slots |= 1ULL << spi;
}
static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
@@ -637,14 +639,14 @@ static bool verifier_state_scratched(const struct bpf_verifier_env *env)
static void mark_verifier_state_clean(struct bpf_verifier_env *env)
{
env->scratched_regs = 0U;
- env->scratched_stack_slots = 0UL;
+ env->scratched_stack_slots = 0ULL;
}
/* Used for printing the entire verifier state. */
static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
{
env->scratched_regs = ~0U;
- env->scratched_stack_slots = ~0UL;
+ env->scratched_stack_slots = ~0ULL;
}
/* The reg state of a pointer or a bounded scalar was saved when
@@ -3969,16 +3971,17 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
}
#endif
-int check_ctx_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno)
+static int __check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno,
+ bool fixed_off_ok)
{
- /* Access to ctx or passing it to a helper is only allowed in
- * its original, unmodified form.
+ /* Access to this pointer-typed register or passing it to a helper
+ * is only allowed in its original, unmodified form.
*/
- if (reg->off) {
- verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
- regno, reg->off);
+ if (!fixed_off_ok && reg->off) {
+ verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
+ reg_type_str(env, reg->type), regno, reg->off);
return -EACCES;
}
@@ -3986,13 +3989,20 @@ int check_ctx_reg(struct bpf_verifier_env *env,
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
+ verbose(env, "variable %s access var_off=%s disallowed\n",
+ reg_type_str(env, reg->type), tn_buf);
return -EACCES;
}
return 0;
}
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno)
+{
+ return __check_ptr_off_reg(env, reg, regno, false);
+}
+
static int __check_buffer_access(struct bpf_verifier_env *env,
const char *buf_info,
const struct bpf_reg_state *reg,
@@ -4437,7 +4447,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return -EACCES;
}
- err = check_ctx_reg(env, reg, regno);
+ err = check_ptr_off_reg(env, reg, regno);
if (err < 0)
return err;
@@ -5127,6 +5137,7 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
PTR_TO_MEM,
+ PTR_TO_MEM | MEM_ALLOC,
PTR_TO_BUF,
},
};
@@ -5144,7 +5155,7 @@ static const struct bpf_reg_types int_ptr_types = {
static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
-static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
+static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
@@ -5244,12 +5255,6 @@ found:
kernel_type_name(btf_vmlinux, *arg_btf_id));
return -EACCES;
}
-
- if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
- verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
- regno);
- return -EACCES;
- }
}
return 0;
@@ -5304,10 +5309,33 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
if (err)
return err;
- if (type == PTR_TO_CTX) {
- err = check_ctx_reg(env, reg, regno);
+ switch ((u32)type) {
+ case SCALAR_VALUE:
+ /* Pointer types where reg offset is explicitly allowed: */
+ case PTR_TO_PACKET:
+ case PTR_TO_PACKET_META:
+ case PTR_TO_MAP_KEY:
+ case PTR_TO_MAP_VALUE:
+ case PTR_TO_MEM:
+ case PTR_TO_MEM | MEM_RDONLY:
+ case PTR_TO_MEM | MEM_ALLOC:
+ case PTR_TO_BUF:
+ case PTR_TO_BUF | MEM_RDONLY:
+ case PTR_TO_STACK:
+ /* Some of the argument types nevertheless require a
+ * zero register offset.
+ */
+ if (arg_type == ARG_PTR_TO_ALLOC_MEM)
+ goto force_off_check;
+ break;
+ /* All the rest must be rejected: */
+ default:
+force_off_check:
+ err = __check_ptr_off_reg(env, reg, regno,
+ type == PTR_TO_BTF_ID);
if (err < 0)
return err;
+ break;
}
skip_type_check:
@@ -9507,9 +9535,13 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
- if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
- mark_reg_known_zero(env, regs, insn->dst_reg);
+ /* All special src_reg cases are listed below. From this point onwards
+ * we either succeed and assign a corresponding dst_reg->type after
+ * zeroing the offset, or fail and reject the program.
+ */
+ mark_reg_known_zero(env, regs, insn->dst_reg);
+ if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
dst_reg->type = aux->btf_var.reg_type;
switch (base_type(dst_reg->type)) {
case PTR_TO_MEM:
@@ -9547,7 +9579,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
map = env->used_maps[aux->map_index];
- mark_reg_known_zero(env, regs, insn->dst_reg);
dst_reg->map_ptr = map;
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
@@ -9651,7 +9682,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return err;
}
- err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
+ err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
if (err < 0)
return err;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index b31e1465868a..9d05c3ca2d5e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3643,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
cgroup_get(cgrp);
cgroup_kn_unlock(of->kn);
+ /* Allow only one trigger per file descriptor */
+ if (ctx->psi.trigger) {
+ cgroup_put(cgrp);
+ return -EBUSY;
+ }
+
psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
new = psi_trigger_create(psi, buf, nbytes, res);
if (IS_ERR(new)) {
@@ -3650,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
return PTR_ERR(new);
}
- psi_trigger_replace(&ctx->psi.trigger, new);
-
+ smp_store_release(&ctx->psi.trigger, new);
cgroup_put(cgrp);
return nbytes;
@@ -3690,7 +3695,7 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
{
struct cgroup_file_ctx *ctx = of->priv;
- psi_trigger_replace(&ctx->psi.trigger, NULL);
+ psi_trigger_destroy(ctx->psi.trigger);
}
bool cgroup_psi_enabled(void)
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
new file mode 100644
index 000000000000..e9ffb0cc1eec
--- /dev/null
+++ b/kernel/configs/debug.config
@@ -0,0 +1,105 @@
+# The config is based on running daily CI for enterprise Linux distros to
+# seek regressions on linux-next builds on different bare-metal and virtual
+# platforms. It can be used for example,
+#
+# $ make ARCH=arm64 defconfig debug.config
+#
+# Keep alphabetically sorted inside each section.
+#
+# printk and dmesg options
+#
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_PRINTK_CALLER=y
+CONFIG_PRINTK_TIME=y
+CONFIG_SYMBOLIC_ERRNAME=y
+#
+# Compile-time checks and compiler options
+#
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_FRAME_WARN=2048
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+#
+# Generic Kernel Debugging Instruments
+#
+# CONFIG_UBSAN_ALIGNMENT is not set
+# CONFIG_UBSAN_DIV_ZERO is not set
+# CONFIG_UBSAN_TRAP is not set
+# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_FS_ALLOW_ALL=y
+CONFIG_DEBUG_IRQFLAGS=y
+CONFIG_UBSAN=y
+CONFIG_UBSAN_BOOL=y
+CONFIG_UBSAN_BOUNDS=y
+CONFIG_UBSAN_ENUM=y
+CONFIG_UBSAN_SHIFT=y
+CONFIG_UBSAN_UNREACHABLE=y
+#
+# Memory Debugging
+#
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF is not set
+# CONFIG_DEBUG_RODATA_TEST is not set
+# CONFIG_DEBUG_WX is not set
+# CONFIG_KFENCE is not set
+# CONFIG_PAGE_POISONING is not set
+# CONFIG_SLUB_STATS is not set
+CONFIG_PAGE_EXTENSION=y
+CONFIG_PAGE_OWNER=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VIRTUAL=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_VM_VMACACHE=y
+CONFIG_GENERIC_PTDUMP=y
+CONFIG_KASAN=y
+CONFIG_KASAN_GENERIC=y
+CONFIG_KASAN_INLINE=y
+CONFIG_KASAN_VMALLOC=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_SLUB_DEBUG_ON=y
+#
+# Debug Oops, Lockups and Hangs
+#
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PANIC_TIMEOUT=0
+CONFIG_SOFTLOCKUP_DETECTOR=y
+#
+# Lock Debugging (spinlocks, mutexes, etc...)
+#
+# CONFIG_PROVE_RAW_LOCK_NESTING is not set
+CONFIG_PROVE_LOCKING=y
+#
+# Debug kernel data structures
+#
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+#
+# RCU Debugging
+#
+CONFIG_PROVE_RCU=y
+CONFIG_PROVE_RCU_LIST=y
+#
+# Tracers
+#
+CONFIG_BRANCH_PROFILE_NONE=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 51530d5b15a8..c5e8cea9e05f 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -100,19 +100,10 @@ void __delayacct_blkio_start(void)
*/
void __delayacct_blkio_end(struct task_struct *p)
{
- struct task_delay_info *delays = p->delays;
- u64 *total;
- u32 *count;
-
- if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
- total = &delays->swapin_delay;
- count = &delays->swapin_count;
- } else {
- total = &delays->blkio_delay;
- count = &delays->blkio_count;
- }
-
- delayacct_end(&delays->lock, &delays->blkio_start, total, count);
+ delayacct_end(&p->delays->lock,
+ &p->delays->blkio_start,
+ &p->delays->blkio_delay,
+ &p->delays->blkio_count);
}
int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -164,10 +155,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
+ tmp = d->compact_delay_total + tsk->delays->compact_delay;
+ d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
d->freepages_count += tsk->delays->freepages_count;
d->thrashing_count += tsk->delays->thrashing_count;
+ d->compact_count += tsk->delays->compact_count;
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return 0;
@@ -179,8 +173,7 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
unsigned long flags;
raw_spin_lock_irqsave(&tsk->delays->lock, flags);
- ret = nsec_to_clock_t(tsk->delays->blkio_delay +
- tsk->delays->swapin_delay);
+ ret = nsec_to_clock_t(tsk->delays->blkio_delay);
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return ret;
}
@@ -210,3 +203,29 @@ void __delayacct_thrashing_end(void)
&current->delays->thrashing_delay,
&current->delays->thrashing_count);
}
+
+void __delayacct_swapin_start(void)
+{
+ current->delays->swapin_start = local_clock();
+}
+
+void __delayacct_swapin_end(void)
+{
+ delayacct_end(&current->delays->lock,
+ &current->delays->swapin_start,
+ &current->delays->swapin_delay,
+ &current->delays->swapin_count);
+}
+
+void __delayacct_compact_start(void)
+{
+ current->delays->compact_start = local_clock();
+}
+
+void __delayacct_compact_end(void)
+{
+ delayacct_end(&current->delays->lock,
+ &current->delays->compact_start,
+ &current->delays->compact_delay,
+ &current->delays->compact_count);
+}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fc18664f49b0..76c754e45d01 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -674,6 +674,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state)
WRITE_ONCE(event->state, state);
}
+/*
+ * UP store-release, load-acquire
+ */
+
+#define __store_release(ptr, val) \
+do { \
+ barrier(); \
+ WRITE_ONCE(*(ptr), (val)); \
+} while (0)
+
+#define __load_acquire(ptr) \
+({ \
+ __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \
+ barrier(); \
+ ___p; \
+})
+
#ifdef CONFIG_CGROUP_PERF
static inline bool
@@ -719,34 +736,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
return t->time;
}
-static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
+static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
{
- struct perf_cgroup_info *info;
- u64 now;
-
- now = perf_clock();
+ struct perf_cgroup_info *t;
- info = this_cpu_ptr(cgrp->info);
+ t = per_cpu_ptr(event->cgrp->info, event->cpu);
+ if (!__load_acquire(&t->active))
+ return t->time;
+ now += READ_ONCE(t->timeoffset);
+ return now;
+}
- info->time += now - info->timestamp;
+static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv)
+{
+ if (adv)
+ info->time += now - info->timestamp;
info->timestamp = now;
+ /*
+ * see update_context_time()
+ */
+ WRITE_ONCE(info->timeoffset, info->time - info->timestamp);
}
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final)
{
struct perf_cgroup *cgrp = cpuctx->cgrp;
struct cgroup_subsys_state *css;
+ struct perf_cgroup_info *info;
if (cgrp) {
+ u64 now = perf_clock();
+
for (css = &cgrp->css; css; css = css->parent) {
cgrp = container_of(css, struct perf_cgroup, css);
- __update_cgrp_time(cgrp);
+ info = this_cpu_ptr(cgrp->info);
+
+ __update_cgrp_time(info, now, true);
+ if (final)
+ __store_release(&info->active, 0);
}
}
}
static inline void update_cgrp_time_from_event(struct perf_event *event)
{
+ struct perf_cgroup_info *info;
struct perf_cgroup *cgrp;
/*
@@ -760,8 +794,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
/*
* Do not update time when cgroup is not active
*/
- if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
- __update_cgrp_time(event->cgrp);
+ if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
+ info = this_cpu_ptr(event->cgrp->info);
+ __update_cgrp_time(info, perf_clock(), true);
+ }
}
static inline void
@@ -785,7 +821,8 @@ perf_cgroup_set_timestamp(struct task_struct *task,
for (css = &cgrp->css; css; css = css->parent) {
cgrp = container_of(css, struct perf_cgroup, css);
info = this_cpu_ptr(cgrp->info);
- info->timestamp = ctx->timestamp;
+ __update_cgrp_time(info, ctx->timestamp, false);
+ __store_release(&info->active, 1);
}
}
@@ -982,14 +1019,6 @@ out:
}
static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
-{
- struct perf_cgroup_info *t;
- t = per_cpu_ptr(event->cgrp->info, event->cpu);
- event->shadow_ctx_time = now - t->timestamp;
-}
-
-static inline void
perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
{
struct perf_cpu_context *cpuctx;
@@ -1066,7 +1095,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
{
}
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
+ bool final)
{
}
@@ -1098,12 +1128,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
{
}
-static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+static inline u64 perf_cgroup_event_time(struct perf_event *event)
{
+ return 0;
}
-static inline u64 perf_cgroup_event_time(struct perf_event *event)
+static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
{
return 0;
}
@@ -1525,22 +1555,59 @@ static void perf_unpin_context(struct perf_event_context *ctx)
/*
* Update the record of the current time in a context.
*/
-static void update_context_time(struct perf_event_context *ctx)
+static void __update_context_time(struct perf_event_context *ctx, bool adv)
{
u64 now = perf_clock();
- ctx->time += now - ctx->timestamp;
+ if (adv)
+ ctx->time += now - ctx->timestamp;
ctx->timestamp = now;
+
+ /*
+ * The above: time' = time + (now - timestamp), can be re-arranged
+ * into: time` = now + (time - timestamp), which gives a single value
+ * offset to compute future time without locks on.
+ *
+ * See perf_event_time_now(), which can be used from NMI context where
+ * it's (obviously) not possible to acquire ctx->lock in order to read
+ * both the above values in a consistent manner.
+ */
+ WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp);
+}
+
+static void update_context_time(struct perf_event_context *ctx)
+{
+ __update_context_time(ctx, true);
}
static u64 perf_event_time(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
+ if (unlikely(!ctx))
+ return 0;
+
if (is_cgroup_event(event))
return perf_cgroup_event_time(event);
- return ctx ? ctx->time : 0;
+ return ctx->time;
+}
+
+static u64 perf_event_time_now(struct perf_event *event, u64 now)
+{
+ struct perf_event_context *ctx = event->ctx;
+
+ if (unlikely(!ctx))
+ return 0;
+
+ if (is_cgroup_event(event))
+ return perf_cgroup_event_time_now(event, now);
+
+ if (!(__load_acquire(&ctx->is_active) & EVENT_TIME))
+ return ctx->time;
+
+ now += READ_ONCE(ctx->timeoffset);
+ return now;
}
static enum event_type_t get_event_type(struct perf_event *event)
@@ -2350,7 +2417,7 @@ __perf_remove_from_context(struct perf_event *event,
if (ctx->is_active & EVENT_TIME) {
update_context_time(ctx);
- update_cgrp_time_from_cpuctx(cpuctx);
+ update_cgrp_time_from_cpuctx(cpuctx, false);
}
event_sched_out(event, cpuctx, ctx);
@@ -2361,6 +2428,9 @@ __perf_remove_from_context(struct perf_event *event,
list_del_event(event, ctx);
if (!ctx->nr_events && ctx->is_active) {
+ if (ctx == &cpuctx->ctx)
+ update_cgrp_time_from_cpuctx(cpuctx, true);
+
ctx->is_active = 0;
ctx->rotate_necessary = 0;
if (ctx->task) {
@@ -2392,7 +2462,11 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla
* event_function_call() user.
*/
raw_spin_lock_irq(&ctx->lock);
- if (!ctx->is_active) {
+ /*
+ * Cgroup events are per-cpu events, and must IPI because of
+ * cgrp_cpuctx_list.
+ */
+ if (!ctx->is_active && !is_cgroup_event(event)) {
__perf_remove_from_context(event, __get_cpu_context(ctx),
ctx, (void *)flags);
raw_spin_unlock_irq(&ctx->lock);
@@ -2482,40 +2556,6 @@ void perf_event_disable_inatomic(struct perf_event *event)
irq_work_queue(&event->pending);
}
-static void perf_set_shadow_time(struct perf_event *event,
- struct perf_event_context *ctx)
-{
- /*
- * use the correct time source for the time snapshot
- *
- * We could get by without this by leveraging the
- * fact that to get to this function, the caller
- * has most likely already called update_context_time()
- * and update_cgrp_time_xx() and thus both timestamp
- * are identical (or very close). Given that tstamp is,
- * already adjusted for cgroup, we could say that:
- * tstamp - ctx->timestamp
- * is equivalent to
- * tstamp - cgrp->timestamp.
- *
- * Then, in perf_output_read(), the calculation would
- * work with no changes because:
- * - event is guaranteed scheduled in
- * - no scheduled out in between
- * - thus the timestamp would be the same
- *
- * But this is a bit hairy.
- *
- * So instead, we have an explicit cgroup call to remain
- * within the time source all along. We believe it
- * is cleaner and simpler to understand.
- */
- if (is_cgroup_event(event))
- perf_cgroup_set_shadow_time(event, event->tstamp);
- else
- event->shadow_ctx_time = event->tstamp - ctx->timestamp;
-}
-
#define MAX_INTERRUPTS (~0ULL)
static void perf_log_throttle(struct perf_event *event, int enable);
@@ -2556,8 +2596,6 @@ event_sched_in(struct perf_event *event,
perf_pmu_disable(event->pmu);
- perf_set_shadow_time(event, ctx);
-
perf_log_itrace_start(event);
if (event->pmu->add(event, PERF_EF_START)) {
@@ -2861,11 +2899,14 @@ perf_install_in_context(struct perf_event_context *ctx,
* perf_event_attr::disabled events will not run and can be initialized
* without IPI. Except when this is the first event for the context, in
* that case we need the magic of the IPI to set ctx->is_active.
+ * Similarly, cgroup events for the context also need the IPI to
+ * manipulate the cgrp_cpuctx_list.
*
* The IOC_ENABLE that is sure to follow the creation of a disabled
* event will issue the IPI and reprogram the hardware.
*/
- if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
+ if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF &&
+ ctx->nr_events && !is_cgroup_event(event)) {
raw_spin_lock_irq(&ctx->lock);
if (ctx->task == TASK_TOMBSTONE) {
raw_spin_unlock_irq(&ctx->lock);
@@ -3251,16 +3292,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
return;
}
- ctx->is_active &= ~event_type;
- if (!(ctx->is_active & EVENT_ALL))
- ctx->is_active = 0;
-
- if (ctx->task) {
- WARN_ON_ONCE(cpuctx->task_ctx != ctx);
- if (!ctx->is_active)
- cpuctx->task_ctx = NULL;
- }
-
/*
* Always update time if it was set; not only when it changes.
* Otherwise we can 'forget' to update time for any but the last
@@ -3274,7 +3305,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
if (is_active & EVENT_TIME) {
/* update (and stop) ctx time */
update_context_time(ctx);
- update_cgrp_time_from_cpuctx(cpuctx);
+ update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx);
+ /*
+ * CPU-release for the below ->is_active store,
+ * see __load_acquire() in perf_event_time_now()
+ */
+ barrier();
+ }
+
+ ctx->is_active &= ~event_type;
+ if (!(ctx->is_active & EVENT_ALL))
+ ctx->is_active = 0;
+
+ if (ctx->task) {
+ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+ if (!ctx->is_active)
+ cpuctx->task_ctx = NULL;
}
is_active ^= ctx->is_active; /* changed bits */
@@ -3711,13 +3757,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
return 0;
}
+/*
+ * Because the userpage is strictly per-event (there is no concept of context,
+ * so there cannot be a context indirection), every userpage must be updated
+ * when context time starts :-(
+ *
+ * IOW, we must not miss EVENT_TIME edges.
+ */
static inline bool event_update_userpage(struct perf_event *event)
{
if (likely(!atomic_read(&event->mmap_count)))
return false;
perf_event_update_time(event);
- perf_set_shadow_time(event, event->ctx);
perf_event_update_userpage(event);
return true;
@@ -3801,13 +3853,23 @@ ctx_sched_in(struct perf_event_context *ctx,
struct task_struct *task)
{
int is_active = ctx->is_active;
- u64 now;
lockdep_assert_held(&ctx->lock);
if (likely(!ctx->nr_events))
return;
+ if (is_active ^ EVENT_TIME) {
+ /* start ctx time */
+ __update_context_time(ctx, false);
+ perf_cgroup_set_timestamp(task, ctx);
+ /*
+ * CPU-release for the below ->is_active store,
+ * see __load_acquire() in perf_event_time_now()
+ */
+ barrier();
+ }
+
ctx->is_active |= (event_type | EVENT_TIME);
if (ctx->task) {
if (!is_active)
@@ -3818,13 +3880,6 @@ ctx_sched_in(struct perf_event_context *ctx,
is_active ^= ctx->is_active; /* changed bits */
- if (is_active & EVENT_TIME) {
- /* start ctx time */
- now = perf_clock();
- ctx->timestamp = now;
- perf_cgroup_set_timestamp(task, ctx);
- }
-
/*
* First go through the list and put on any pinned groups
* in order to give them the best chance of going on.
@@ -4418,6 +4473,18 @@ static inline u64 perf_event_count(struct perf_event *event)
return local64_read(&event->count) + atomic64_read(&event->child_count);
}
+static void calc_timer_values(struct perf_event *event,
+ u64 *now,
+ u64 *enabled,
+ u64 *running)
+{
+ u64 ctx_time;
+
+ *now = perf_clock();
+ ctx_time = perf_event_time_now(event, *now);
+ __perf_update_times(event, ctx_time, enabled, running);
+}
+
/*
* NMI-safe method to read a local event, that is an event that
* is:
@@ -4477,10 +4544,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
*value = local64_read(&event->count);
if (enabled || running) {
- u64 now = event->shadow_ctx_time + perf_clock();
- u64 __enabled, __running;
+ u64 __enabled, __running, __now;;
- __perf_update_times(event, now, &__enabled, &__running);
+ calc_timer_values(event, &__now, &__enabled, &__running);
if (enabled)
*enabled = __enabled;
if (running)
@@ -5802,18 +5868,6 @@ static int perf_event_index(struct perf_event *event)
return event->pmu->event_idx(event);
}
-static void calc_timer_values(struct perf_event *event,
- u64 *now,
- u64 *enabled,
- u64 *running)
-{
- u64 ctx_time;
-
- *now = perf_clock();
- ctx_time = event->shadow_ctx_time + *now;
- __perf_update_times(event, ctx_time, enabled, running);
-}
-
static void perf_event_init_userpage(struct perf_event *event)
{
struct perf_event_mmap_page *userpg;
@@ -5938,6 +5992,8 @@ static void ring_buffer_attach(struct perf_event *event,
struct perf_buffer *old_rb = NULL;
unsigned long flags;
+ WARN_ON_ONCE(event->parent);
+
if (event->rb) {
/*
* Should be impossible, we set this when removing
@@ -5995,6 +6051,9 @@ static void ring_buffer_wakeup(struct perf_event *event)
{
struct perf_buffer *rb;
+ if (event->parent)
+ event = event->parent;
+
rcu_read_lock();
rb = rcu_dereference(event->rb);
if (rb) {
@@ -6008,6 +6067,9 @@ struct perf_buffer *ring_buffer_get(struct perf_event *event)
{
struct perf_buffer *rb;
+ if (event->parent)
+ event = event->parent;
+
rcu_read_lock();
rb = rcu_dereference(event->rb);
if (rb) {
@@ -6353,7 +6415,6 @@ accounting:
ring_buffer_attach(event, rb);
perf_event_update_time(event);
- perf_set_shadow_time(event, event->ctx);
perf_event_init_userpage(event);
perf_event_update_userpage(event);
} else {
@@ -6717,7 +6778,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event,
if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
goto out;
- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
+ rb = ring_buffer_get(sampler);
if (!rb)
goto out;
@@ -6783,7 +6844,7 @@ static void perf_aux_sample_output(struct perf_event *event,
if (WARN_ON_ONCE(!sampler || !data->aux_size))
return;
- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
+ rb = ring_buffer_get(sampler);
if (!rb)
return;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 9888e2bc8c76..52501e5f7655 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -63,7 +63,9 @@ static struct task_struct *watchdog_task;
* Should we dump all CPUs backtraces in a hung task event?
* Defaults to 0, can be changed via sysctl.
*/
-unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
+static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
+#else
+#define sysctl_hung_task_all_cpu_backtrace 0
#endif /* CONFIG_SMP */
/*
@@ -222,11 +224,13 @@ static long hung_timeout_jiffies(unsigned long last_checked,
MAX_SCHEDULE_TIMEOUT;
}
+#ifdef CONFIG_SYSCTL
/*
* Process updating of timeout sysctl
*/
-int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
{
int ret;
@@ -241,6 +245,76 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
return ret;
}
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
+static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
+static struct ctl_table hung_task_sysctls[] = {
+#ifdef CONFIG_SMP
+ {
+ .procname = "hung_task_all_cpu_backtrace",
+ .data = &sysctl_hung_task_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_SMP */
+ {
+ .procname = "hung_task_panic",
+ .data = &sysctl_hung_task_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "hung_task_check_count",
+ .data = &sysctl_hung_task_check_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "hung_task_timeout_secs",
+ .data = &sysctl_hung_task_timeout_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = (void *)&hung_task_timeout_max,
+ },
+ {
+ .procname = "hung_task_check_interval_secs",
+ .data = &sysctl_hung_task_check_interval_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = (void *)&hung_task_timeout_max,
+ },
+ {
+ .procname = "hung_task_warnings",
+ .data = &sysctl_hung_task_warnings,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_NEG_ONE,
+ },
+ {}
+};
+
+static void __init hung_task_sysctl_init(void)
+{
+ register_sysctl_init("kernel", hung_task_sysctls);
+}
+#else
+#define hung_task_sysctl_init() do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
+
static atomic_t reset_hung_task = ATOMIC_INIT(0);
void reset_hung_task_detector(void)
@@ -310,6 +384,7 @@ static int __init hung_task_init(void)
pm_notifier(hungtask_pm_notify, 0);
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+ hung_task_sysctl_init();
return 0;
}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index ee595ec09778..623b8136e9af 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -137,7 +137,7 @@ static inline int irq_select_affinity_usr(unsigned int irq)
static ssize_t write_irq_affinity(int type, struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
- unsigned int irq = (int)(long)PDE_DATA(file_inode(file));
+ unsigned int irq = (int)(long)pde_data(file_inode(file));
cpumask_var_t new_value;
int err;
@@ -190,12 +190,12 @@ static ssize_t irq_affinity_list_proc_write(struct file *file,
static int irq_affinity_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, irq_affinity_proc_show, PDE_DATA(inode));
+ return single_open(file, irq_affinity_proc_show, pde_data(inode));
}
static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode));
+ return single_open(file, irq_affinity_list_proc_show, pde_data(inode));
}
static const struct proc_ops irq_affinity_proc_ops = {
@@ -265,7 +265,7 @@ out:
static int default_affinity_open(struct inode *inode, struct file *file)
{
- return single_open(file, default_affinity_show, PDE_DATA(inode));
+ return single_open(file, default_affinity_show, pde_data(inode));
}
static const struct proc_ops default_affinity_proc_ops = {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 21eccc961bba..94cab8c9ce56 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -48,6 +48,9 @@
#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
+#if !defined(CONFIG_OPTPROBES) || !defined(CONFIG_SYSCTL)
+#define kprobe_sysctls_init() do { } while (0)
+#endif
static int kprobes_initialized;
/* kprobe_table can be accessed by
@@ -938,10 +941,10 @@ static void unoptimize_all_kprobes(void)
}
static DEFINE_MUTEX(kprobe_sysctl_mutex);
-int sysctl_kprobes_optimization;
-int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
- void *buffer, size_t *length,
- loff_t *ppos)
+static int sysctl_kprobes_optimization;
+static int proc_kprobes_optimization_handler(struct ctl_table *table,
+ int write, void *buffer,
+ size_t *length, loff_t *ppos)
{
int ret;
@@ -957,6 +960,24 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
return ret;
}
+
+static struct ctl_table kprobe_sysctls[] = {
+ {
+ .procname = "kprobes-optimization",
+ .data = &sysctl_kprobes_optimization,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_kprobes_optimization_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {}
+};
+
+static void __init kprobe_sysctls_init(void)
+{
+ register_sysctl_init("debug", kprobe_sysctls);
+}
#endif /* CONFIG_SYSCTL */
/* Put a breakpoint for a probe. */
@@ -2584,6 +2605,7 @@ static int __init init_kprobes(void)
err = register_module_notifier(&kprobe_module_nb);
kprobes_initialized = (err == 0);
+ kprobe_sysctls_init();
return err;
}
early_initcall(init_kprobes);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index a2c156ee8275..38c6dd822da8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -61,6 +61,8 @@ struct kthread {
#ifdef CONFIG_BLK_CGROUP
struct cgroup_subsys_state *blkcg_css;
#endif
+ /* To store the full name if task comm is truncated. */
+ char *full_name;
};
enum KTHREAD_BITS {
@@ -94,6 +96,18 @@ static inline struct kthread *__to_kthread(struct task_struct *p)
return kthread;
}
+void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
+{
+ struct kthread *kthread = to_kthread(tsk);
+
+ if (!kthread || !kthread->full_name) {
+ __get_task_comm(buf, buf_size, tsk);
+ return;
+ }
+
+ strscpy_pad(buf, kthread->full_name, buf_size);
+}
+
bool set_kthread_struct(struct task_struct *p)
{
struct kthread *kthread;
@@ -121,10 +135,14 @@ void free_kthread_struct(struct task_struct *k)
* Can be NULL if kmalloc() in set_kthread_struct() failed.
*/
kthread = to_kthread(k);
+ if (!kthread)
+ return;
+
#ifdef CONFIG_BLK_CGROUP
- WARN_ON_ONCE(kthread && kthread->blkcg_css);
+ WARN_ON_ONCE(kthread->blkcg_css);
#endif
k->worker_private = NULL;
+ kfree(kthread->full_name);
kfree(kthread);
}
@@ -438,12 +456,22 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
task = create->result;
if (!IS_ERR(task)) {
char name[TASK_COMM_LEN];
+ va_list aq;
+ int len;
/*
* task is already visible to other tasks, so updating
* COMM must be protected.
*/
- vsnprintf(name, sizeof(name), namefmt, args);
+ va_copy(aq, args);
+ len = vsnprintf(name, sizeof(name), namefmt, aq);
+ va_end(aq);
+ if (len >= TASK_COMM_LEN) {
+ struct kthread *kthread = to_kthread(task);
+
+ /* leave it truncated when out of memory. */
+ kthread->full_name = kvasprintf(GFP_KERNEL, namefmt, args);
+ }
set_task_comm(task, name);
}
kfree(create);
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index b562f9289372..7f49baaa4979 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -300,6 +300,16 @@ void __lockfunc _raw_write_lock(rwlock_t *lock)
__raw_write_lock(lock);
}
EXPORT_SYMBOL(_raw_write_lock);
+
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
+#define __raw_write_lock_nested(lock, subclass) __raw_write_lock(((void)(subclass), (lock)))
+#endif
+
+void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass)
+{
+ __raw_write_lock_nested(lock, subclass);
+}
+EXPORT_SYMBOL(_raw_write_lock_nested);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
index 9e396a09fe0f..48a19ed8486d 100644
--- a/kernel/locking/spinlock_rt.c
+++ b/kernel/locking/spinlock_rt.c
@@ -239,6 +239,18 @@ void __sched rt_write_lock(rwlock_t *rwlock)
}
EXPORT_SYMBOL(rt_write_lock);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass)
+{
+ rtlock_might_resched();
+ rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_);
+ rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
+ rcu_read_lock();
+ migrate_disable();
+}
+EXPORT_SYMBOL(rt_write_lock_nested);
+#endif
+
void __sched rt_read_unlock(rwlock_t *rwlock)
{
rwlock_release(&rwlock->dep_map, _RET_IP_);
diff --git a/kernel/panic.c b/kernel/panic.c
index cefd7d82366f..55b50e052ec3 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -32,6 +32,7 @@
#include <linux/bug.h>
#include <linux/ratelimit.h>
#include <linux/debugfs.h>
+#include <trace/events/error_report.h>
#include <asm/sections.h>
#define PANIC_TIMER_STEP 100
@@ -533,26 +534,9 @@ void oops_enter(void)
trigger_all_cpu_backtrace();
}
-/*
- * 64-bit random ID for oopses:
- */
-static u64 oops_id;
-
-static int init_oops_id(void)
-{
- if (!oops_id)
- get_random_bytes(&oops_id, sizeof(oops_id));
- else
- oops_id++;
-
- return 0;
-}
-late_initcall(init_oops_id);
-
static void print_oops_end_marker(void)
{
- init_oops_id();
- pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
+ pr_warn("---[ end trace %016llx ]---\n", 0ULL);
}
/*
@@ -609,6 +593,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
print_irqtrace_events(current);
print_oops_end_marker();
+ trace_error_report_end(ERROR_DETECTOR_WARN, (unsigned long)caller);
/* Just a warning, don't kill lockdep. */
add_taint(taint, LOCKDEP_STILL_OK);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index f7a986078213..330d49937692 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -978,8 +978,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm)
* Register a range of page frames the contents of which should not be saved
* during hibernation (to be used in the early initialization code).
*/
-void __init __register_nosave_region(unsigned long start_pfn,
- unsigned long end_pfn, int use_kmalloc)
+void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
{
struct nosave_region *region;
@@ -995,18 +994,12 @@ void __init __register_nosave_region(unsigned long start_pfn,
goto Report;
}
}
- if (use_kmalloc) {
- /* During init, this shouldn't fail */
- region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
- BUG_ON(!region);
- } else {
- /* This allocation cannot fail */
- region = memblock_alloc(sizeof(struct nosave_region),
- SMP_CACHE_BYTES);
- if (!region)
- panic("%s: Failed to allocate %zu bytes\n", __func__,
- sizeof(struct nosave_region));
- }
+ /* This allocation cannot fail */
+ region = memblock_alloc(sizeof(struct nosave_region),
+ SMP_CACHE_BYTES);
+ if (!region)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(struct nosave_region));
region->start_pfn = start_pfn;
region->end_pfn = end_pfn;
list_add_tail(&region->list, &nosave_regions);
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
index 105df4dfc783..52571dcad768 100644
--- a/kernel/power/wakelock.c
+++ b/kernel/power/wakelock.c
@@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active)
{
struct rb_node *node;
struct wakelock *wl;
- char *str = buf;
- char *end = buf + PAGE_SIZE;
+ int len = 0;
mutex_lock(&wakelocks_lock);
for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) {
wl = rb_entry(node, struct wakelock, node);
if (wl->ws->active == show_active)
- str += scnprintf(str, end - str, "%s ", wl->name);
+ len += sysfs_emit_at(buf, len, "%s ", wl->name);
}
- if (str > buf)
- str--;
- str += scnprintf(str, end - str, "\n");
+ len += sysfs_emit_at(buf, len, "\n");
mutex_unlock(&wakelocks_lock);
- return (str - buf);
+ return len;
}
#if CONFIG_PM_WAKELOCKS_LIMIT > 0
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index d118739874c0..f5b388e810b9 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -2,5 +2,8 @@
obj-y = printk.o
obj-$(CONFIG_PRINTK) += printk_safe.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
-obj-$(CONFIG_PRINTK) += printk_ringbuffer.o
obj-$(CONFIG_PRINTK_INDEX) += index.o
+
+obj-$(CONFIG_PRINTK) += printk_support.o
+printk_support-y := printk_ringbuffer.o
+printk_support-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 9f3ed2fdb721..d947ca6c84f9 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -4,6 +4,14 @@
*/
#include <linux/percpu.h>
+#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
+void __init printk_sysctl_init(void);
+int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
+#else
+#define printk_sysctl_init() do { } while (0)
+#endif
+
#ifdef CONFIG_PRINTK
/* Flags for a single printk record. */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 155229f0cf0f..82abfaf3c2aa 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -171,7 +171,7 @@ static int __init control_devkmsg(char *str)
__setup("printk.devkmsg=", control_devkmsg);
char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
-
+#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -210,6 +210,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
return 0;
}
+#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */
/* Number of registered extended console drivers. */
static int nr_ext_console_drivers;
@@ -3211,6 +3212,7 @@ static int __init printk_late_init(void)
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
console_cpu_notify, NULL);
WARN_ON(ret < 0);
+ printk_sysctl_init();
return 0;
}
late_initcall(printk_late_init);
diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c
new file mode 100644
index 000000000000..653ae04aab7f
--- /dev/null
+++ b/kernel/printk/sysctl.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sysctl.c: General linux system control interface
+ */
+
+#include <linux/sysctl.h>
+#include <linux/printk.h>
+#include <linux/capability.h>
+#include <linux/ratelimit.h>
+#include "internal.h"
+
+static const int ten_thousand = 10000;
+
+static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+
+static struct ctl_table printk_sysctls[] = {
+ {
+ .procname = "printk",
+ .data = &console_loglevel,
+ .maxlen = 4*sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "printk_ratelimit",
+ .data = &printk_ratelimit_state.interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "printk_ratelimit_burst",
+ .data = &printk_ratelimit_state.burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "printk_delay",
+ .data = &printk_delay_msec,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = (void *)&ten_thousand,
+ },
+ {
+ .procname = "printk_devkmsg",
+ .data = devkmsg_log_str,
+ .maxlen = DEVKMSG_STR_MAX_SIZE,
+ .mode = 0644,
+ .proc_handler = devkmsg_sysctl_set_loglvl,
+ },
+ {
+ .procname = "dmesg_restrict",
+ .data = &dmesg_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_sysadmin,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "kptr_restrict",
+ .data = &kptr_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_sysadmin,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {}
+};
+
+void __init printk_sysctl_init(void)
+{
+ register_sysctl_init("kernel", printk_sysctls);
+}
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 84f1d91604cc..d64f0b1d8cd3 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -123,7 +123,7 @@ static struct rcu_tasks rt_name = \
.call_func = call, \
.rtpcpu = &rt_name ## __percpu, \
.name = n, \
- .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS), \
+ .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1, \
.percpu_enqueue_lim = 1, \
.percpu_dequeue_lim = 1, \
.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \
@@ -216,6 +216,7 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
int cpu;
unsigned long flags;
int lim;
+ int shift;
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rcu_task_enqueue_lim < 0) {
@@ -229,7 +230,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
if (lim > nr_cpu_ids)
lim = nr_cpu_ids;
- WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim));
+ shift = ilog2(nr_cpu_ids / lim);
+ if (((nr_cpu_ids - 1) >> shift) >= lim)
+ shift++;
+ WRITE_ONCE(rtp->percpu_enqueue_shift, shift);
WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
smp_store_release(&rtp->percpu_enqueue_lim, lim);
for_each_possible_cpu(cpu) {
@@ -298,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
if (unlikely(needadjust)) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
- WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
+ WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
@@ -413,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim > 1) {
- WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
+ WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
smp_store_release(&rtp->percpu_enqueue_lim, 1);
rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
diff --git a/kernel/resource.c b/kernel/resource.c
index 5ad3eba619ba..9c08d6e9eef2 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -99,7 +99,7 @@ enum { MAX_IORES_LEVEL = 5 };
static void *r_start(struct seq_file *m, loff_t *pos)
__acquires(resource_lock)
{
- struct resource *p = PDE_DATA(file_inode(m->file));
+ struct resource *p = pde_data(file_inode(m->file));
loff_t l = 0;
read_lock(&resource_lock);
for (p = p->child; p && l < *pos; p = r_next(m, p, &l))
@@ -115,7 +115,7 @@ static void r_stop(struct seq_file *m, void *v)
static int r_show(struct seq_file *m, void *v)
{
- struct resource *root = PDE_DATA(file_inode(m->file));
+ struct resource *root = pde_data(file_inode(m->file));
struct resource *r = v, *p;
unsigned long long start, end;
int width = root->end < 0x10000 ? 4 : 8;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2e4ae00e52d1..848eaa0efe0e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5822,8 +5822,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
if (schedstat_enabled() && rq->core->core_forceidle_count) {
- if (cookie)
- rq->core->core_forceidle_start = rq_clock(rq->core);
+ rq->core->core_forceidle_start = rq_clock(rq->core);
rq->core->core_forceidle_occupation = occ;
}
@@ -8219,9 +8218,7 @@ int __cond_resched_lock(spinlock_t *lock)
if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
- if (resched)
- preempt_schedule_common();
- else
+ if (!_cond_resched())
cpu_relax();
ret = 1;
spin_lock(lock);
@@ -8239,9 +8236,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock)
if (rwlock_needbreak(lock) || resched) {
read_unlock(lock);
- if (resched)
- preempt_schedule_common();
- else
+ if (!_cond_resched())
cpu_relax();
ret = 1;
read_lock(lock);
@@ -8259,9 +8254,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock)
if (rwlock_needbreak(lock) || resched) {
write_unlock(lock);
- if (resched)
- preempt_schedule_common();
- else
+ if (!_cond_resched())
cpu_relax();
ret = 1;
write_lock(lock);
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index 1fb45672ec85..c8746a9a7ada 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -277,7 +277,7 @@ void __sched_core_account_forceidle(struct rq *rq)
rq_i = cpu_rq(i);
p = rq_i->core_pick ?: rq_i->curr;
- if (!p->core_cookie)
+ if (p == rq_i->idle)
continue;
__schedstat_add(p->stats.core_forceidle_sum, delta);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 095b0aa378df..5146163bfabb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3028,9 +3028,11 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
static inline void
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- u32 divider = get_pelt_divider(&se->avg);
sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
- cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+ sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+ cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
}
#else
static inline void
@@ -3381,7 +3383,6 @@ void set_task_rq_fair(struct sched_entity *se,
se->avg.last_update_time = n_last_update_time;
}
-
/*
* When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
* propagate its contribution. The key to this propagation is the invariant
@@ -3449,15 +3450,14 @@ void set_task_rq_fair(struct sched_entity *se,
* XXX: only do this for the part of runnable > running ?
*
*/
-
static inline void
update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
- u32 divider;
+ long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg;
+ u32 new_sum, divider;
/* Nothing to update */
- if (!delta)
+ if (!delta_avg)
return;
/*
@@ -3466,23 +3466,30 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
*/
divider = get_pelt_divider(&cfs_rq->avg);
+
/* Set new sched_entity's utilization */
se->avg.util_avg = gcfs_rq->avg.util_avg;
- se->avg.util_sum = se->avg.util_avg * divider;
+ new_sum = se->avg.util_avg * divider;
+ delta_sum = (long)new_sum - (long)se->avg.util_sum;
+ se->avg.util_sum = new_sum;
/* Update parent cfs_rq utilization */
- add_positive(&cfs_rq->avg.util_avg, delta);
- cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+ add_positive(&cfs_rq->avg.util_avg, delta_avg);
+ add_positive(&cfs_rq->avg.util_sum, delta_sum);
+
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+ cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
}
static inline void
update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
- u32 divider;
+ long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+ u32 new_sum, divider;
/* Nothing to update */
- if (!delta)
+ if (!delta_avg)
return;
/*
@@ -3493,19 +3500,25 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
/* Set new sched_entity's runnable */
se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
- se->avg.runnable_sum = se->avg.runnable_avg * divider;
+ new_sum = se->avg.runnable_avg * divider;
+ delta_sum = (long)new_sum - (long)se->avg.runnable_sum;
+ se->avg.runnable_sum = new_sum;
/* Update parent cfs_rq runnable */
- add_positive(&cfs_rq->avg.runnable_avg, delta);
- cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+ add_positive(&cfs_rq->avg.runnable_avg, delta_avg);
+ add_positive(&cfs_rq->avg.runnable_sum, delta_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+ cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
}
static inline void
update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+ long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
unsigned long load_avg;
u64 load_sum = 0;
+ s64 delta_sum;
u32 divider;
if (!runnable_sum)
@@ -3532,7 +3545,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
* assuming all tasks are equally runnable.
*/
if (scale_load_down(gcfs_rq->load.weight)) {
- load_sum = div_s64(gcfs_rq->avg.load_sum,
+ load_sum = div_u64(gcfs_rq->avg.load_sum,
scale_load_down(gcfs_rq->load.weight));
}
@@ -3549,19 +3562,22 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
runnable_sum = max(runnable_sum, running_sum);
- load_sum = (s64)se_weight(se) * runnable_sum;
- load_avg = div_s64(load_sum, divider);
-
- se->avg.load_sum = runnable_sum;
+ load_sum = se_weight(se) * runnable_sum;
+ load_avg = div_u64(load_sum, divider);
- delta = load_avg - se->avg.load_avg;
- if (!delta)
+ delta_avg = load_avg - se->avg.load_avg;
+ if (!delta_avg)
return;
- se->avg.load_avg = load_avg;
+ delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
- add_positive(&cfs_rq->avg.load_avg, delta);
- cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+ se->avg.load_sum = runnable_sum;
+ se->avg.load_avg = load_avg;
+ add_positive(&cfs_rq->avg.load_avg, delta_avg);
+ add_positive(&cfs_rq->avg.load_sum, delta_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+ cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
}
static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
@@ -3652,7 +3668,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
*
* cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
*
- * Returns true if the load decayed or we removed load.
+ * Return: true if the load decayed or we removed load.
*
* Since both these conditions indicate a changed cfs_rq->avg.load we should
* call update_tg_load_avg() when this function returns true.
@@ -3677,15 +3693,32 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
r = removed_load;
sub_positive(&sa->load_avg, r);
- sa->load_sum = sa->load_avg * divider;
+ sub_positive(&sa->load_sum, r * divider);
+ /* See sa->util_sum below */
+ sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER);
r = removed_util;
sub_positive(&sa->util_avg, r);
- sa->util_sum = sa->util_avg * divider;
+ sub_positive(&sa->util_sum, r * divider);
+ /*
+ * Because of rounding, se->util_sum might end up being +1 more than
+ * cfs->util_sum. Although this is not a problem by itself, detaching
+ * a lot of tasks with the rounding problem between 2 updates of
+ * util_avg (~1ms) can make cfs->util_sum become null whereas
+ * cfs->util_avg is not.
+ * Check that util_sum is still above its lower bound for the new
+ * util_avg. Given that period_contrib might have moved since the last
+ * sync, we are only sure that util_sum must be above or equal to
+ * util_avg * minimum possible divider
+ */
+ sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
r = removed_runnable;
sub_positive(&sa->runnable_avg, r);
- sa->runnable_sum = sa->runnable_avg * divider;
+ sub_positive(&sa->runnable_sum, r * divider);
+ /* See sa->util_sum above */
+ sa->runnable_sum = max_t(u32, sa->runnable_sum,
+ sa->runnable_avg * PELT_MIN_DIVIDER);
/*
* removed_runnable is the unweighted version of removed_load so we
@@ -3772,17 +3805,18 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
*/
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- /*
- * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
- * See ___update_load_avg() for details.
- */
- u32 divider = get_pelt_divider(&cfs_rq->avg);
-
dequeue_load_avg(cfs_rq, se);
sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
- cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+ sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+ cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
+
sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
- cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+ sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+ cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
@@ -8539,6 +8573,8 @@ group_type group_classify(unsigned int imbalance_pct,
*
* If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
* of @dst_cpu are idle and @sg has lower priority.
+ *
+ * Return: true if @dst_cpu can pull tasks, false otherwise.
*/
static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
struct sg_lb_stats *sgs,
@@ -8614,6 +8650,7 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
+ * @sds: Load-balancing data with statistics of the local group.
* @group: sched_group whose statistics are to be updated.
* @sgs: variable to hold the statistics for this group.
* @sg_status: Holds flag indicating the status of the sched_group
@@ -9421,12 +9458,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
/**
* find_busiest_group - Returns the busiest group within the sched_domain
* if there is an imbalance.
+ * @env: The load balancing environment.
*
* Also calculates the amount of runnable load which should be moved
* to restore balance.
*
- * @env: The load balancing environment.
- *
* Return: - The busiest group if imbalance exists.
*/
static struct sched_group *find_busiest_group(struct lb_env *env)
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index b5add64d9698..3d2825408e3a 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -147,11 +147,11 @@
#endif
#ifdef CONFIG_RSEQ
-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK \
+#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \
- | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
+ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
#else
-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
+#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
#endif
#define MEMBARRIER_CMD_BITMASK \
@@ -159,7 +159,8 @@
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
- | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
+ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
+ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
static void ipi_mb(void *info)
{
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index e06071bf3472..c336f5f481bc 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running)
}
#endif
+#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)
+
static inline u32 get_pelt_divider(struct sched_avg *avg)
{
- return LOAD_AVG_MAX - 1024 + avg->period_contrib;
+ return PELT_MIN_DIVIDER + avg->period_contrib;
}
static inline void cfs_se_util_change(struct sched_avg *avg)
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index a679613a7cb7..e14358178849 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -1082,44 +1082,6 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
return 0;
}
-static int psi_io_show(struct seq_file *m, void *v)
-{
- return psi_show(m, &psi_system, PSI_IO);
-}
-
-static int psi_memory_show(struct seq_file *m, void *v)
-{
- return psi_show(m, &psi_system, PSI_MEM);
-}
-
-static int psi_cpu_show(struct seq_file *m, void *v)
-{
- return psi_show(m, &psi_system, PSI_CPU);
-}
-
-static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *))
-{
- if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
- return -EPERM;
-
- return single_open(file, psi_show, NULL);
-}
-
-static int psi_io_open(struct inode *inode, struct file *file)
-{
- return psi_open(file, psi_io_show);
-}
-
-static int psi_memory_open(struct inode *inode, struct file *file)
-{
- return psi_open(file, psi_memory_show);
-}
-
-static int psi_cpu_open(struct inode *inode, struct file *file)
-{
- return psi_open(file, psi_cpu_show);
-}
-
struct psi_trigger *psi_trigger_create(struct psi_group *group,
char *buf, size_t nbytes, enum psi_res res)
{
@@ -1162,7 +1124,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->event = 0;
t->last_event_time = 0;
init_waitqueue_head(&t->event_wait);
- kref_init(&t->refcount);
mutex_lock(&group->trigger_lock);
@@ -1191,15 +1152,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
return t;
}
-static void psi_trigger_destroy(struct kref *ref)
+void psi_trigger_destroy(struct psi_trigger *t)
{
- struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
- struct psi_group *group = t->group;
+ struct psi_group *group;
struct task_struct *task_to_destroy = NULL;
- if (static_branch_likely(&psi_disabled))
+ /*
+ * We do not check psi_disabled since it might have been disabled after
+ * the trigger got created.
+ */
+ if (!t)
return;
+ group = t->group;
/*
* Wakeup waiters to stop polling. Can happen if cgroup is deleted
* from under a polling process.
@@ -1235,9 +1200,9 @@ static void psi_trigger_destroy(struct kref *ref)
mutex_unlock(&group->trigger_lock);
/*
- * Wait for both *trigger_ptr from psi_trigger_replace and
- * poll_task RCUs to complete their read-side critical sections
- * before destroying the trigger and optionally the poll_task
+ * Wait for psi_schedule_poll_work RCU to complete its read-side
+ * critical section before destroying the trigger and optionally the
+ * poll_task.
*/
synchronize_rcu();
/*
@@ -1254,18 +1219,6 @@ static void psi_trigger_destroy(struct kref *ref)
kfree(t);
}
-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
-{
- struct psi_trigger *old = *trigger_ptr;
-
- if (static_branch_likely(&psi_disabled))
- return;
-
- rcu_assign_pointer(*trigger_ptr, new);
- if (old)
- kref_put(&old->refcount, psi_trigger_destroy);
-}
-
__poll_t psi_trigger_poll(void **trigger_ptr,
struct file *file, poll_table *wait)
{
@@ -1275,27 +1228,57 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
if (static_branch_likely(&psi_disabled))
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
- rcu_read_lock();
-
- t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
- if (!t) {
- rcu_read_unlock();
+ t = smp_load_acquire(trigger_ptr);
+ if (!t)
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
- }
- kref_get(&t->refcount);
-
- rcu_read_unlock();
poll_wait(file, &t->event_wait, wait);
if (cmpxchg(&t->event, 1, 0) == 1)
ret |= EPOLLPRI;
- kref_put(&t->refcount, psi_trigger_destroy);
-
return ret;
}
+#ifdef CONFIG_PROC_FS
+static int psi_io_show(struct seq_file *m, void *v)
+{
+ return psi_show(m, &psi_system, PSI_IO);
+}
+
+static int psi_memory_show(struct seq_file *m, void *v)
+{
+ return psi_show(m, &psi_system, PSI_MEM);
+}
+
+static int psi_cpu_show(struct seq_file *m, void *v)
+{
+ return psi_show(m, &psi_system, PSI_CPU);
+}
+
+static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *))
+{
+ if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+ return single_open(file, psi_show, NULL);
+}
+
+static int psi_io_open(struct inode *inode, struct file *file)
+{
+ return psi_open(file, psi_io_show);
+}
+
+static int psi_memory_open(struct inode *inode, struct file *file)
+{
+ return psi_open(file, psi_memory_show);
+}
+
+static int psi_cpu_open(struct inode *inode, struct file *file)
+{
+ return psi_open(file, psi_cpu_show);
+}
+
static ssize_t psi_write(struct file *file, const char __user *user_buf,
size_t nbytes, enum psi_res res)
{
@@ -1316,14 +1299,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
buf[buf_size - 1] = '\0';
- new = psi_trigger_create(&psi_system, buf, nbytes, res);
- if (IS_ERR(new))
- return PTR_ERR(new);
-
seq = file->private_data;
+
/* Take seq->lock to protect seq->private from concurrent writes */
mutex_lock(&seq->lock);
- psi_trigger_replace(&seq->private, new);
+
+ /* Allow only one trigger per file descriptor */
+ if (seq->private) {
+ mutex_unlock(&seq->lock);
+ return -EBUSY;
+ }
+
+ new = psi_trigger_create(&psi_system, buf, nbytes, res);
+ if (IS_ERR(new)) {
+ mutex_unlock(&seq->lock);
+ return PTR_ERR(new);
+ }
+
+ smp_store_release(&seq->private, new);
mutex_unlock(&seq->lock);
return nbytes;
@@ -1358,7 +1351,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
- psi_trigger_replace(&seq->private, NULL);
+ psi_trigger_destroy(seq->private);
return single_release(inode, file);
}
@@ -1400,3 +1393,5 @@ static int __init psi_proc_init(void)
return 0;
}
module_init(psi_proc_init);
+
+#endif /* CONFIG_PROC_FS */
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index ce161a8e8d97..66b8af394e58 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -16,11 +16,13 @@
#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
#include <linux/jump_label.h>
#include <linux/sysctl.h>
+#include <linux/init.h>
static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
-int stack_erasing_sysctl(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+#ifdef CONFIG_SYSCTL
+static int stack_erasing_sysctl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret = 0;
int state = !static_branch_unlikely(&stack_erasing_bypass);
@@ -42,6 +44,26 @@ int stack_erasing_sysctl(struct ctl_table *table, int write,
state ? "enabled" : "disabled");
return ret;
}
+static struct ctl_table stackleak_sysctls[] = {
+ {
+ .procname = "stack_erasing",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = stack_erasing_sysctl,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {}
+};
+
+static int __init stackleak_sysctls_init(void)
+{
+ register_sysctl_init("kernel", stackleak_sysctls);
+ return 0;
+}
+late_initcall(stackleak_sysctls_init);
+#endif /* CONFIG_SYSCTL */
#define skip_erasing() static_branch_unlikely(&stack_erasing_bypass)
#else
diff --git a/kernel/sys.c b/kernel/sys.c
index 2450a9f33cb0..ecc4cf019242 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -220,7 +220,6 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
niceval = MAX_NICE;
rcu_read_lock();
- read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
if (who)
@@ -235,9 +234,11 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
+ read_lock(&tasklist_lock);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
error = set_one_prio(p, niceval, error);
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+ read_unlock(&tasklist_lock);
break;
case PRIO_USER:
uid = make_kuid(cred->user_ns, who);
@@ -249,16 +250,15 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
if (!user)
goto out_unlock; /* No processes for this user */
}
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
error = set_one_prio(p, niceval, error);
- } while_each_thread(g, p);
+ }
if (!uid_eq(uid, cred->uid))
free_uid(user); /* For find_user() */
break;
}
out_unlock:
- read_unlock(&tasklist_lock);
rcu_read_unlock();
out:
return error;
@@ -283,7 +283,6 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
return -EINVAL;
rcu_read_lock();
- read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
if (who)
@@ -301,11 +300,13 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
+ read_lock(&tasklist_lock);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
niceval = nice_to_rlimit(task_nice(p));
if (niceval > retval)
retval = niceval;
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+ read_unlock(&tasklist_lock);
break;
case PRIO_USER:
uid = make_kuid(cred->user_ns, who);
@@ -317,19 +318,18 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
if (!user)
goto out_unlock; /* No processes for this user */
}
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
niceval = nice_to_rlimit(task_nice(p));
if (niceval > retval)
retval = niceval;
}
- } while_each_thread(g, p);
+ }
if (!uid_eq(uid, cred->uid))
free_uid(user); /* for find_user() */
break;
}
out_unlock:
- read_unlock(&tasklist_lock);
rcu_read_unlock();
return retval;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ef77be575d87..5ae443b2882e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -20,7 +20,6 @@
*/
#include <linux/module.h>
-#include <linux/aio.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
@@ -50,7 +49,6 @@
#include <linux/times.h>
#include <linux/limits.h>
#include <linux/dcache.h>
-#include <linux/dnotify.h>
#include <linux/syscalls.h>
#include <linux/vmstat.h>
#include <linux/nfs_fs.h>
@@ -58,19 +56,15 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
-#include <linux/kprobes.h>
-#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/kmod.h>
#include <linux/capability.h>
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
-#include <linux/sched/coredump.h>
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
#include <linux/userfaultfd_k.h>
-#include <linux/coredump.h>
#include <linux/latencytop.h>
#include <linux/pid.h>
#include <linux/delayacct.h>
@@ -97,65 +91,21 @@
#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
#include <linux/lockdep.h>
#endif
-#ifdef CONFIG_CHR_DEV_SG
-#include <scsi/sg.h>
-#endif
-#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
-#include <linux/stackleak.h>
-#endif
-#ifdef CONFIG_LOCKUP_DETECTOR
-#include <linux/nmi.h>
-#endif
#if defined(CONFIG_SYSCTL)
/* Constants used for minimum and maximum */
-#ifdef CONFIG_LOCKUP_DETECTOR
-static int sixty = 60;
-#endif
-
-static int __maybe_unused neg_one = -1;
-static int __maybe_unused two = 2;
-static int __maybe_unused four = 4;
-static unsigned long zero_ul;
-static unsigned long one_ul = 1;
-static unsigned long long_max = LONG_MAX;
-static int one_hundred = 100;
-static int two_hundred = 200;
-static int one_thousand = 1000;
-static int three_thousand = 3000;
-#ifdef CONFIG_PRINTK
-static int ten_thousand = 10000;
-#endif
+
#ifdef CONFIG_PERF_EVENTS
-static int six_hundred_forty_kb = 640 * 1024;
+static const int six_hundred_forty_kb = 640 * 1024;
#endif
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
-static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
-
-/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
-static int maxolduid = 65535;
-static int minolduid;
+static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
-static int ngroups_max = NGROUPS_MAX;
+static const int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
-/*
- * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
- * and hung_task_check_interval_secs
- */
-#ifdef CONFIG_DETECT_HUNG_TASK
-static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
-#endif
-
-#ifdef CONFIG_INOTIFY_USER
-#include <linux/inotify.h>
-#endif
-#ifdef CONFIG_FANOTIFY
-#include <linux/fanotify.h>
-#endif
-
#ifdef CONFIG_PROC_SYSCTL
/**
@@ -192,8 +142,8 @@ int sysctl_legacy_va_layout;
#endif
#ifdef CONFIG_COMPACTION
-static int min_extfrag_threshold;
-static int max_extfrag_threshold = 1000;
+/* min_extfrag_threshold is SYSCTL_ZERO */;
+static const int max_extfrag_threshold = 1000;
#endif
#endif /* CONFIG_SYSCTL */
@@ -804,12 +754,12 @@ static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
}
-static int do_proc_douintvec(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos,
- int (*conv)(unsigned long *lvalp,
- unsigned int *valp,
- int write, void *data),
- void *data)
+int do_proc_douintvec(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
{
return __do_proc_douintvec(table->data, table, write,
buffer, lenp, ppos, conv, data);
@@ -938,17 +888,6 @@ static int proc_taint(struct ctl_table *table, int write,
return err;
}
-#ifdef CONFIG_PRINTK
-static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- if (write && !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
-#endif
-
/**
* struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
* @min: pointer to minimum allowable value
@@ -1144,67 +1083,6 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write,
}
EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
-static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
- unsigned int *valp,
- int write, void *data)
-{
- if (write) {
- unsigned int val;
-
- val = round_pipe_size(*lvalp);
- if (val == 0)
- return -EINVAL;
-
- *valp = val;
- } else {
- unsigned int val = *valp;
- *lvalp = (unsigned long) val;
- }
-
- return 0;
-}
-
-static int proc_dopipe_max_size(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- return do_proc_douintvec(table, write, buffer, lenp, ppos,
- do_proc_dopipe_max_size_conv, NULL);
-}
-
-static void validate_coredump_safety(void)
-{
-#ifdef CONFIG_COREDUMP
- if (suid_dumpable == SUID_DUMP_ROOT &&
- core_pattern[0] != '/' && core_pattern[0] != '|') {
- printk(KERN_WARNING
-"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
-"Pipe handler or fully qualified core dump path required.\n"
-"Set kernel.core_pattern before fs.suid_dumpable.\n"
- );
- }
-#endif
-}
-
-static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (!error)
- validate_coredump_safety();
- return error;
-}
-
-#ifdef CONFIG_COREDUMP
-static int proc_dostring_coredump(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int error = proc_dostring(table, write, buffer, lenp, ppos);
- if (!error)
- validate_coredump_safety();
- return error;
-}
-#endif
-
#ifdef CONFIG_MAGIC_SYSRQ
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -1267,10 +1145,11 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
err = proc_get_long(&p, &left, &val, &neg,
proc_wspace_sep,
sizeof(proc_wspace_sep), NULL);
- if (err)
+ if (err || neg) {
+ err = -EINVAL;
break;
- if (neg)
- continue;
+ }
+
val = convmul * val / convdiv;
if ((min && val < *min) || (max && val > *max)) {
err = -EINVAL;
@@ -1928,29 +1807,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#ifdef CONFIG_COREDUMP
- {
- .procname = "core_uses_pid",
- .data = &core_uses_pid,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "core_pattern",
- .data = core_pattern,
- .maxlen = CORENAME_MAX_SIZE,
- .mode = 0644,
- .proc_handler = proc_dostring_coredump,
- },
- {
- .procname = "core_pipe_limit",
- .data = &core_pipe_limit,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
@@ -1964,7 +1820,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &neg_one,
+ .extra1 = SYSCTL_NEG_ONE,
.extra2 = SYSCTL_ONE,
},
#endif
@@ -2131,15 +1987,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dostring,
},
#endif
-#ifdef CONFIG_CHR_DEV_SG
- {
- .procname = "sg-big-buff",
- .data = &sg_big_buff,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
-#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
{
.procname = "acct",
@@ -2175,30 +2022,18 @@ static struct ctl_table kern_table[] = {
.proc_handler = sysctl_max_threads,
},
{
- .procname = "random",
- .mode = 0555,
- .child = random_table,
- },
- {
.procname = "usermodehelper",
.mode = 0555,
.child = usermodehelper_table,
},
-#ifdef CONFIG_FW_LOADER_USER_HELPER
- {
- .procname = "firmware_config",
- .mode = 0555,
- .child = firmware_config_table,
- },
-#endif
{
.procname = "overflowuid",
.data = &overflowuid,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &minolduid,
- .extra2 = &maxolduid,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
},
{
.procname = "overflowgid",
@@ -2206,8 +2041,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &minolduid,
- .extra2 = &maxolduid,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
},
#ifdef CONFIG_S390
{
@@ -2252,66 +2087,9 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
-#if defined CONFIG_PRINTK
- {
- .procname = "printk",
- .data = &console_loglevel,
- .maxlen = 4*sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "printk_ratelimit",
- .data = &printk_ratelimit_state.interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "printk_ratelimit_burst",
- .data = &printk_ratelimit_state.burst,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "printk_delay",
- .data = &printk_delay_msec,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &ten_thousand,
- },
- {
- .procname = "printk_devkmsg",
- .data = devkmsg_log_str,
- .maxlen = DEVKMSG_STR_MAX_SIZE,
- .mode = 0644,
- .proc_handler = devkmsg_sysctl_set_loglvl,
- },
- {
- .procname = "dmesg_restrict",
- .data = &dmesg_restrict,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax_sysadmin,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "kptr_restrict",
- .data = &kptr_restrict,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax_sysadmin,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &two,
- },
-#endif
{
.procname = "ngroups_max",
- .data = &ngroups_max,
+ .data = (void *)&ngroups_max,
.maxlen = sizeof (int),
.mode = 0444,
.proc_handler = proc_dointvec,
@@ -2323,96 +2101,6 @@ static struct ctl_table kern_table[] = {
.mode = 0444,
.proc_handler = proc_dointvec,
},
-#if defined(CONFIG_LOCKUP_DETECTOR)
- {
- .procname = "watchdog",
- .data = &watchdog_user_enabled,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_watchdog,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "watchdog_thresh",
- .data = &watchdog_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_watchdog_thresh,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &sixty,
- },
- {
- .procname = "nmi_watchdog",
- .data = &nmi_watchdog_user_enabled,
- .maxlen = sizeof(int),
- .mode = NMI_WATCHDOG_SYSCTL_PERM,
- .proc_handler = proc_nmi_watchdog,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "watchdog_cpumask",
- .data = &watchdog_cpumask_bits,
- .maxlen = NR_CPUS,
- .mode = 0644,
- .proc_handler = proc_watchdog_cpumask,
- },
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
- {
- .procname = "soft_watchdog",
- .data = &soft_watchdog_user_enabled,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_soft_watchdog,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "softlockup_panic",
- .data = &softlockup_panic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#ifdef CONFIG_SMP
- {
- .procname = "softlockup_all_cpu_backtrace",
- .data = &sysctl_softlockup_all_cpu_backtrace,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif /* CONFIG_SMP */
-#endif
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
- {
- .procname = "hardlockup_panic",
- .data = &hardlockup_panic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#ifdef CONFIG_SMP
- {
- .procname = "hardlockup_all_cpu_backtrace",
- .data = &sysctl_hardlockup_all_cpu_backtrace,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif /* CONFIG_SMP */
-#endif
-#endif
-
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
.procname = "unknown_nmi_panic",
@@ -2515,60 +2203,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#ifdef CONFIG_DETECT_HUNG_TASK
-#ifdef CONFIG_SMP
- {
- .procname = "hung_task_all_cpu_backtrace",
- .data = &sysctl_hung_task_all_cpu_backtrace,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif /* CONFIG_SMP */
- {
- .procname = "hung_task_panic",
- .data = &sysctl_hung_task_panic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "hung_task_check_count",
- .data = &sysctl_hung_task_check_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "hung_task_timeout_secs",
- .data = &sysctl_hung_task_timeout_secs,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_dohung_task_timeout_secs,
- .extra2 = &hung_task_timeout_max,
- },
- {
- .procname = "hung_task_check_interval_secs",
- .data = &sysctl_hung_task_check_interval_secs,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_dohung_task_timeout_secs,
- .extra2 = &hung_task_timeout_max,
- },
- {
- .procname = "hung_task_warnings",
- .data = &sysctl_hung_task_warnings,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &neg_one,
- },
-#endif
#ifdef CONFIG_RT_MUTEXES
{
.procname = "max_lock_depth",
@@ -2628,7 +2262,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = perf_cpu_time_max_percent_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "perf_event_max_stack",
@@ -2637,7 +2271,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = perf_event_max_stack_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &six_hundred_forty_kb,
+ .extra2 = (void *)&six_hundred_forty_kb,
},
{
.procname = "perf_event_max_contexts_per_stack",
@@ -2646,7 +2280,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = perf_event_max_stack_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
#endif
{
@@ -2677,7 +2311,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = bpf_unpriv_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "bpf_stats_enabled",
@@ -2709,17 +2343,6 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_INT_MAX,
},
#endif
-#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
- {
- .procname = "stack_erasing",
- .data = NULL,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = stack_erasing_sysctl,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif
{ }
};
@@ -2731,7 +2354,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = overcommit_policy_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "panic_on_oom",
@@ -2740,7 +2363,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "oom_kill_allocating_task",
@@ -2785,7 +2408,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = dirty_background_ratio_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "dirty_background_bytes",
@@ -2793,7 +2416,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(dirty_background_bytes),
.mode = 0644,
.proc_handler = dirty_background_bytes_handler,
- .extra1 = &one_ul,
+ .extra1 = SYSCTL_LONG_ONE,
},
{
.procname = "dirty_ratio",
@@ -2802,7 +2425,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = dirty_ratio_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "dirty_bytes",
@@ -2810,7 +2433,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(vm_dirty_bytes),
.mode = 0644,
.proc_handler = dirty_bytes_handler,
- .extra1 = &dirty_bytes_min,
+ .extra1 = (void *)&dirty_bytes_min,
},
{
.procname = "dirty_writeback_centisecs",
@@ -2842,7 +2465,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two_hundred,
+ .extra2 = SYSCTL_TWO_HUNDRED,
},
#ifdef CONFIG_HUGETLB_PAGE
{
@@ -2899,7 +2522,7 @@ static struct ctl_table vm_table[] = {
.mode = 0200,
.proc_handler = drop_caches_sysctl_handler,
.extra1 = SYSCTL_ONE,
- .extra2 = &four,
+ .extra2 = SYSCTL_FOUR,
},
#ifdef CONFIG_COMPACTION
{
@@ -2916,7 +2539,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = compaction_proactiveness_sysctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "extfrag_threshold",
@@ -2924,8 +2547,8 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &min_extfrag_threshold,
- .extra2 = &max_extfrag_threshold,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = (void *)&max_extfrag_threshold,
},
{
.procname = "compact_unevictable_allowed",
@@ -2961,7 +2584,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = watermark_scale_factor_sysctl_handler,
.extra1 = SYSCTL_ONE,
- .extra2 = &three_thousand,
+ .extra2 = SYSCTL_THREE_THOUSAND,
},
{
.procname = "percpu_pagelist_high_fraction",
@@ -3040,7 +2663,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "min_slab_ratio",
@@ -3049,7 +2672,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = sysctl_min_slab_ratio_sysctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
#endif
#ifdef CONFIG_SMP
@@ -3183,221 +2806,6 @@ static struct ctl_table vm_table[] = {
{ }
};
-static struct ctl_table fs_table[] = {
- {
- .procname = "inode-nr",
- .data = &inodes_stat,
- .maxlen = 2*sizeof(long),
- .mode = 0444,
- .proc_handler = proc_nr_inodes,
- },
- {
- .procname = "inode-state",
- .data = &inodes_stat,
- .maxlen = 7*sizeof(long),
- .mode = 0444,
- .proc_handler = proc_nr_inodes,
- },
- {
- .procname = "file-nr",
- .data = &files_stat,
- .maxlen = sizeof(files_stat),
- .mode = 0444,
- .proc_handler = proc_nr_files,
- },
- {
- .procname = "file-max",
- .data = &files_stat.max_files,
- .maxlen = sizeof(files_stat.max_files),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero_ul,
- .extra2 = &long_max,
- },
- {
- .procname = "nr_open",
- .data = &sysctl_nr_open,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &sysctl_nr_open_min,
- .extra2 = &sysctl_nr_open_max,
- },
- {
- .procname = "dentry-state",
- .data = &dentry_stat,
- .maxlen = 6*sizeof(long),
- .mode = 0444,
- .proc_handler = proc_nr_dentry,
- },
- {
- .procname = "overflowuid",
- .data = &fs_overflowuid,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &minolduid,
- .extra2 = &maxolduid,
- },
- {
- .procname = "overflowgid",
- .data = &fs_overflowgid,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &minolduid,
- .extra2 = &maxolduid,
- },
-#ifdef CONFIG_FILE_LOCKING
- {
- .procname = "leases-enable",
- .data = &leases_enable,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
-#ifdef CONFIG_DNOTIFY
- {
- .procname = "dir-notify-enable",
- .data = &dir_notify_enable,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
-#ifdef CONFIG_MMU
-#ifdef CONFIG_FILE_LOCKING
- {
- .procname = "lease-break-time",
- .data = &lease_break_time,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
-#ifdef CONFIG_AIO
- {
- .procname = "aio-nr",
- .data = &aio_nr,
- .maxlen = sizeof(aio_nr),
- .mode = 0444,
- .proc_handler = proc_doulongvec_minmax,
- },
- {
- .procname = "aio-max-nr",
- .data = &aio_max_nr,
- .maxlen = sizeof(aio_max_nr),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
-#endif /* CONFIG_AIO */
-#ifdef CONFIG_INOTIFY_USER
- {
- .procname = "inotify",
- .mode = 0555,
- .child = inotify_table,
- },
-#endif
-#ifdef CONFIG_FANOTIFY
- {
- .procname = "fanotify",
- .mode = 0555,
- .child = fanotify_table,
- },
-#endif
-#ifdef CONFIG_EPOLL
- {
- .procname = "epoll",
- .mode = 0555,
- .child = epoll_table,
- },
-#endif
-#endif
- {
- .procname = "protected_symlinks",
- .data = &sysctl_protected_symlinks,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "protected_hardlinks",
- .data = &sysctl_protected_hardlinks,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "protected_fifos",
- .data = &sysctl_protected_fifos,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &two,
- },
- {
- .procname = "protected_regular",
- .data = &sysctl_protected_regular,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &two,
- },
- {
- .procname = "suid_dumpable",
- .data = &suid_dumpable,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax_coredump,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &two,
- },
-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
- {
- .procname = "binfmt_misc",
- .mode = 0555,
- .child = sysctl_mount_point,
- },
-#endif
- {
- .procname = "pipe-max-size",
- .data = &pipe_max_size,
- .maxlen = sizeof(pipe_max_size),
- .mode = 0644,
- .proc_handler = proc_dopipe_max_size,
- },
- {
- .procname = "pipe-user-pages-hard",
- .data = &pipe_user_pages_hard,
- .maxlen = sizeof(pipe_user_pages_hard),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
- {
- .procname = "pipe-user-pages-soft",
- .data = &pipe_user_pages_soft,
- .maxlen = sizeof(pipe_user_pages_soft),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
- {
- .procname = "mount-max",
- .data = &sysctl_mount_max,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ONE,
- },
- { }
-};
-
static struct ctl_table debug_table[] = {
#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
{
@@ -3408,17 +2816,6 @@ static struct ctl_table debug_table[] = {
.proc_handler = proc_dointvec
},
#endif
-#if defined(CONFIG_OPTPROBES)
- {
- .procname = "kprobes-optimization",
- .data = &sysctl_kprobes_optimization,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_kprobes_optimization_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif
{ }
};
@@ -3426,41 +2823,18 @@ static struct ctl_table dev_table[] = {
{ }
};
-static struct ctl_table sysctl_base_table[] = {
- {
- .procname = "kernel",
- .mode = 0555,
- .child = kern_table,
- },
- {
- .procname = "vm",
- .mode = 0555,
- .child = vm_table,
- },
- {
- .procname = "fs",
- .mode = 0555,
- .child = fs_table,
- },
- {
- .procname = "debug",
- .mode = 0555,
- .child = debug_table,
- },
- {
- .procname = "dev",
- .mode = 0555,
- .child = dev_table,
- },
- { }
-};
+DECLARE_SYSCTL_BASE(kernel, kern_table);
+DECLARE_SYSCTL_BASE(vm, vm_table);
+DECLARE_SYSCTL_BASE(debug, debug_table);
+DECLARE_SYSCTL_BASE(dev, dev_table);
-int __init sysctl_init(void)
+int __init sysctl_init_bases(void)
{
- struct ctl_table_header *hdr;
+ register_sysctl_base(kernel);
+ register_sysctl_base(vm);
+ register_sysctl_base(debug);
+ register_sysctl_base(dev);
- hdr = register_sysctl_table(sysctl_base_table);
- kmemleak_not_leak(hdr);
return 0;
}
#endif /* CONFIG_SYSCTL */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b7e52a642948..1cf73807b450 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -285,7 +285,7 @@ static void clocksource_verify_choose_cpus(void)
return;
/* Make sure to select at least one CPU other than the current CPU. */
- cpu = cpumask_next(-1, cpu_online_mask);
+ cpu = cpumask_first(cpu_online_mask);
if (cpu == smp_processor_id())
cpu = cpumask_next(cpu, cpu_online_mask);
if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
@@ -307,7 +307,7 @@ static void clocksource_verify_choose_cpus(void)
cpu = prandom_u32() % nr_cpu_ids;
cpu = cpumask_next(cpu - 1, cpu_online_mask);
if (cpu >= nr_cpu_ids)
- cpu = cpumask_next(-1, cpu_online_mask);
+ cpu = cpumask_first(cpu_online_mask);
if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
cpumask_set_cpu(cpu, &cpus_chosen);
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f468767bc287..a5eb5e7fd624 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -70,6 +70,19 @@ config HAVE_C_RECORDMCOUNT
help
C version of recordmcount available?
+config HAVE_BUILDTIME_MCOUNT_SORT
+ bool
+ help
+ An architecture selects this if it sorts the mcount_loc section
+ at build time.
+
+config BUILDTIME_MCOUNT_SORT
+ bool
+ default y
+ depends on HAVE_BUILDTIME_MCOUNT_SORT && DYNAMIC_FTRACE
+ help
+ Sort the mcount_loc section at build time.
+
config TRACER_MAX_TRACE
bool
@@ -918,7 +931,7 @@ config EVENT_TRACE_TEST_SYSCALLS
config FTRACE_SORT_STARTUP_TEST
bool "Verify compile time sorting of ftrace functions"
depends on DYNAMIC_FTRACE
- depends on BUILDTIME_TABLE_SORT
+ depends on BUILDTIME_MCOUNT_SORT
help
Sorting of the mcount_loc sections that is used to find the
where the ftrace knows where to patch functions for tracing
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6163b6f762f7..f9feb197b2da 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6435,10 +6435,10 @@ static int ftrace_process_locs(struct module *mod,
/*
* Sorting mcount in vmlinux at build time depend on
- * CONFIG_BUILDTIME_TABLE_SORT, while mcount loc in
+ * CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in
* modules can not be sorted at build time.
*/
- if (!IS_ENABLED(CONFIG_BUILDTIME_TABLE_SORT) || mod) {
+ if (!IS_ENABLED(CONFIG_BUILDTIME_MCOUNT_SORT) || mod) {
sort(start, count, sizeof(*start),
ftrace_cmp_ips, NULL);
} else {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a569a0cb81ee..c860f582b078 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7740,7 +7740,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
err = kzalloc(sizeof(*err), GFP_KERNEL);
if (!err)
err = ERR_PTR(-ENOMEM);
- tr->n_err_log_entries++;
+ else
+ tr->n_err_log_entries++;
return err;
}
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 5e6a988a8a51..ada87bfb5bb8 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2503,6 +2503,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
expr->fn = hist_field_unary_minus;
expr->operands[0] = operand1;
+ expr->size = operand1->size;
+ expr->is_signed = operand1->is_signed;
expr->operator = FIELD_OP_UNARY_MINUS;
expr->name = expr_str(expr, 0);
expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
@@ -2719,6 +2721,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
/* The operand sizes should be the same, so just pick one */
expr->size = operand1->size;
+ expr->is_signed = operand1->is_signed;
expr->operator = field_op;
expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
@@ -3935,6 +3938,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
var_ref_idx = find_var_ref_idx(hist_data, var_ref);
if (WARN_ON(var_ref_idx < 0)) {
+ kfree(p);
ret = var_ref_idx;
goto err;
}
@@ -6163,7 +6167,9 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
lockdep_assert_held(&event_mutex);
- if (glob && strlen(glob)) {
+ WARN_ON(!glob);
+
+ if (strlen(glob)) {
hist_err_clear();
last_cmd_set(file, param);
}
@@ -6196,7 +6202,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
continue;
}
break;
- } while (p);
+ } while (1);
if (!p)
param = NULL;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 7b32c356ebc5..65b597431c86 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -190,6 +190,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
kfree(new);
} else {
hlist_add_head(&new->node, hashent);
+ get_user_ns(new->ns);
spin_unlock_irq(&ucounts_lock);
return new;
}
@@ -210,6 +211,7 @@ void put_ucounts(struct ucounts *ucounts)
if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
hlist_del_init(&ucounts->node);
spin_unlock_irqrestore(&ucounts_lock, flags);
+ put_user_ns(ucounts->ns);
kfree(ucounts);
}
}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index ad912511a0c0..99afb88d2e85 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -740,6 +740,106 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
mutex_unlock(&watchdog_mutex);
return err;
}
+
+static const int sixty = 60; /* upper bound (.extra2) of the "watchdog_thresh" entry below */
+
+static struct ctl_table watchdog_sysctls[] = { /* registered under "kernel" by watchdog_sysctl_init() */
+ {
+ .procname = "watchdog",
+ .data = &watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_watchdog,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "watchdog_thresh",
+ .data = &watchdog_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_watchdog_thresh,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = (void *)&sixty, /* cast drops the const qualifier of 'sixty'; presumably .extra2 is a plain void * */
+ },
+ {
+ .procname = "nmi_watchdog",
+ .data = &nmi_watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = NMI_WATCHDOG_SYSCTL_PERM,
+ .proc_handler = proc_nmi_watchdog,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "watchdog_cpumask",
+ .data = &watchdog_cpumask_bits,
+ .maxlen = NR_CPUS,
+ .mode = 0644,
+ .proc_handler = proc_watchdog_cpumask,
+ },
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+ {
+ .procname = "soft_watchdog",
+ .data = &soft_watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_soft_watchdog,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "softlockup_panic",
+ .data = &softlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#ifdef CONFIG_SMP
+ {
+ .procname = "softlockup_all_cpu_backtrace",
+ .data = &sysctl_softlockup_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+ {
+ .procname = "hardlockup_panic",
+ .data = &hardlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#ifdef CONFIG_SMP
+ {
+ .procname = "hardlockup_all_cpu_backtrace",
+ .data = &sysctl_hardlockup_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+ {}
+};
+
+static void __init watchdog_sysctl_init(void)
+{
+ register_sysctl_init("kernel", watchdog_sysctls); /* hand the table above to the sysctl core under "kernel" */
+}
+#else
+#define watchdog_sysctl_init() do { } while (0) /* no-op stub so lockup_detector_init() can call it unconditionally */
#endif /* CONFIG_SYSCTL */
void __init lockup_detector_init(void)
@@ -753,4 +853,5 @@ void __init lockup_detector_init(void)
if (!watchdog_nmi_probe())
nmi_watchdog_available = true;
lockup_detector_setup();
+ watchdog_sysctl_init();
}
diff --git a/lib/Kconfig b/lib/Kconfig
index c20b68ad2bc3..c80fde816a7e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -65,9 +65,6 @@ config GENERIC_STRNLEN_USER
config GENERIC_NET_UTILS
bool
-config GENERIC_FIND_FIRST_BIT
- bool
-
source "lib/math/Kconfig"
config NO_GENERIC_PCI_IOPORT_MAP
@@ -673,6 +670,10 @@ config STACKDEPOT
bool
select STACKTRACE
+config STACKDEPOT_ALWAYS_INIT
+ bool
+ select STACKDEPOT
+
config STACK_HASH_ORDER
int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
range 12 20
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c77fe36bb3d8..14b89aa37c5c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1984,6 +1984,8 @@ config KCOV
bool "Code coverage for fuzzing"
depends on ARCH_HAS_KCOV
depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
+ depends on !ARCH_WANTS_NO_INSTR || STACK_VALIDATION || \
+ GCC_VERSION >= 120000 || CLANG_VERSION >= 130000
select DEBUG_FS
select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
help
@@ -2222,12 +2224,11 @@ config TEST_RHASHTABLE
If unsure, say N.
-config TEST_HASH
- tristate "Perform selftest on hash functions"
+config TEST_SIPHASH
+ tristate "Perform selftest on siphash functions"
help
- Enable this option to test the kernel's integer (<linux/hash.h>),
- string (<linux/stringhash.h>), and siphash (<linux/siphash.h>)
- hash functions on boot (or module load).
+ Enable this option to test the kernel's siphash (<linux/siphash.h>) hash
+ functions on boot (or module load).
This is intended to help people writing architecture-specific
optimized versions. If unsure, say N.
@@ -2371,6 +2372,25 @@ config BITFIELD_KUNIT
If unsure, say N.
+config HASH_KUNIT_TEST
+ tristate "KUnit Test for integer hash functions" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Enable this option to test the kernel's string (<linux/stringhash.h>) and
+ integer (<linux/hash.h>) hash functions on boot.
+
+ KUnit tests run during boot and output the results to the debug log
+ in TAP format (https://testanything.org/). Only useful for kernel devs
+ running the KUnit test harness, and not intended for inclusion into a
+ production build.
+
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ This is intended to help people writing architecture-specific
+ optimized versions. If unsure, say N.
+
config RESOURCE_KUNIT_TEST
tristate "KUnit test for resource API"
depends on KUNIT
@@ -2502,6 +2522,7 @@ config TEST_KMOD
depends on m
depends on NETDEVICES && NET_CORE && INET # for TUN
depends on BLOCK
+ depends on PAGE_SIZE_LESS_THAN_256KB # for BTRFS
select TEST_LKM
select XFS_FS
select TUN
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index cdc842d090db..879757b6dd14 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -38,7 +38,7 @@ menuconfig KASAN
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS
depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
- select STACKDEPOT
+ select STACKDEPOT_ALWAYS_INIT
help
Enables KASAN (KernelAddressSANitizer) - runtime memory debugger,
designed to find out-of-bounds accesses and use-after-free bugs.
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index e5372a13511d..236c5cefc4cc 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -112,19 +112,6 @@ config UBSAN_UNREACHABLE
This option enables -fsanitize=unreachable which checks for control
flow reaching an expected-to-be-unreachable position.
-config UBSAN_OBJECT_SIZE
- bool "Perform checking for accesses beyond the end of objects"
- default UBSAN
- # gcc hugely expands stack usage with -fsanitize=object-size
- # https://lore.kernel.org/lkml/CAHk-=wjPasyJrDuwDnpHJS2TuQfExwe=px-SzLeN8GFMAQJPmQ@mail.gmail.com/
- depends on !CC_IS_GCC
- depends on $(cc-option,-fsanitize=object-size)
- help
- This option enables -fsanitize=object-size which checks for accesses
- beyond the end of objects where the optimizer can determine both the
- object being operated on and its size, usually seen with bad downcasts,
- or access to struct members from NULL pointers.
-
config UBSAN_BOOL
bool "Perform checking for non-boolean values used as boolean"
default UBSAN
diff --git a/lib/Makefile b/lib/Makefile
index b213a7bbf3fd..300f569c626b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -61,7 +61,8 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
obj-$(CONFIG_TEST_BITOPS) += test_bitops.o
CFLAGS_test_bitops.o += -Werror
obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
-obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
+obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o
+obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o
obj-$(CONFIG_TEST_IDA) += test_ida.o
obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o
CFLAGS_test_kasan.o += -fno-builtin
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 0f8e2e369b1d..1b8e4b2a9cba 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -89,6 +89,27 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
EXPORT_SYMBOL(_find_first_bit);
#endif
+#ifndef find_first_and_bit
+/*
+ * Find the first bit that is set in both memory regions, i.e. the first
+ * set bit of (addr1 & addr2), scanning one word at a time.
+ *
+ * Returns the index of that bit, or @size if no such bit exists below
+ * @size. The min() clamp covers a hit in the bits of the last word that
+ * lie at or beyond @size when @size is not a multiple of BITS_PER_LONG.
+ */
+unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ unsigned long idx, val;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+ val = addr1[idx] & addr2[idx];
+ if (val)
+ return min(idx * BITS_PER_LONG + __ffs(val), size);
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(_find_first_and_bit);
+#endif
+
#ifndef find_first_zero_bit
/*
* Find the first cleared bit in a memory region.
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
index 5637c5711db9..db904b57d4b8 100644
--- a/lib/find_bit_benchmark.c
+++ b/lib/find_bit_benchmark.c
@@ -49,6 +49,25 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len)
return 0;
}
+static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, unsigned long len)
+{ /* Benchmark: time repeated find_first_and_bit() searches over (copy of bitmap) & bitmap2. */
+ static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata; /* scratch copy whose bits are cleared below */
+ unsigned long i, cnt;
+ ktime_t time;
+
+ bitmap_copy(cp, bitmap, BITMAP_LEN); /* only cp is modified; the caller's bitmap is left as is */
+
+ time = ktime_get();
+ for (cnt = i = 0; i < len; cnt++) { /* one search per pass; stops once a search yields i >= len */
+ i = find_first_and_bit(cp, bitmap2, len);
+ __clear_bit(i, cp); /* drop the hit so the next search moves on. NOTE(review): also runs on the last pass, where i is presumably len - confirm bit 'len' is still inside cp */
+ }
+ time = ktime_get() - time; /* elapsed time of the whole loop */
+ pr_err("find_first_and_bit: %18llu ns, %6ld iterations\n", time, cnt);
+
+ return 0; /* always reports success; the result is the log line above */
+}
+
static int __init test_find_next_bit(const void *bitmap, unsigned long len)
{
unsigned long i, cnt;
@@ -129,6 +148,7 @@ static int __init find_bit_test(void)
* traverse only part of bitmap to avoid soft lockup.
*/
test_find_first_bit(bitmap, BITMAP_LEN / 10);
+ test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
pr_err("\nStart testing find_bit() with sparse bitmap\n");
@@ -145,6 +165,7 @@ static int __init find_bit_test(void)
test_find_next_zero_bit(bitmap, BITMAP_LEN);
test_find_last_bit(bitmap, BITMAP_LEN);
test_find_first_bit(bitmap, BITMAP_LEN);
+ test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
/*
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 9a57257988c7..00fc50d0a640 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -251,7 +251,7 @@ void gen_pool_destroy(struct gen_pool *pool)
list_del(&chunk->next_chunk);
end_bit = chunk_size(chunk) >> order;
- bit = find_next_bit(chunk->bits, end_bit, 0);
+ bit = find_first_bit(chunk->bits, end_bit);
BUG_ON(bit < end_bit);
vfree(chunk);
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index 059b8b00dc53..886510d248e5 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -22,6 +22,7 @@
#include "kstrtox.h"
+noinline
const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
{
if (*base == 0) {
@@ -47,6 +48,7 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
*
* Don't you dare use this function.
*/
+noinline
unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
size_t max_chars)
{
@@ -85,6 +87,7 @@ unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned lon
return rv;
}
+noinline
unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
{
return _parse_integer_limit(s, base, p, INT_MAX);
@@ -125,6 +128,7 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
* Preferred over simple_strtoull(). Return code must be checked.
*/
+noinline
int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
{
if (s[0] == '+')
@@ -148,6 +152,7 @@ EXPORT_SYMBOL(kstrtoull);
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
* Preferred over simple_strtoll(). Return code must be checked.
*/
+noinline
int kstrtoll(const char *s, unsigned int base, long long *res)
{
unsigned long long tmp;
@@ -219,6 +224,7 @@ EXPORT_SYMBOL(_kstrtol);
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
* Preferred over simple_strtoul(). Return code must be checked.
*/
+noinline
int kstrtouint(const char *s, unsigned int base, unsigned int *res)
{
unsigned long long tmp;
@@ -249,6 +255,7 @@ EXPORT_SYMBOL(kstrtouint);
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
* Preferred over simple_strtol(). Return code must be checked.
*/
+noinline
int kstrtoint(const char *s, unsigned int base, int *res)
{
long long tmp;
@@ -264,6 +271,7 @@ int kstrtoint(const char *s, unsigned int base, int *res)
}
EXPORT_SYMBOL(kstrtoint);
+noinline
int kstrtou16(const char *s, unsigned int base, u16 *res)
{
unsigned long long tmp;
@@ -279,6 +287,7 @@ int kstrtou16(const char *s, unsigned int base, u16 *res)
}
EXPORT_SYMBOL(kstrtou16);
+noinline
int kstrtos16(const char *s, unsigned int base, s16 *res)
{
long long tmp;
@@ -294,6 +303,7 @@ int kstrtos16(const char *s, unsigned int base, s16 *res)
}
EXPORT_SYMBOL(kstrtos16);
+noinline
int kstrtou8(const char *s, unsigned int base, u8 *res)
{
unsigned long long tmp;
@@ -309,6 +319,7 @@ int kstrtou8(const char *s, unsigned int base, u8 *res)
}
EXPORT_SYMBOL(kstrtou8);
+noinline
int kstrtos8(const char *s, unsigned int base, s8 *res)
{
long long tmp;
@@ -333,6 +344,7 @@ EXPORT_SYMBOL(kstrtos8);
* [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value
* pointed to by res is updated upon finding a match.
*/
+noinline
int kstrtobool(const char *s, bool *res)
{
if (!s)
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 5d5424b51b74..9daa3fb9d1cd 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -49,11 +49,11 @@ bool __list_del_entry_valid(struct list_head *entry)
"list_del corruption, %px->prev is LIST_POISON2 (%px)\n",
entry, LIST_POISON2) ||
CHECK_DATA_CORRUPTION(prev->next != entry,
- "list_del corruption. prev->next should be %px, but was %px\n",
- entry, prev->next) ||
+ "list_del corruption. prev->next should be %px, but was %px. (prev=%px)\n",
+ entry, prev->next, prev) ||
CHECK_DATA_CORRUPTION(next->prev != entry,
- "list_del corruption. next->prev should be %px, but was %px\n",
- entry, next->prev))
+ "list_del corruption. next->prev should be %px, but was %px. (next=%px)\n",
+ entry, next->prev, next))
return false;
return true;
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index 673bd206aa98..330aa539b46e 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -36,6 +36,8 @@
*/
#include <asm/unaligned.h>
+
+#include <linux/bitops.h>
#include <linux/string.h> /* memset, memcpy */
#define FORCE_INLINE __always_inline
diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
index 0ae2e66dcf0f..a6789c0c626b 100644
--- a/lib/ref_tracker.c
+++ b/lib/ref_tracker.c
@@ -69,9 +69,12 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir,
unsigned long entries[REF_TRACKER_STACK_ENTRIES];
struct ref_tracker *tracker;
unsigned int nr_entries;
+ gfp_t gfp_mask = gfp;
unsigned long flags;
- *trackerp = tracker = kzalloc(sizeof(*tracker), gfp | __GFP_NOFAIL);
+ if (gfp & __GFP_DIRECT_RECLAIM)
+ gfp_mask |= __GFP_NOFAIL;
+ *trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask);
if (unlikely(!tracker)) {
pr_err_once("memory allocation failure, unreliable refcount tracker.\n");
refcount_inc(&dir->untracked);
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2709ab825499..09d293c30fd2 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
- unsigned int depth)
+static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int wake_batch)
{
- unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
if (sbq->wake_batch != wake_batch) {
@@ -476,6 +475,30 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
}
}
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
+{
+ unsigned int wake_batch;
+
+ wake_batch = sbq_calc_wake_batch(sbq, depth);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users)
+{
+ unsigned int wake_batch;
+ unsigned int min_batch;
+ unsigned int depth = (sbq->sb.depth + users - 1) / users;
+
+ min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 4 : 1;
+
+ wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES,
+ min_batch, SBQ_WAKE_BATCH);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
+
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
sbitmap_queue_update_wake_batch(sbq, depth);
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index b437ae79aca1..bf5ba9af0500 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -23,6 +23,7 @@
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
@@ -161,18 +162,40 @@ static int __init is_stack_depot_disabled(char *str)
}
early_param("stack_depot_disable", is_stack_depot_disabled);
-int __init stack_depot_init(void)
+/*
+ * __ref because of memblock_alloc(), which will not be actually called after
+ * the __init code is gone, because at that point slab_is_available() is true
+ */
+__ref int stack_depot_init(void)
{
- if (!stack_depot_disable) {
+ static DEFINE_MUTEX(stack_depot_init_mutex);
+
+ mutex_lock(&stack_depot_init_mutex);
+ if (!stack_depot_disable && !stack_table) {
size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
int i;
- stack_table = memblock_alloc(size, size);
- for (i = 0; i < STACK_HASH_SIZE; i++)
- stack_table[i] = NULL;
+ if (slab_is_available()) {
+ pr_info("Stack Depot allocating hash table with kvmalloc\n");
+ stack_table = kvmalloc(size, GFP_KERNEL);
+ } else {
+ pr_info("Stack Depot allocating hash table with memblock_alloc\n");
+ stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
+ }
+ if (stack_table) {
+ for (i = 0; i < STACK_HASH_SIZE; i++)
+ stack_table[i] = NULL;
+ } else {
+ pr_err("Stack Depot hash table allocation failed, disabling\n");
+ stack_depot_disable = true;
+ mutex_unlock(&stack_depot_init_mutex);
+ return -ENOMEM;
+ }
}
+ mutex_unlock(&stack_depot_init_mutex);
return 0;
}
+EXPORT_SYMBOL_GPL(stack_depot_init);
/* Calculate hash for a stack */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
@@ -305,6 +328,9 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
* (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids
* any allocations and will fail if no space is left to store the stack trace.
*
+ * If the stack trace in @entries is from an interrupt, only the portion up to
+ * interrupt entry is saved.
+ *
* Context: Any context, but setting @can_alloc to %false is required if
* alloc_pages() cannot be used from the current context. Currently
* this is the case from contexts where neither %GFP_ATOMIC nor
@@ -323,6 +349,16 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
unsigned long flags;
u32 hash;
+ /*
+ * If this stack trace is from an interrupt, including anything before
+ * interrupt entry usually leads to unbounded stackdepot growth.
+ *
+ * Because use of filter_irq_stacks() is a requirement to ensure
+ * stackdepot can efficiently deduplicate interrupt stacks, always
+ * filter_irq_stacks() to simplify all callers' use of stackdepot.
+ */
+ nr_entries = filter_irq_stacks(entries, nr_entries);
+
if (unlikely(nr_entries == 0) || stack_depot_disable)
goto fast_exit;
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index d33fa5a61b95..0c82f07f74fc 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -446,6 +446,42 @@ static void __init test_bitmap_parselist(void)
}
}
+static void __init test_bitmap_printlist(void)
+{
+ unsigned long *bmap = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ char expected[256];
+ int ret, slen;
+ ktime_t time;
+
+ if (!buf || !bmap)
+ goto out;
+
+ memset(bmap, -1, PAGE_SIZE);
+ slen = snprintf(expected, 256, "0-%ld", PAGE_SIZE * 8 - 1);
+ if (slen < 0)
+ goto out;
+
+ time = ktime_get();
+ ret = bitmap_print_to_pagebuf(true, buf, bmap, PAGE_SIZE * 8);
+ time = ktime_get() - time;
+
+ if (ret != slen + 1) {
+ pr_err("bitmap_print_to_pagebuf: result is %d, expected %d\n", ret, slen);
+ goto out;
+ }
+
+ if (strncmp(buf, expected, slen)) {
+ pr_err("bitmap_print_to_pagebuf: result is %s, expected %s\n", buf, expected);
+ goto out;
+ }
+
+ pr_err("bitmap_print_to_pagebuf: input is '%s', Time: %llu\n", buf, time);
+out:
+ kfree(buf);
+ kfree(bmap);
+}
+
static const unsigned long parse_test[] __initconst = {
BITMAP_FROM_U64(0),
BITMAP_FROM_U64(1),
@@ -818,6 +854,7 @@ static void __init selftest(void)
test_bitmap_arr32();
test_bitmap_parse();
test_bitmap_parselist();
+ test_bitmap_printlist();
test_mem_optimisations();
test_for_each_set_clump8();
test_bitmap_cut();
diff --git a/lib/test_hash.c b/lib/test_hash.c
index 0ee40b4a56dd..bb25fda34794 100644
--- a/lib/test_hash.c
+++ b/lib/test_hash.c
@@ -14,17 +14,15 @@
* and hash_64().
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n"
-
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/hash.h>
#include <linux/stringhash.h>
-#include <linux/printk.h>
+#include <kunit/test.h>
/* 32-bit XORSHIFT generator. Seed must not be zero. */
-static u32 __init __attribute_const__
+static u32 __attribute_const__
xorshift(u32 seed)
{
seed ^= seed << 13;
@@ -34,7 +32,7 @@ xorshift(u32 seed)
}
/* Given a non-zero x, returns a non-zero byte. */
-static u8 __init __attribute_const__
+static u8 __attribute_const__
mod255(u32 x)
{
x = (x & 0xffff) + (x >> 16); /* 1 <= x <= 0x1fffe */
@@ -45,8 +43,7 @@ mod255(u32 x)
}
/* Fill the buffer with non-zero bytes. */
-static void __init
-fill_buf(char *buf, size_t len, u32 seed)
+static void fill_buf(char *buf, size_t len, u32 seed)
{
size_t i;
@@ -56,6 +53,50 @@ fill_buf(char *buf, size_t len, u32 seed)
}
}
+/* Holds most testing variables for the int test. */
+struct test_hash_params {
+ /* Pointer to integer to be hashed. */
+ unsigned long long *h64;
+ /* Low 32-bits of integer to be hashed. */
+ u32 h0;
+ /* Arch-specific hash result. */
+ u32 h1;
+ /* Generic hash result. */
+ u32 h2;
+ /* ORed hashes of given size (in bits). */
+ u32 (*hash_or)[33];
+};
+
+#ifdef HAVE_ARCH__HASH_32
+static void
+test_int__hash_32(struct kunit *test, struct test_hash_params *params)
+{
+ params->hash_or[1][0] |= params->h2 = __hash_32_generic(params->h0);
+#if HAVE_ARCH__HASH_32 == 1
+ KUNIT_EXPECT_EQ_MSG(test, params->h1, params->h2,
+ "__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
+ params->h0, params->h1, params->h2);
+#endif
+}
+#endif
+
+#ifdef HAVE_ARCH_HASH_64
+static void
+test_int_hash_64(struct kunit *test, struct test_hash_params *params, u32 const *m, int *k)
+{
+ params->h2 = hash_64_generic(*params->h64, *k);
+#if HAVE_ARCH_HASH_64 == 1
+ KUNIT_EXPECT_EQ_MSG(test, params->h1, params->h2,
+ "hash_64(%#llx, %d) = %#x != hash_64_generic() = %#x",
+ *params->h64, *k, params->h1, params->h2);
+#else
+ KUNIT_EXPECT_LE_MSG(test, params->h1, params->h2,
+ "hash_64_generic(%#llx, %d) = %#x > %#x",
+ *params->h64, *k, params->h1, *m);
+#endif
+}
+#endif
+
/*
* Test the various integer hash functions. h64 (or its low-order bits)
* is the integer to hash. hash_or accumulates the OR of the hash values,
@@ -65,23 +106,16 @@ fill_buf(char *buf, size_t len, u32 seed)
* inline, the code being tested is actually in the module, and you can
* recompile and re-test the module without rebooting.
*/
-static bool __init
-test_int_hash(unsigned long long h64, u32 hash_or[2][33])
+static void
+test_int_hash(struct kunit *test, unsigned long long h64, u32 hash_or[2][33])
{
int k;
- u32 h0 = (u32)h64, h1, h2;
+ struct test_hash_params params = { &h64, (u32)h64, 0, 0, hash_or };
/* Test __hash32 */
- hash_or[0][0] |= h1 = __hash_32(h0);
+ hash_or[0][0] |= params.h1 = __hash_32(params.h0);
#ifdef HAVE_ARCH__HASH_32
- hash_or[1][0] |= h2 = __hash_32_generic(h0);
-#if HAVE_ARCH__HASH_32 == 1
- if (h1 != h2) {
- pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
- h0, h1, h2);
- return false;
- }
-#endif
+ test_int__hash_32(test, &params);
#endif
/* Test k = 1..32 bits */
@@ -89,63 +123,53 @@ test_int_hash(unsigned long long h64, u32 hash_or[2][33])
u32 const m = ((u32)2 << (k-1)) - 1; /* Low k bits set */
/* Test hash_32 */
- hash_or[0][k] |= h1 = hash_32(h0, k);
- if (h1 > m) {
- pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m);
- return false;
- }
-#ifdef HAVE_ARCH_HASH_32
- h2 = hash_32_generic(h0, k);
-#if HAVE_ARCH_HASH_32 == 1
- if (h1 != h2) {
- pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() "
- " = %#x", h0, k, h1, h2);
- return false;
- }
-#else
- if (h2 > m) {
- pr_err("hash_32_generic(%#x, %d) = %#x > %#x",
- h0, k, h1, m);
- return false;
- }
-#endif
-#endif
+ hash_or[0][k] |= params.h1 = hash_32(params.h0, k);
+ KUNIT_EXPECT_LE_MSG(test, params.h1, m,
+ "hash_32(%#x, %d) = %#x > %#x",
+ params.h0, k, params.h1, m);
+
/* Test hash_64 */
- hash_or[1][k] |= h1 = hash_64(h64, k);
- if (h1 > m) {
- pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m);
- return false;
- }
+ hash_or[1][k] |= params.h1 = hash_64(h64, k);
+ KUNIT_EXPECT_LE_MSG(test, params.h1, m,
+ "hash_64(%#llx, %d) = %#x > %#x",
+ h64, k, params.h1, m);
#ifdef HAVE_ARCH_HASH_64
- h2 = hash_64_generic(h64, k);
-#if HAVE_ARCH_HASH_64 == 1
- if (h1 != h2) {
- pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() "
- "= %#x", h64, k, h1, h2);
- return false;
- }
-#else
- if (h2 > m) {
- pr_err("hash_64_generic(%#llx, %d) = %#x > %#x",
- h64, k, h1, m);
- return false;
- }
-#endif
+ test_int_hash_64(test, &params, &m, &k);
#endif
}
-
- (void)h2; /* Suppress unused variable warning */
- return true;
}
#define SIZE 256 /* Run time is cubic in SIZE */
-static int __init
-test_hash_init(void)
+static void test_string_or(struct kunit *test)
{
char buf[SIZE+1];
- u32 string_or = 0, hash_or[2][33] = { { 0, } };
- unsigned tests = 0;
+ u32 string_or = 0;
+ int i, j;
+
+ fill_buf(buf, SIZE, 1);
+
+ /* Test every possible non-empty substring in the buffer. */
+ for (j = SIZE; j > 0; --j) {
+ buf[j] = '\0';
+
+ for (i = 0; i <= j; i++) {
+ u32 h0 = full_name_hash(buf+i, buf+i, j-i);
+
+ string_or |= h0;
+ } /* i */
+ } /* j */
+
+ /* The OR of all the hash values should cover all the bits */
+ KUNIT_EXPECT_EQ_MSG(test, string_or, -1u,
+ "OR of all string hash results = %#x != %#x",
+ string_or, -1u);
+}
+
+static void test_hash_or(struct kunit *test)
+{
+ char buf[SIZE+1];
+ u32 hash_or[2][33] = { { 0, } };
unsigned long long h64 = 0;
int i, j;
@@ -160,46 +184,27 @@ test_hash_init(void)
u32 h0 = full_name_hash(buf+i, buf+i, j-i);
/* Check that hashlen_string gets the length right */
- if (hashlen_len(hashlen) != j-i) {
- pr_err("hashlen_string(%d..%d) returned length"
- " %u, expected %d",
- i, j, hashlen_len(hashlen), j-i);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hashlen_len(hashlen), j-i,
+ "hashlen_string(%d..%d) returned length %u, expected %d",
+ i, j, hashlen_len(hashlen), j-i);
/* Check that the hashes match */
- if (hashlen_hash(hashlen) != h0) {
- pr_err("hashlen_string(%d..%d) = %08x != "
- "full_name_hash() = %08x",
- i, j, hashlen_hash(hashlen), h0);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hashlen_hash(hashlen), h0,
+ "hashlen_string(%d..%d) = %08x != full_name_hash() = %08x",
+ i, j, hashlen_hash(hashlen), h0);
- string_or |= h0;
h64 = h64 << 32 | h0; /* For use with hash_64 */
- if (!test_int_hash(h64, hash_or))
- return -EINVAL;
- tests++;
+ test_int_hash(test, h64, hash_or);
} /* i */
} /* j */
- /* The OR of all the hash values should cover all the bits */
- if (~string_or) {
- pr_err("OR of all string hash results = %#x != %#x",
- string_or, -1u);
- return -EINVAL;
- }
- if (~hash_or[0][0]) {
- pr_err("OR of all __hash_32 results = %#x != %#x",
- hash_or[0][0], -1u);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hash_or[0][0], -1u,
+ "OR of all __hash_32 results = %#x != %#x",
+ hash_or[0][0], -1u);
#ifdef HAVE_ARCH__HASH_32
#if HAVE_ARCH__HASH_32 != 1 /* Test is pointless if results match */
- if (~hash_or[1][0]) {
- pr_err("OR of all __hash_32_generic results = %#x != %#x",
- hash_or[1][0], -1u);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hash_or[1][0], -1u,
+ "OR of all __hash_32_generic results = %#x != %#x",
+ hash_or[1][0], -1u);
#endif
#endif
@@ -207,51 +212,27 @@ test_hash_init(void)
for (i = 1; i <= 32; i++) {
u32 const m = ((u32)2 << (i-1)) - 1; /* Low i bits set */
- if (hash_or[0][i] != m) {
- pr_err("OR of all hash_32(%d) results = %#x "
- "(%#x expected)", i, hash_or[0][i], m);
- return -EINVAL;
- }
- if (hash_or[1][i] != m) {
- pr_err("OR of all hash_64(%d) results = %#x "
- "(%#x expected)", i, hash_or[1][i], m);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hash_or[0][i], m,
+ "OR of all hash_32(%d) results = %#x (%#x expected)",
+ i, hash_or[0][i], m);
+ KUNIT_EXPECT_EQ_MSG(test, hash_or[1][i], m,
+ "OR of all hash_64(%d) results = %#x (%#x expected)",
+ i, hash_or[1][i], m);
}
+}
- /* Issue notices about skipped tests. */
-#ifdef HAVE_ARCH__HASH_32
-#if HAVE_ARCH__HASH_32 != 1
- pr_info("__hash_32() is arch-specific; not compared to generic.");
-#endif
-#else
- pr_info("__hash_32() has no arch implementation to test.");
-#endif
-#ifdef HAVE_ARCH_HASH_32
-#if HAVE_ARCH_HASH_32 != 1
- pr_info("hash_32() is arch-specific; not compared to generic.");
-#endif
-#else
- pr_info("hash_32() has no arch implementation to test.");
-#endif
-#ifdef HAVE_ARCH_HASH_64
-#if HAVE_ARCH_HASH_64 != 1
- pr_info("hash_64() is arch-specific; not compared to generic.");
-#endif
-#else
- pr_info("hash_64() has no arch implementation to test.");
-#endif
-
- pr_notice("%u tests passed.", tests);
+static struct kunit_case hash_test_cases[] __refdata = {
+ KUNIT_CASE(test_string_or),
+ KUNIT_CASE(test_hash_or),
+ {}
+};
- return 0;
-}
+static struct kunit_suite hash_test_suite = {
+ .name = "hash",
+ .test_cases = hash_test_cases,
+};
-static void __exit test_hash_exit(void)
-{
-}
-module_init(test_hash_init); /* Does everything */
-module_exit(test_hash_exit); /* Does nothing */
+kunit_test_suite(hash_test_suite);
MODULE_LICENSE("GPL");
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 847cdbefab46..26a5c9007653 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -492,6 +492,7 @@ static void kmalloc_oob_in_memset(struct kunit *test)
ptr = kmalloc(size, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+ OPTIMIZER_HIDE_VAR(ptr);
OPTIMIZER_HIDE_VAR(size);
KUNIT_EXPECT_KASAN_FAIL(test,
memset(ptr, 0, size + KASAN_GRANULE_SIZE));
@@ -515,6 +516,7 @@ static void kmalloc_memmove_negative_size(struct kunit *test)
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
memset((char *)ptr, 0, 64);
+ OPTIMIZER_HIDE_VAR(ptr);
OPTIMIZER_HIDE_VAR(invalid_size);
KUNIT_EXPECT_KASAN_FAIL(test,
memmove((char *)ptr, (char *)ptr + 4, invalid_size));
@@ -531,6 +533,7 @@ static void kmalloc_memmove_invalid_size(struct kunit *test)
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
memset((char *)ptr, 0, 64);
+ OPTIMIZER_HIDE_VAR(ptr);
KUNIT_EXPECT_KASAN_FAIL(test,
memmove((char *)ptr, (char *)ptr + 4, invalid_size));
kfree(ptr);
@@ -893,6 +896,7 @@ static void kasan_memchr(struct kunit *test)
ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+ OPTIMIZER_HIDE_VAR(ptr);
OPTIMIZER_HIDE_VAR(size);
KUNIT_EXPECT_KASAN_FAIL(test,
kasan_ptr_result = memchr(ptr, '1', size + 1));
@@ -919,6 +923,7 @@ static void kasan_memcmp(struct kunit *test)
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
memset(arr, 0, sizeof(arr));
+ OPTIMIZER_HIDE_VAR(ptr);
OPTIMIZER_HIDE_VAR(size);
KUNIT_EXPECT_KASAN_FAIL(test,
kasan_int_result = memcmp(ptr, arr, size+1));
diff --git a/lib/test_meminit.c b/lib/test_meminit.c
index e4f706a404b3..3ca717f11397 100644
--- a/lib/test_meminit.c
+++ b/lib/test_meminit.c
@@ -337,6 +337,7 @@ static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
if (num)
kmem_cache_free_bulk(c, num, objects);
}
+ kmem_cache_destroy(c);
*total_failures += fail;
return 1;
}
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 3750323973f4..a5a3d6c27e1f 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -128,26 +128,6 @@ static struct ctl_table test_table[] = {
{ }
};
-static struct ctl_table test_sysctl_table[] = {
- {
- .procname = "test_sysctl",
- .maxlen = 0,
- .mode = 0555,
- .child = test_table,
- },
- { }
-};
-
-static struct ctl_table test_sysctl_root_table[] = {
- {
- .procname = "debug",
- .maxlen = 0,
- .mode = 0555,
- .child = test_sysctl_table,
- },
- { }
-};
-
static struct ctl_table_header *test_sysctl_header;
static int __init test_sysctl_init(void)
@@ -155,7 +135,7 @@ static int __init test_sysctl_init(void)
test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL);
if (!test_data.bitmap_0001)
return -ENOMEM;
- test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
+ test_sysctl_header = register_sysctl("debug/test_sysctl", test_table);
if (!test_sysctl_header) {
kfree(test_data.bitmap_0001);
return -ENOMEM;
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
index 7e7bbd0f3fd2..2062be1f2e80 100644
--- a/lib/test_ubsan.c
+++ b/lib/test_ubsan.c
@@ -79,15 +79,6 @@ static void test_ubsan_load_invalid_value(void)
eval2 = eval;
}
-static void test_ubsan_null_ptr_deref(void)
-{
- volatile int *ptr = NULL;
- int val;
-
- UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE);
- val = *ptr;
-}
-
static void test_ubsan_misaligned_access(void)
{
volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5};
@@ -98,29 +89,16 @@ static void test_ubsan_misaligned_access(void)
*ptr = val;
}
-static void test_ubsan_object_size_mismatch(void)
-{
- /* "((aligned(8)))" helps this not into be misaligned for ptr-access. */
- volatile int val __aligned(8) = 4;
- volatile long long *ptr, val2;
-
- UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE);
- ptr = (long long *)&val;
- val2 = *ptr;
-}
-
static const test_ubsan_fp test_ubsan_array[] = {
test_ubsan_shift_out_of_bounds,
test_ubsan_out_of_bounds,
test_ubsan_load_invalid_value,
test_ubsan_misaligned_access,
- test_ubsan_object_size_mismatch,
};
/* Excluded because they Oops the module. */
static const test_ubsan_fp skip_ubsan_array[] = {
test_ubsan_divrem_overflow,
- test_ubsan_null_ptr_deref,
};
static int __init test_ubsan_init(void)
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 53d6081f9e8b..3b8129dd374c 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1241,20 +1241,13 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap,
struct printf_spec spec, const char *fmt)
{
int nr_bits = max_t(int, spec.field_width, 0);
- /* current bit is 'cur', most recently seen range is [rbot, rtop] */
- int cur, rbot, rtop;
bool first = true;
+ int rbot, rtop;
if (check_pointer(&buf, end, bitmap, spec))
return buf;
- rbot = cur = find_first_bit(bitmap, nr_bits);
- while (cur < nr_bits) {
- rtop = cur;
- cur = find_next_bit(bitmap, nr_bits, cur + 1);
- if (cur < nr_bits && cur <= rtop + 1)
- continue;
-
+ for_each_set_bitrange(rbot, rtop, bitmap, nr_bits) {
if (!first) {
if (buf < end)
*buf = ',';
@@ -1263,15 +1256,12 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap,
first = false;
buf = number(buf, end, rbot, default_dec_spec);
- if (rbot < rtop) {
- if (buf < end)
- *buf = '-';
- buf++;
-
- buf = number(buf, end, rtop, default_dec_spec);
- }
+ if (rtop == rbot + 1)
+ continue;
- rbot = cur;
+ if (buf < end)
+ *buf = '-';
+ buf = number(++buf, end, rtop - 1, default_dec_spec);
}
return buf;
}
diff --git a/mm/Kconfig b/mm/Kconfig
index 53d7485fc38f..3326ee3903f3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -432,43 +432,20 @@ config NEED_PER_CPU_KM
bool
default y
-config CLEANCACHE
- bool "Enable cleancache driver to cache clean pages if tmem is present"
- help
- Cleancache can be thought of as a page-granularity victim cache
- for clean pages that the kernel's pageframe replacement algorithm
- (PFRA) would like to keep around, but can't since there isn't enough
- memory. So when the PFRA "evicts" a page, it first attempts to use
- cleancache code to put the data contained in that page into
- "transcendent memory", memory that is not directly accessible or
- addressable by the kernel and is of unknown and possibly
- time-varying size. And when a cleancache-enabled
- filesystem wishes to access a page in a file on disk, it first
- checks cleancache to see if it already contains it; if it does,
- the page is copied into the kernel and a disk access is avoided.
- When a transcendent memory driver is available (such as zcache or
- Xen transcendent memory), a significant I/O reduction
- may be achieved. When none is available, all cleancache calls
- are reduced to a single pointer-compare-against-NULL resulting
- in a negligible performance hit.
-
- If unsure, say Y to enable cleancache
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+ bool
-config FRONTSWAP
- bool "Enable frontswap to cache swap pages if tmem is present"
- depends on SWAP
- help
- Frontswap is so named because it can be thought of as the opposite
- of a "backing" store for a swap device. The data is stored into
- "transcendent memory", memory that is not directly accessible or
- addressable by the kernel and is of unknown and possibly
- time-varying size. When space in transcendent memory is available,
- a significant swap I/O reduction may be achieved. When none is
- available, all frontswap calls are reduced to a single pointer-
- compare-against-NULL resulting in a negligible performance hit
- and swap data is stored as normal on the matching swap device.
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+ bool
+
+config USE_PERCPU_NUMA_NODE_ID
+ bool
+
+config HAVE_SETUP_PER_CPU_AREA
+ bool
- If unsure, say Y to enable frontswap.
+config FRONTSWAP
+ bool
config CMA
bool "Contiguous Memory Allocator"
@@ -533,7 +510,8 @@ config MEM_SOFT_DIRTY
config ZSWAP
bool "Compressed cache for swap pages (EXPERIMENTAL)"
- depends on FRONTSWAP && CRYPTO=y
+ depends on SWAP && CRYPTO=y
+ select FRONTSWAP
select ZPOOL
help
A lightweight compressed cache for swap pages. It takes
diff --git a/mm/Makefile b/mm/Makefile
index 588d3113f3b0..70d4309c9ce3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -104,7 +104,6 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o
obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o
obj-$(CONFIG_PAGE_OWNER) += page_owner.o
-obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
obj-$(CONFIG_ZPOOL) += zpool.o
obj-$(CONFIG_ZBUD) += zbud.o
diff --git a/mm/cleancache.c b/mm/cleancache.c
deleted file mode 100644
index db7eee9c0886..000000000000
--- a/mm/cleancache.c
+++ /dev/null
@@ -1,315 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Cleancache frontend
- *
- * This code provides the generic "frontend" layer to call a matching
- * "backend" driver implementation of cleancache. See
- * Documentation/vm/cleancache.rst for more information.
- *
- * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
- * Author: Dan Magenheimer
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/exportfs.h>
-#include <linux/mm.h>
-#include <linux/debugfs.h>
-#include <linux/cleancache.h>
-
-/*
- * cleancache_ops is set by cleancache_register_ops to contain the pointers
- * to the cleancache "backend" implementation functions.
- */
-static const struct cleancache_ops *cleancache_ops __read_mostly;
-
-/*
- * Counters available via /sys/kernel/debug/cleancache (if debugfs is
- * properly configured. These are for information only so are not protected
- * against increment races.
- */
-static u64 cleancache_succ_gets;
-static u64 cleancache_failed_gets;
-static u64 cleancache_puts;
-static u64 cleancache_invalidates;
-
-static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
-{
- switch (sb->cleancache_poolid) {
- case CLEANCACHE_NO_BACKEND:
- __cleancache_init_fs(sb);
- break;
- case CLEANCACHE_NO_BACKEND_SHARED:
- __cleancache_init_shared_fs(sb);
- break;
- }
-}
-
-/*
- * Register operations for cleancache. Returns 0 on success.
- */
-int cleancache_register_ops(const struct cleancache_ops *ops)
-{
- if (cmpxchg(&cleancache_ops, NULL, ops))
- return -EBUSY;
-
- /*
- * A cleancache backend can be built as a module and hence loaded after
- * a cleancache enabled filesystem has called cleancache_init_fs. To
- * handle such a scenario, here we call ->init_fs or ->init_shared_fs
- * for each active super block. To differentiate between local and
- * shared filesystems, we temporarily initialize sb->cleancache_poolid
- * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED
- * respectively in case there is no backend registered at the time
- * cleancache_init_fs or cleancache_init_shared_fs is called.
- *
- * Since filesystems can be mounted concurrently with cleancache
- * backend registration, we have to be careful to guarantee that all
- * cleancache enabled filesystems that has been mounted by the time
- * cleancache_register_ops is called has got and all mounted later will
- * get cleancache_poolid. This is assured by the following statements
- * tied together:
- *
- * a) iterate_supers skips only those super blocks that has started
- * ->kill_sb
- *
- * b) if iterate_supers encounters a super block that has not finished
- * ->mount yet, it waits until it is finished
- *
- * c) cleancache_init_fs is called from ->mount and
- * cleancache_invalidate_fs is called from ->kill_sb
- *
- * d) we call iterate_supers after cleancache_ops has been set
- *
- * From a) it follows that if iterate_supers skips a super block, then
- * either the super block is already dead, in which case we do not need
- * to bother initializing cleancache for it, or it was mounted after we
- * initiated iterate_supers. In the latter case, it must have seen
- * cleancache_ops set according to d) and initialized cleancache from
- * ->mount by itself according to c). This proves that we call
- * ->init_fs at least once for each active super block.
- *
- * From b) and c) it follows that if iterate_supers encounters a super
- * block that has already started ->init_fs, it will wait until ->mount
- * and hence ->init_fs has finished, then check cleancache_poolid, see
- * that it has already been set and therefore do nothing. This proves
- * that we call ->init_fs no more than once for each super block.
- *
- * Combined together, the last two paragraphs prove the function
- * correctness.
- *
- * Note that various cleancache callbacks may proceed before this
- * function is called or even concurrently with it, but since
- * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop
- * until the corresponding ->init_fs has been actually called and
- * cleancache_ops has been set.
- */
- iterate_supers(cleancache_register_ops_sb, NULL);
- return 0;
-}
-EXPORT_SYMBOL(cleancache_register_ops);
-
-/* Called by a cleancache-enabled filesystem at time of mount */
-void __cleancache_init_fs(struct super_block *sb)
-{
- int pool_id = CLEANCACHE_NO_BACKEND;
-
- if (cleancache_ops) {
- pool_id = cleancache_ops->init_fs(PAGE_SIZE);
- if (pool_id < 0)
- pool_id = CLEANCACHE_NO_POOL;
- }
- sb->cleancache_poolid = pool_id;
-}
-EXPORT_SYMBOL(__cleancache_init_fs);
-
-/* Called by a cleancache-enabled clustered filesystem at time of mount */
-void __cleancache_init_shared_fs(struct super_block *sb)
-{
- int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
-
- if (cleancache_ops) {
- pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
- if (pool_id < 0)
- pool_id = CLEANCACHE_NO_POOL;
- }
- sb->cleancache_poolid = pool_id;
-}
-EXPORT_SYMBOL(__cleancache_init_shared_fs);
-
-/*
- * If the filesystem uses exportable filehandles, use the filehandle as
- * the key, else use the inode number.
- */
-static int cleancache_get_key(struct inode *inode,
- struct cleancache_filekey *key)
-{
- int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *);
- int len = 0, maxlen = CLEANCACHE_KEY_MAX;
- struct super_block *sb = inode->i_sb;
-
- key->u.ino = inode->i_ino;
- if (sb->s_export_op != NULL) {
- fhfn = sb->s_export_op->encode_fh;
- if (fhfn) {
- len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL);
- if (len <= FILEID_ROOT || len == FILEID_INVALID)
- return -1;
- if (maxlen > CLEANCACHE_KEY_MAX)
- return -1;
- }
- }
- return 0;
-}
-
-/*
- * "Get" data from cleancache associated with the poolid/inode/index
- * that were specified when the data was put to cleanache and, if
- * successful, use it to fill the specified page with data and return 0.
- * The pageframe is unchanged and returns -1 if the get fails.
- * Page must be locked by caller.
- *
- * The function has two checks before any action is taken - whether
- * a backend is registered and whether the sb->cleancache_poolid
- * is correct.
- */
-int __cleancache_get_page(struct page *page)
-{
- int ret = -1;
- int pool_id;
- struct cleancache_filekey key = { .u.key = { 0 } };
-
- if (!cleancache_ops) {
- cleancache_failed_gets++;
- goto out;
- }
-
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- pool_id = page->mapping->host->i_sb->cleancache_poolid;
- if (pool_id < 0)
- goto out;
-
- if (cleancache_get_key(page->mapping->host, &key) < 0)
- goto out;
-
- ret = cleancache_ops->get_page(pool_id, key, page->index, page);
- if (ret == 0)
- cleancache_succ_gets++;
- else
- cleancache_failed_gets++;
-out:
- return ret;
-}
-EXPORT_SYMBOL(__cleancache_get_page);
-
-/*
- * "Put" data from a page to cleancache and associate it with the
- * (previously-obtained per-filesystem) poolid and the page's,
- * inode and page index. Page must be locked. Note that a put_page
- * always "succeeds", though a subsequent get_page may succeed or fail.
- *
- * The function has two checks before any action is taken - whether
- * a backend is registered and whether the sb->cleancache_poolid
- * is correct.
- */
-void __cleancache_put_page(struct page *page)
-{
- int pool_id;
- struct cleancache_filekey key = { .u.key = { 0 } };
-
- if (!cleancache_ops) {
- cleancache_puts++;
- return;
- }
-
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- pool_id = page->mapping->host->i_sb->cleancache_poolid;
- if (pool_id >= 0 &&
- cleancache_get_key(page->mapping->host, &key) >= 0) {
- cleancache_ops->put_page(pool_id, key, page->index, page);
- cleancache_puts++;
- }
-}
-EXPORT_SYMBOL(__cleancache_put_page);
-
-/*
- * Invalidate any data from cleancache associated with the poolid and the
- * page's inode and page index so that a subsequent "get" will fail.
- *
- * The function has two checks before any action is taken - whether
- * a backend is registered and whether the sb->cleancache_poolid
- * is correct.
- */
-void __cleancache_invalidate_page(struct address_space *mapping,
- struct page *page)
-{
- /* careful... page->mapping is NULL sometimes when this is called */
- int pool_id = mapping->host->i_sb->cleancache_poolid;
- struct cleancache_filekey key = { .u.key = { 0 } };
-
- if (!cleancache_ops)
- return;
-
- if (pool_id >= 0) {
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- if (cleancache_get_key(mapping->host, &key) >= 0) {
- cleancache_ops->invalidate_page(pool_id,
- key, page->index);
- cleancache_invalidates++;
- }
- }
-}
-EXPORT_SYMBOL(__cleancache_invalidate_page);
-
-/*
- * Invalidate all data from cleancache associated with the poolid and the
- * mappings's inode so that all subsequent gets to this poolid/inode
- * will fail.
- *
- * The function has two checks before any action is taken - whether
- * a backend is registered and whether the sb->cleancache_poolid
- * is correct.
- */
-void __cleancache_invalidate_inode(struct address_space *mapping)
-{
- int pool_id = mapping->host->i_sb->cleancache_poolid;
- struct cleancache_filekey key = { .u.key = { 0 } };
-
- if (!cleancache_ops)
- return;
-
- if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
- cleancache_ops->invalidate_inode(pool_id, key);
-}
-EXPORT_SYMBOL(__cleancache_invalidate_inode);
-
-/*
- * Called by any cleancache-enabled filesystem at time of unmount;
- * note that pool_id is surrendered and may be returned by a subsequent
- * cleancache_init_fs or cleancache_init_shared_fs.
- */
-void __cleancache_invalidate_fs(struct super_block *sb)
-{
- int pool_id;
-
- pool_id = sb->cleancache_poolid;
- sb->cleancache_poolid = CLEANCACHE_NO_POOL;
-
- if (cleancache_ops && pool_id >= 0)
- cleancache_ops->invalidate_fs(pool_id);
-}
-EXPORT_SYMBOL(__cleancache_invalidate_fs);
-
-static int __init init_cleancache(void)
-{
-#ifdef CONFIG_DEBUG_FS
- struct dentry *root = debugfs_create_dir("cleancache", NULL);
-
- debugfs_create_u64("succ_gets", 0444, root, &cleancache_succ_gets);
- debugfs_create_u64("failed_gets", 0444, root, &cleancache_failed_gets);
- debugfs_create_u64("puts", 0444, root, &cleancache_puts);
- debugfs_create_u64("invalidates", 0444, root, &cleancache_invalidates);
-#endif
- return 0;
-}
-module_init(init_cleancache)
diff --git a/mm/filemap.c b/mm/filemap.c
index 2fd9b2f24025..ad8c39d90bf9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -21,6 +21,7 @@
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
@@ -34,13 +35,13 @@
#include <linux/cpuset.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
-#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
+#include <linux/migrate.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -149,16 +150,6 @@ static void filemap_unaccount_folio(struct address_space *mapping,
{
long nr;
- /*
- * if we're uptodate, flush out into the cleancache, otherwise
- * invalidate any existing cleancache entries. We can't leave
- * stale data around in the cleancache once our page is gone
- */
- if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio))
- cleancache_put_page(&folio->page);
- else
- cleancache_invalidate_page(mapping, &folio->page);
-
VM_BUG_ON_FOLIO(folio_mapped(folio), folio);
if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) {
int mapcount;
@@ -231,17 +222,15 @@ void __filemap_remove_folio(struct folio *folio, void *shadow)
void filemap_free_folio(struct address_space *mapping, struct folio *folio)
{
void (*freepage)(struct page *);
+ int refs = 1;
freepage = mapping->a_ops->freepage;
if (freepage)
freepage(&folio->page);
- if (folio_test_large(folio) && !folio_test_hugetlb(folio)) {
- folio_ref_sub(folio, folio_nr_pages(folio));
- VM_BUG_ON_FOLIO(folio_ref_count(folio) <= 0, folio);
- } else {
- folio_put(folio);
- }
+ if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+ refs = folio_nr_pages(folio);
+ folio_put_refs(folio, refs);
}
/**
@@ -1388,6 +1377,95 @@ repeat:
return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
}
+#ifdef CONFIG_MIGRATION
+/**
+ * migration_entry_wait_on_locked - Wait for a migration entry to be removed
+ * @entry: migration swap entry.
+ * @ptep: mapped pte pointer. Will return with the ptep unmapped. Only required
+ * for pte entries, pass NULL for pmd entries.
+ * @ptl: already locked ptl. This function will drop the lock.
+ *
+ * Wait for a migration entry referencing the given page to be removed. This is
+ * equivalent to put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE) except
+ * this can be called without taking a reference on the page. Instead this
+ * should be called while holding the ptl for the migration entry referencing
+ * the page.
+ *
+ * Returns after unmapping and unlocking the pte/ptl with pte_unmap_unlock().
+ *
+ * This follows the same logic as folio_wait_bit_common() so see the comments
+ * there.
+ */
+void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
+ spinlock_t *ptl)
+{
+ struct wait_page_queue wait_page;
+ wait_queue_entry_t *wait = &wait_page.wait;
+ bool thrashing = false;
+ bool delayacct = false;
+ unsigned long pflags;
+ wait_queue_head_t *q;
+ struct folio *folio = page_folio(pfn_swap_entry_to_page(entry));
+
+ q = folio_waitqueue(folio);
+ if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) {
+ if (!folio_test_swapbacked(folio)) {
+ delayacct_thrashing_start();
+ delayacct = true;
+ }
+ psi_memstall_enter(&pflags);
+ thrashing = true;
+ }
+
+ init_wait(wait);
+ wait->func = wake_page_function;
+ wait_page.folio = folio;
+ wait_page.bit_nr = PG_locked;
+ wait->flags = 0;
+
+ spin_lock_irq(&q->lock);
+ folio_set_waiters(folio);
+ if (!folio_trylock_flag(folio, PG_locked, wait))
+ __add_wait_queue_entry_tail(q, wait);
+ spin_unlock_irq(&q->lock);
+
+ /*
+ * If a migration entry exists for the page the migration path must hold
+ * a valid reference to the page, and it must take the ptl to remove the
+ * migration entry. So the page is valid until the ptl is dropped.
+ */
+ if (ptep)
+ pte_unmap_unlock(ptep, ptl);
+ else
+ spin_unlock(ptl);
+
+ for (;;) {
+ unsigned int flags;
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ /* Loop until we've been woken or interrupted */
+ flags = smp_load_acquire(&wait->flags);
+ if (!(flags & WQ_FLAG_WOKEN)) {
+ if (signal_pending_state(TASK_UNINTERRUPTIBLE, current))
+ break;
+
+ io_schedule();
+ continue;
+ }
+ break;
+ }
+
+ finish_wait(q, wait);
+
+ if (thrashing) {
+ if (delayacct)
+ delayacct_thrashing_end();
+ psi_memstall_leave(&pflags);
+ }
+}
+#endif
+
void folio_wait_bit(struct folio *folio, int bit_nr)
{
folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 6bed12260dea..6f69b044a8cc 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -27,27 +27,7 @@ DEFINE_STATIC_KEY_FALSE(frontswap_enabled_key);
* may be registered, but implementations can never deregister. This
* is a simple singly-linked list of all registered implementations.
*/
-static struct frontswap_ops *frontswap_ops __read_mostly;
-
-#define for_each_frontswap_ops(ops) \
- for ((ops) = frontswap_ops; (ops); (ops) = (ops)->next)
-
-/*
- * If enabled, frontswap_store will return failure even on success. As
- * a result, the swap subsystem will always write the page to swap, in
- * effect converting frontswap into a writethrough cache. In this mode,
- * there is no direct reduction in swap writes, but a frontswap backend
- * can unilaterally "reclaim" any pages in use with no data loss, thus
- * providing increases control over maximum memory usage due to frontswap.
- */
-static bool frontswap_writethrough_enabled __read_mostly;
-
-/*
- * If enabled, the underlying tmem implementation is capable of doing
- * exclusive gets, so frontswap_load, on a successful tmem_get must
- * mark the page as no longer in frontswap AND mark it dirty.
- */
-static bool frontswap_tmem_exclusive_gets_enabled __read_mostly;
+static const struct frontswap_ops *frontswap_ops __read_mostly;
#ifdef CONFIG_DEBUG_FS
/*
@@ -114,87 +94,22 @@ static inline void inc_frontswap_invalidates(void) { }
/*
* Register operations for frontswap
*/
-void frontswap_register_ops(struct frontswap_ops *ops)
+int frontswap_register_ops(const struct frontswap_ops *ops)
{
- DECLARE_BITMAP(a, MAX_SWAPFILES);
- DECLARE_BITMAP(b, MAX_SWAPFILES);
- struct swap_info_struct *si;
- unsigned int i;
-
- bitmap_zero(a, MAX_SWAPFILES);
- bitmap_zero(b, MAX_SWAPFILES);
-
- spin_lock(&swap_lock);
- plist_for_each_entry(si, &swap_active_head, list) {
- if (!WARN_ON(!si->frontswap_map))
- __set_bit(si->type, a);
- }
- spin_unlock(&swap_lock);
-
- /* the new ops needs to know the currently active swap devices */
- for_each_set_bit(i, a, MAX_SWAPFILES)
- ops->init(i);
-
- /*
- * Setting frontswap_ops must happen after the ops->init() calls
- * above; cmpxchg implies smp_mb() which will ensure the init is
- * complete at this point.
- */
- do {
- ops->next = frontswap_ops;
- } while (cmpxchg(&frontswap_ops, ops->next, ops) != ops->next);
+ if (frontswap_ops)
+ return -EINVAL;
+ frontswap_ops = ops;
static_branch_inc(&frontswap_enabled_key);
-
- spin_lock(&swap_lock);
- plist_for_each_entry(si, &swap_active_head, list) {
- if (si->frontswap_map)
- __set_bit(si->type, b);
- }
- spin_unlock(&swap_lock);
-
- /*
- * On the very unlikely chance that a swap device was added or
- * removed between setting the "a" list bits and the ops init
- * calls, we re-check and do init or invalidate for any changed
- * bits.
- */
- if (unlikely(!bitmap_equal(a, b, MAX_SWAPFILES))) {
- for (i = 0; i < MAX_SWAPFILES; i++) {
- if (!test_bit(i, a) && test_bit(i, b))
- ops->init(i);
- else if (test_bit(i, a) && !test_bit(i, b))
- ops->invalidate_area(i);
- }
- }
-}
-EXPORT_SYMBOL(frontswap_register_ops);
-
-/*
- * Enable/disable frontswap writethrough (see above).
- */
-void frontswap_writethrough(bool enable)
-{
- frontswap_writethrough_enabled = enable;
-}
-EXPORT_SYMBOL(frontswap_writethrough);
-
-/*
- * Enable/disable frontswap exclusive gets (see above).
- */
-void frontswap_tmem_exclusive_gets(bool enable)
-{
- frontswap_tmem_exclusive_gets_enabled = enable;
+ return 0;
}
-EXPORT_SYMBOL(frontswap_tmem_exclusive_gets);
/*
* Called when a swap device is swapon'd.
*/
-void __frontswap_init(unsigned type, unsigned long *map)
+void frontswap_init(unsigned type, unsigned long *map)
{
struct swap_info_struct *sis = swap_info[type];
- struct frontswap_ops *ops;
VM_BUG_ON(sis == NULL);
@@ -210,20 +125,16 @@ void __frontswap_init(unsigned type, unsigned long *map)
* p->frontswap set to something valid to work properly.
*/
frontswap_map_set(sis, map);
-
- for_each_frontswap_ops(ops)
- ops->init(type);
+ frontswap_ops->init(type);
}
-EXPORT_SYMBOL(__frontswap_init);
-bool __frontswap_test(struct swap_info_struct *sis,
+static bool __frontswap_test(struct swap_info_struct *sis,
pgoff_t offset)
{
if (sis->frontswap_map)
return test_bit(offset, sis->frontswap_map);
return false;
}
-EXPORT_SYMBOL(__frontswap_test);
static inline void __frontswap_set(struct swap_info_struct *sis,
pgoff_t offset)
@@ -253,7 +164,6 @@ int __frontswap_store(struct page *page)
int type = swp_type(entry);
struct swap_info_struct *sis = swap_info[type];
pgoff_t offset = swp_offset(entry);
- struct frontswap_ops *ops;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(!PageLocked(page));
@@ -267,28 +177,19 @@ int __frontswap_store(struct page *page)
*/
if (__frontswap_test(sis, offset)) {
__frontswap_clear(sis, offset);
- for_each_frontswap_ops(ops)
- ops->invalidate_page(type, offset);
+ frontswap_ops->invalidate_page(type, offset);
}
- /* Try to store in each implementation, until one succeeds. */
- for_each_frontswap_ops(ops) {
- ret = ops->store(type, offset, page);
- if (!ret) /* successful store */
- break;
- }
+ ret = frontswap_ops->store(type, offset, page);
if (ret == 0) {
__frontswap_set(sis, offset);
inc_frontswap_succ_stores();
} else {
inc_frontswap_failed_stores();
}
- if (frontswap_writethrough_enabled)
- /* report failure so swap also writes to swap device */
- ret = -1;
+
return ret;
}
-EXPORT_SYMBOL(__frontswap_store);
/*
* "Get" data from frontswap associated with swaptype and offset that were
@@ -302,7 +203,6 @@ int __frontswap_load(struct page *page)
int type = swp_type(entry);
struct swap_info_struct *sis = swap_info[type];
pgoff_t offset = swp_offset(entry);
- struct frontswap_ops *ops;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(!PageLocked(page));
@@ -312,21 +212,11 @@ int __frontswap_load(struct page *page)
return -1;
/* Try loading from each implementation, until one succeeds. */
- for_each_frontswap_ops(ops) {
- ret = ops->load(type, offset, page);
- if (!ret) /* successful load */
- break;
- }
- if (ret == 0) {
+ ret = frontswap_ops->load(type, offset, page);
+ if (ret == 0)
inc_frontswap_loads();
- if (frontswap_tmem_exclusive_gets_enabled) {
- SetPageDirty(page);
- __frontswap_clear(sis, offset);
- }
- }
return ret;
}
-EXPORT_SYMBOL(__frontswap_load);
/*
* Invalidate any data from frontswap associated with the specified swaptype
@@ -335,7 +225,6 @@ EXPORT_SYMBOL(__frontswap_load);
void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
struct swap_info_struct *sis = swap_info[type];
- struct frontswap_ops *ops;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(sis == NULL);
@@ -343,12 +232,10 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
if (!__frontswap_test(sis, offset))
return;
- for_each_frontswap_ops(ops)
- ops->invalidate_page(type, offset);
+ frontswap_ops->invalidate_page(type, offset);
__frontswap_clear(sis, offset);
inc_frontswap_invalidates();
}
-EXPORT_SYMBOL(__frontswap_invalidate_page);
/*
* Invalidate all data from frontswap associated with all offsets for the
@@ -357,7 +244,6 @@ EXPORT_SYMBOL(__frontswap_invalidate_page);
void __frontswap_invalidate_area(unsigned type)
{
struct swap_info_struct *sis = swap_info[type];
- struct frontswap_ops *ops;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(sis == NULL);
@@ -365,123 +251,10 @@ void __frontswap_invalidate_area(unsigned type)
if (sis->frontswap_map == NULL)
return;
- for_each_frontswap_ops(ops)
- ops->invalidate_area(type);
+ frontswap_ops->invalidate_area(type);
atomic_set(&sis->frontswap_pages, 0);
bitmap_zero(sis->frontswap_map, sis->max);
}
-EXPORT_SYMBOL(__frontswap_invalidate_area);
-
-static unsigned long __frontswap_curr_pages(void)
-{
- unsigned long totalpages = 0;
- struct swap_info_struct *si = NULL;
-
- assert_spin_locked(&swap_lock);
- plist_for_each_entry(si, &swap_active_head, list)
- totalpages += atomic_read(&si->frontswap_pages);
- return totalpages;
-}
-
-static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
- int *swapid)
-{
- int ret = -EINVAL;
- struct swap_info_struct *si = NULL;
- int si_frontswap_pages;
- unsigned long total_pages_to_unuse = total;
- unsigned long pages = 0, pages_to_unuse = 0;
-
- assert_spin_locked(&swap_lock);
- plist_for_each_entry(si, &swap_active_head, list) {
- si_frontswap_pages = atomic_read(&si->frontswap_pages);
- if (total_pages_to_unuse < si_frontswap_pages) {
- pages = pages_to_unuse = total_pages_to_unuse;
- } else {
- pages = si_frontswap_pages;
- pages_to_unuse = 0; /* unuse all */
- }
- /* ensure there is enough RAM to fetch pages from frontswap */
- if (security_vm_enough_memory_mm(current->mm, pages)) {
- ret = -ENOMEM;
- continue;
- }
- vm_unacct_memory(pages);
- *unused = pages_to_unuse;
- *swapid = si->type;
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-/*
- * Used to check if it's necessary and feasible to unuse pages.
- * Return 1 when nothing to do, 0 when need to shrink pages,
- * error code when there is an error.
- */
-static int __frontswap_shrink(unsigned long target_pages,
- unsigned long *pages_to_unuse,
- int *type)
-{
- unsigned long total_pages = 0, total_pages_to_unuse;
-
- assert_spin_locked(&swap_lock);
-
- total_pages = __frontswap_curr_pages();
- if (total_pages <= target_pages) {
- /* Nothing to do */
- *pages_to_unuse = 0;
- return 1;
- }
- total_pages_to_unuse = total_pages - target_pages;
- return __frontswap_unuse_pages(total_pages_to_unuse, pages_to_unuse, type);
-}
-
-/*
- * Frontswap, like a true swap device, may unnecessarily retain pages
- * under certain circumstances; "shrink" frontswap is essentially a
- * "partial swapoff" and works by calling try_to_unuse to attempt to
- * unuse enough frontswap pages to attempt to -- subject to memory
- * constraints -- reduce the number of pages in frontswap to the
- * number given in the parameter target_pages.
- */
-void frontswap_shrink(unsigned long target_pages)
-{
- unsigned long pages_to_unuse = 0;
- int type, ret;
-
- /*
- * we don't want to hold swap_lock while doing a very
- * lengthy try_to_unuse, but swap_list may change
- * so restart scan from swap_active_head each time
- */
- spin_lock(&swap_lock);
- ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
- spin_unlock(&swap_lock);
- if (ret == 0)
- try_to_unuse(type, true, pages_to_unuse);
- return;
-}
-EXPORT_SYMBOL(frontswap_shrink);
-
-/*
- * Count and return the number of frontswap pages across all
- * swap devices. This is exported so that backend drivers can
- * determine current usage without reading debugfs.
- */
-unsigned long frontswap_curr_pages(void)
-{
- unsigned long totalpages = 0;
-
- spin_lock(&swap_lock);
- totalpages = __frontswap_curr_pages();
- spin_unlock(&swap_lock);
-
- return totalpages;
-}
-EXPORT_SYMBOL(frontswap_curr_pages);
static int __init init_frontswap(void)
{
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 7c06db78a76c..92196562687b 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,7 +36,6 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc)
unsigned int nr_entries;
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
- nr_entries = filter_irq_stacks(entries, nr_entries);
return __stack_depot_save(entries, nr_entries, flags, can_alloc);
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 14ae5c18e776..97a9ed8f87a9 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1596,6 +1596,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
}
/*
+ * Pages instantiated by device-dax (not filesystem-dax)
+ * may be compound pages.
+ */
+ page = compound_head(page);
+
+ /*
* Prevent the inode from being freed while we are interrogating
* the address_space, typically this would be handled by
* lock_page(), but dax pages do not use the page lock. This
diff --git a/mm/memory.c b/mm/memory.c
index f306e698a1e3..c125c4969913 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3529,7 +3529,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!si))
goto out;
- delayacct_set_flag(current, DELAYACCT_PF_SWAPIN);
page = lookup_swap_cache(entry, vma, vmf->address);
swapcache = page;
@@ -3577,7 +3576,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vmf->address, &vmf->ptl);
if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
ret = VM_FAULT_OOM;
- delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto unlock;
}
@@ -3591,13 +3589,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* owner processes (which may be unknown at hwpoison time)
*/
ret = VM_FAULT_HWPOISON;
- delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto out_release;
}
locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
- delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
if (!locked) {
ret |= VM_FAULT_RETRY;
goto out_release;
diff --git a/mm/migrate.c b/mm/migrate.c
index 18ce840914f0..c7da064b4781 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -291,7 +291,6 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
{
pte_t pte;
swp_entry_t entry;
- struct folio *folio;
spin_lock(ptl);
pte = *ptep;
@@ -302,17 +301,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
if (!is_migration_entry(entry))
goto out;
- folio = page_folio(pfn_swap_entry_to_page(entry));
-
- /*
- * Once page cache replacement of page migration started, page_count
- * is zero; but we must not call folio_put_wait_locked() without
- * a ref. Use folio_try_get(), and just fault again if it fails.
- */
- if (!folio_try_get(folio))
- goto out;
- pte_unmap_unlock(ptep, ptl);
- folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
+ migration_entry_wait_on_locked(entry, ptep, ptl);
return;
out:
pte_unmap_unlock(ptep, ptl);
@@ -337,16 +326,11 @@ void migration_entry_wait_huge(struct vm_area_struct *vma,
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
spinlock_t *ptl;
- struct folio *folio;
ptl = pmd_lock(mm, pmd);
if (!is_pmd_migration_entry(*pmd))
goto unlock;
- folio = page_folio(pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd)));
- if (!folio_try_get(folio))
- goto unlock;
- spin_unlock(ptl);
- folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
+ migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), NULL, ptl);
return;
unlock:
spin_unlock(ptl);
@@ -2431,22 +2415,8 @@ static bool migrate_vma_check_page(struct page *page)
return false;
/* Page from ZONE_DEVICE have one extra reference */
- if (is_zone_device_page(page)) {
- /*
- * Private page can never be pin as they have no valid pte and
- * GUP will fail for those. Yet if there is a pending migration
- * a thread might try to wait on the pte migration entry and
- * will bump the page reference count. Sadly there is no way to
- * differentiate a regular pin from migration wait. Hence to
- * avoid 2 racing thread trying to migrate back to CPU to enter
- * infinite loop (one stopping migration because the other is
- * waiting on pte migration entry). We always return true here.
- *
- * FIXME proper solution is to rework migration_entry_wait() so
- * it does not need to take a reference on page.
- */
- return is_device_private_page(page);
- }
+ if (is_zone_device_page(page))
+ extra++;
/* For file back page */
if (page_mapping(page))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d4205e5e41d1..3589febc6d31 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -74,6 +74,7 @@
#include <linux/padata.h>
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
+#include <linux/delayacct.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -4365,6 +4366,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
return NULL;
psi_memstall_enter(&pflags);
+ delayacct_compact_start();
noreclaim_flag = memalloc_noreclaim_save();
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
@@ -4372,6 +4374,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
+ delayacct_compact_end();
if (*compact_result == COMPACT_SKIPPED)
return NULL;
diff --git a/mm/page_io.c b/mm/page_io.c
index 9725c7e1eeea..0bf8e40f4e57 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -25,6 +25,7 @@
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
+#include <linux/delayacct.h>
void end_swap_bio_write(struct bio *bio)
{
@@ -370,6 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
* significant part of overall IO time.
*/
psi_memstall_enter(&pflags);
+ delayacct_swapin_start();
if (frontswap_load(page) == 0) {
SetPageUptodate(page);
@@ -432,6 +434,7 @@ int swap_readpage(struct page *page, bool synchronous)
out:
psi_memstall_leave(&pflags);
+ delayacct_swapin_end();
return ret;
}
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 5eea061bb1e5..99e360df9465 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -80,6 +80,8 @@ static __init void init_page_owner(void)
if (!page_owner_enabled)
return;
+ stack_depot_init();
+
register_dummy_stack();
register_failure_stack();
register_early_stack();
diff --git a/mm/percpu.c b/mm/percpu.c
index 4199a0604c32..ea28db283044 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -779,7 +779,7 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
{
struct pcpu_block_md *block = chunk->md_blocks + index;
unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
- unsigned int rs, re, start; /* region start, region end */
+ unsigned int start, end; /* region start, region end */
/* promote scan_hint to contig_hint */
if (block->scan_hint) {
@@ -795,9 +795,8 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
block->right_free = 0;
/* iterate over free areas and update the contig hints */
- bitmap_for_each_clear_region(alloc_map, rs, re, start,
- PCPU_BITMAP_BLOCK_BITS)
- pcpu_block_update(block, rs, re);
+ for_each_clear_bitrange_from(start, end, alloc_map, PCPU_BITMAP_BLOCK_BITS)
+ pcpu_block_update(block, start, end);
}
/**
@@ -1070,17 +1069,18 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits,
int *next_off)
{
- unsigned int page_start, page_end, rs, re;
+ unsigned int start, end;
- page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
- page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
+ start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
+ end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
- rs = page_start;
- bitmap_next_clear_region(chunk->populated, &rs, &re, page_end);
- if (rs >= page_end)
+ start = find_next_zero_bit(chunk->populated, end, start);
+ if (start >= end)
return true;
- *next_off = re * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE;
+ end = find_next_bit(chunk->populated, end, start + 1);
+
+ *next_off = end * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE;
return false;
}
@@ -1851,13 +1851,12 @@ area_found:
/* populate if not all pages are already there */
if (!is_atomic) {
- unsigned int page_start, page_end, rs, re;
+ unsigned int page_end, rs, re;
- page_start = PFN_DOWN(off);
+ rs = PFN_DOWN(off);
page_end = PFN_UP(off + size);
- bitmap_for_each_clear_region(chunk->populated, rs, re,
- page_start, page_end) {
+ for_each_clear_bitrange_from(rs, re, chunk->populated, page_end) {
WARN_ON(chunk->immutable);
ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
@@ -2013,8 +2012,7 @@ static void pcpu_balance_free(bool empty_only)
list_for_each_entry_safe(chunk, next, &to_free, list) {
unsigned int rs, re;
- bitmap_for_each_set_region(chunk->populated, rs, re, 0,
- chunk->nr_pages) {
+ for_each_set_bitrange(rs, re, chunk->populated, chunk->nr_pages) {
pcpu_depopulate_chunk(chunk, rs, re);
spin_lock_irq(&pcpu_lock);
pcpu_chunk_depopulated(chunk, rs, re);
@@ -2084,8 +2082,7 @@ retry_pop:
continue;
/* @chunk can't go away while pcpu_alloc_mutex is held */
- bitmap_for_each_clear_region(chunk->populated, rs, re, 0,
- chunk->nr_pages) {
+ for_each_clear_bitrange(rs, re, chunk->populated, chunk->nr_pages) {
int nr = min_t(int, re - rs, nr_to_pop);
spin_unlock_irq(&pcpu_lock);
@@ -2992,6 +2989,42 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
return ai;
}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
+{
+ const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NUMA
+ int node = NUMA_NO_NODE;
+ void *ptr;
+
+ if (cpu_to_nd_fn)
+ node = cpu_to_nd_fn(cpu);
+
+ if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
+ ptr = memblock_alloc_from(size, align, goal);
+ pr_info("cpu %d has no node %d or node-local memory\n",
+ cpu, node);
+ pr_debug("per cpu data for cpu%d %zu bytes at 0x%llx\n",
+ cpu, size, (u64)__pa(ptr));
+ } else {
+ ptr = memblock_alloc_try_nid(size, align, goal,
+ MEMBLOCK_ALLOC_ACCESSIBLE,
+ node);
+
+ pr_debug("per cpu data for cpu%d %zu bytes on node%d at 0x%llx\n",
+ cpu, size, node, (u64)__pa(ptr));
+ }
+ return ptr;
+#else
+ return memblock_alloc_from(size, align, goal);
+#endif
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+ memblock_free(ptr, size);
+}
#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
#if defined(BUILD_EMBED_FIRST_CHUNK)
@@ -3001,14 +3034,13 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
* @dyn_size: minimum free size for dynamic allocation in bytes
* @atom_size: allocation atom size
* @cpu_distance_fn: callback to determine distance between cpus, optional
- * @alloc_fn: function to allocate percpu page
- * @free_fn: function to free percpu page
+ * @cpu_to_nd_fn: callback to convert cpu to its node, optional
*
* This is a helper to ease setting up embedded first percpu chunk and
* can be called where pcpu_setup_first_chunk() is expected.
*
* If this function is used to setup the first chunk, it is allocated
- * by calling @alloc_fn and used as-is without being mapped into
+ * by calling pcpu_fc_alloc and used as-is without being mapped into
* vmalloc area. Allocations are always whole multiples of @atom_size
* aligned to @atom_size.
*
@@ -3022,7 +3054,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
* @dyn_size specifies the minimum dynamic area size.
*
* If the needed size is smaller than the minimum or specified unit
- * size, the leftover is returned using @free_fn.
+ * size, the leftover is returned using pcpu_fc_free.
*
* RETURNS:
* 0 on success, -errno on failure.
@@ -3030,8 +3062,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn)
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
void *base = (void *)ULONG_MAX;
void **areas = NULL;
@@ -3066,7 +3097,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
BUG_ON(cpu == NR_CPUS);
/* allocate space for the whole group */
- ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
+ ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
if (!ptr) {
rc = -ENOMEM;
goto out_free_areas;
@@ -3105,12 +3136,12 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
if (gi->cpu_map[i] == NR_CPUS) {
/* unused unit, free whole */
- free_fn(ptr, ai->unit_size);
+ pcpu_fc_free(ptr, ai->unit_size);
continue;
}
/* copy and return the unused part */
memcpy(ptr, __per_cpu_load, ai->static_size);
- free_fn(ptr + size_sum, ai->unit_size - size_sum);
+ pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
}
}
@@ -3129,7 +3160,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
out_free_areas:
for (group = 0; group < ai->nr_groups; group++)
if (areas[group])
- free_fn(areas[group],
+ pcpu_fc_free(areas[group],
ai->groups[group].nr_units * ai->unit_size);
out_free:
pcpu_free_alloc_info(ai);
@@ -3140,12 +3171,79 @@ out_free:
#endif /* BUILD_EMBED_FIRST_CHUNK */
#ifdef BUILD_PAGE_FIRST_CHUNK
+#include <asm/pgalloc.h>
+
+#ifndef P4D_TABLE_SIZE
+#define P4D_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PUD_TABLE_SIZE
+#define PUD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PMD_TABLE_SIZE
+#define PMD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PTE_TABLE_SIZE
+#define PTE_TABLE_SIZE PAGE_SIZE
+#endif
+void __init __weak pcpu_populate_pte(unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (pgd_none(*pgd)) {
+ p4d_t *new;
+
+ new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pgd_populate(&init_mm, pgd, new);
+ }
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ pud_t *new;
+
+ new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ p4d_populate(&init_mm, p4d, new);
+ }
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ pmd_t *new;
+
+ new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pud_populate(&init_mm, pud, new);
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd)) {
+ pte_t *new;
+
+ new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+
+ return;
+
+err_alloc:
+ panic("%s: Failed to allocate memory\n", __func__);
+}
+
/**
* pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
* @reserved_size: the size of reserved percpu area in bytes
- * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
- * @free_fn: function to free percpu page, always called with PAGE_SIZE
- * @populate_pte_fn: function to populate pte
+ * @cpu_to_nd_fn: callback to convert cpu to its node, optional
*
* This is a helper to ease setting up page-remapped first percpu
* chunk and can be called where pcpu_setup_first_chunk() is expected.
@@ -3156,10 +3254,7 @@ out_free:
* RETURNS:
* 0 on success, -errno on failure.
*/
-int __init pcpu_page_first_chunk(size_t reserved_size,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn,
- pcpu_fc_populate_pte_fn_t populate_pte_fn)
+int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
static struct vm_struct vm;
struct pcpu_alloc_info *ai;
@@ -3201,7 +3296,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
for (i = 0; i < unit_pages; i++) {
void *ptr;
- ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
+ ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
if (!ptr) {
pr_warn("failed to allocate %s page for cpu%u\n",
psize_str, cpu);
@@ -3223,7 +3318,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
(unsigned long)vm.addr + unit * ai->unit_size;
for (i = 0; i < unit_pages; i++)
- populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
+ pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
/* pte already populated, the following shouldn't fail */
rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
@@ -3253,7 +3348,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
enomem:
while (--j >= 0)
- free_fn(page_address(pages[j]), PAGE_SIZE);
+ pcpu_fc_free(page_address(pages[j]), PAGE_SIZE);
rc = -ENOMEM;
out_free_ar:
memblock_free(pages, pages_size);
@@ -3278,17 +3373,6 @@ out_free_ar:
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
-static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
-{
- return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
-}
-
-static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
@@ -3299,9 +3383,8 @@ void __init setup_per_cpu_areas(void)
* Always reserve area for module percpu variables. That's
* what the legacy allocator did.
*/
- rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
- PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
- pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
+ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE,
+ PAGE_SIZE, NULL, NULL);
if (rc < 0)
panic("Failed to initialize percpu areas.");
diff --git a/mm/shmem.c b/mm/shmem.c
index 66909efd0a1b..a09b29ec2b45 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -36,7 +36,6 @@
#include <linux/uio.h>
#include <linux/khugepaged.h>
#include <linux/hugetlb.h>
-#include <linux/frontswap.h>
#include <linux/fs_parser.h>
#include <linux/swapfile.h>
@@ -1152,7 +1151,7 @@ static void shmem_evict_inode(struct inode *inode)
static int shmem_find_swap_entries(struct address_space *mapping,
pgoff_t start, unsigned int nr_entries,
struct page **entries, pgoff_t *indices,
- unsigned int type, bool frontswap)
+ unsigned int type)
{
XA_STATE(xas, &mapping->i_pages, start);
struct page *page;
@@ -1173,9 +1172,6 @@ static int shmem_find_swap_entries(struct address_space *mapping,
entry = radix_to_swp_entry(page);
if (swp_type(entry) != type)
continue;
- if (frontswap &&
- !frontswap_test(swap_info[type], swp_offset(entry)))
- continue;
indices[ret] = xas.xa_index;
entries[ret] = page;
@@ -1228,26 +1224,20 @@ static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec,
/*
* If swap found in inode, free it and move page from swapcache to filecache.
*/
-static int shmem_unuse_inode(struct inode *inode, unsigned int type,
- bool frontswap, unsigned long *fs_pages_to_unuse)
+static int shmem_unuse_inode(struct inode *inode, unsigned int type)
{
struct address_space *mapping = inode->i_mapping;
pgoff_t start = 0;
struct pagevec pvec;
pgoff_t indices[PAGEVEC_SIZE];
- bool frontswap_partial = (frontswap && *fs_pages_to_unuse > 0);
int ret = 0;
pagevec_init(&pvec);
do {
unsigned int nr_entries = PAGEVEC_SIZE;
- if (frontswap_partial && *fs_pages_to_unuse < PAGEVEC_SIZE)
- nr_entries = *fs_pages_to_unuse;
-
pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries,
- pvec.pages, indices,
- type, frontswap);
+ pvec.pages, indices, type);
if (pvec.nr == 0) {
ret = 0;
break;
@@ -1257,14 +1247,6 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type,
if (ret < 0)
break;
- if (frontswap_partial) {
- *fs_pages_to_unuse -= ret;
- if (*fs_pages_to_unuse == 0) {
- ret = FRONTSWAP_PAGES_UNUSED;
- break;
- }
- }
-
start = indices[pvec.nr - 1];
} while (true);
@@ -1276,8 +1258,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type,
* device 'type' back into memory, so the swap device can be
* unused.
*/
-int shmem_unuse(unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+int shmem_unuse(unsigned int type)
{
struct shmem_inode_info *info, *next;
int error = 0;
@@ -1300,8 +1281,7 @@ int shmem_unuse(unsigned int type, bool frontswap,
atomic_inc(&info->stop_eviction);
mutex_unlock(&shmem_swaplist_mutex);
- error = shmem_unuse_inode(&info->vfs_inode, type, frontswap,
- fs_pages_to_unuse);
+ error = shmem_unuse_inode(&info->vfs_inode, type);
cond_resched();
mutex_lock(&shmem_swaplist_mutex);
@@ -4015,8 +3995,7 @@ int __init shmem_init(void)
return 0;
}
-int shmem_unuse(unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+int shmem_unuse(unsigned int type)
{
return 0;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index caa9f81a0d15..bf0df7aa7158 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -49,7 +49,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
unsigned char);
static void free_swap_count_continuations(struct swap_info_struct *);
-DEFINE_SPINLOCK(swap_lock);
+static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
atomic_long_t nr_swap_pages;
/*
@@ -71,7 +71,7 @@ static const char Unused_offset[] = "Unused swap offset entry ";
* all active swap_info_structs
* protected with swap_lock, and ordered by priority.
*/
-PLIST_HEAD(swap_active_head);
+static PLIST_HEAD(swap_active_head);
/*
* all available (active, not full) swap_info_structs
@@ -1923,8 +1923,7 @@ out:
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
- unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+ unsigned int type)
{
struct page *page;
swp_entry_t entry;
@@ -1945,9 +1944,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
continue;
offset = swp_offset(entry);
- if (frontswap && !frontswap_test(si, offset))
- continue;
-
pte_unmap(pte);
swap_map = &si->swap_map[offset];
page = lookup_swap_cache(entry, vma, addr);
@@ -1979,11 +1975,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
try_to_free_swap(page);
unlock_page(page);
put_page(page);
-
- if (*fs_pages_to_unuse && !--(*fs_pages_to_unuse)) {
- ret = FRONTSWAP_PAGES_UNUSED;
- goto out;
- }
try_next:
pte = pte_offset_map(pmd, addr);
} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -1996,8 +1987,7 @@ out:
static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end,
- unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+ unsigned int type)
{
pmd_t *pmd;
unsigned long next;
@@ -2009,8 +1999,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
next = pmd_addr_end(addr, end);
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
continue;
- ret = unuse_pte_range(vma, pmd, addr, next, type,
- frontswap, fs_pages_to_unuse);
+ ret = unuse_pte_range(vma, pmd, addr, next, type);
if (ret)
return ret;
} while (pmd++, addr = next, addr != end);
@@ -2019,8 +2008,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
unsigned long addr, unsigned long end,
- unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+ unsigned int type)
{
pud_t *pud;
unsigned long next;
@@ -2031,8 +2019,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
- ret = unuse_pmd_range(vma, pud, addr, next, type,
- frontswap, fs_pages_to_unuse);
+ ret = unuse_pmd_range(vma, pud, addr, next, type);
if (ret)
return ret;
} while (pud++, addr = next, addr != end);
@@ -2041,8 +2028,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end,
- unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+ unsigned int type)
{
p4d_t *p4d;
unsigned long next;
@@ -2053,16 +2039,14 @@ static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
next = p4d_addr_end(addr, end);
if (p4d_none_or_clear_bad(p4d))
continue;
- ret = unuse_pud_range(vma, p4d, addr, next, type,
- frontswap, fs_pages_to_unuse);
+ ret = unuse_pud_range(vma, p4d, addr, next, type);
if (ret)
return ret;
} while (p4d++, addr = next, addr != end);
return 0;
}
-static int unuse_vma(struct vm_area_struct *vma, unsigned int type,
- bool frontswap, unsigned long *fs_pages_to_unuse)
+static int unuse_vma(struct vm_area_struct *vma, unsigned int type)
{
pgd_t *pgd;
unsigned long addr, end, next;
@@ -2076,16 +2060,14 @@ static int unuse_vma(struct vm_area_struct *vma, unsigned int type,
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
- ret = unuse_p4d_range(vma, pgd, addr, next, type,
- frontswap, fs_pages_to_unuse);
+ ret = unuse_p4d_range(vma, pgd, addr, next, type);
if (ret)
return ret;
} while (pgd++, addr = next, addr != end);
return 0;
}
-static int unuse_mm(struct mm_struct *mm, unsigned int type,
- bool frontswap, unsigned long *fs_pages_to_unuse)
+static int unuse_mm(struct mm_struct *mm, unsigned int type)
{
struct vm_area_struct *vma;
int ret = 0;
@@ -2093,8 +2075,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type,
mmap_read_lock(mm);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (vma->anon_vma) {
- ret = unuse_vma(vma, type, frontswap,
- fs_pages_to_unuse);
+ ret = unuse_vma(vma, type);
if (ret)
break;
}
@@ -2110,7 +2091,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type,
* if there are no inuse entries after prev till end of the map.
*/
static unsigned int find_next_to_unuse(struct swap_info_struct *si,
- unsigned int prev, bool frontswap)
+ unsigned int prev)
{
unsigned int i;
unsigned char count;
@@ -2124,8 +2105,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
for (i = prev + 1; i < si->max; i++) {
count = READ_ONCE(si->swap_map[i]);
if (count && swap_count(count) != SWAP_MAP_BAD)
- if (!frontswap || frontswap_test(si, i))
- break;
+ break;
if ((i % LATENCY_LIMIT) == 0)
cond_resched();
}
@@ -2136,12 +2116,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
return i;
}
-/*
- * If the boolean frontswap is true, only unuse pages_to_unuse pages;
- * pages_to_unuse==0 means all pages; ignored if frontswap is false
- */
-int try_to_unuse(unsigned int type, bool frontswap,
- unsigned long pages_to_unuse)
+static int try_to_unuse(unsigned int type)
{
struct mm_struct *prev_mm;
struct mm_struct *mm;
@@ -2155,13 +2130,10 @@ int try_to_unuse(unsigned int type, bool frontswap,
if (!READ_ONCE(si->inuse_pages))
return 0;
- if (!frontswap)
- pages_to_unuse = 0;
-
retry:
- retval = shmem_unuse(type, frontswap, &pages_to_unuse);
+ retval = shmem_unuse(type);
if (retval)
- goto out;
+ return retval;
prev_mm = &init_mm;
mmget(prev_mm);
@@ -2178,11 +2150,10 @@ retry:
spin_unlock(&mmlist_lock);
mmput(prev_mm);
prev_mm = mm;
- retval = unuse_mm(mm, type, frontswap, &pages_to_unuse);
-
+ retval = unuse_mm(mm, type);
if (retval) {
mmput(prev_mm);
- goto out;
+ return retval;
}
/*
@@ -2199,7 +2170,7 @@ retry:
i = 0;
while (READ_ONCE(si->inuse_pages) &&
!signal_pending(current) &&
- (i = find_next_to_unuse(si, i, frontswap)) != 0) {
+ (i = find_next_to_unuse(si, i)) != 0) {
entry = swp_entry(type, i);
page = find_get_page(swap_address_space(entry), i);
@@ -2217,14 +2188,6 @@ retry:
try_to_free_swap(page);
unlock_page(page);
put_page(page);
-
- /*
- * For frontswap, we just need to unuse pages_to_unuse, if
- * it was specified. Need not check frontswap again here as
- * we already zeroed out pages_to_unuse if not frontswap.
- */
- if (pages_to_unuse && --pages_to_unuse == 0)
- goto out;
}
/*
@@ -2242,10 +2205,10 @@ retry:
if (READ_ONCE(si->inuse_pages)) {
if (!signal_pending(current))
goto retry;
- retval = -EINTR;
+ return -EINTR;
}
-out:
- return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval;
+
+ return 0;
}
/*
@@ -2463,7 +2426,8 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
struct swap_cluster_info *cluster_info,
unsigned long *frontswap_map)
{
- frontswap_init(p->type, frontswap_map);
+ if (IS_ENABLED(CONFIG_FRONTSWAP))
+ frontswap_init(p->type, frontswap_map);
spin_lock(&swap_lock);
spin_lock(&p->lock);
setup_swap_info(p, prio, swap_map, cluster_info);
@@ -2576,7 +2540,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
disable_swap_slots_cache_lock();
set_current_oom_origin();
- err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
+ err = try_to_unuse(p->type);
clear_current_oom_origin();
if (err) {
diff --git a/mm/truncate.c b/mm/truncate.c
index 5e243d7269c0..9dbf0b75da5d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -22,7 +22,6 @@
#include <linux/buffer_head.h> /* grr. try_to_release_page,
do_invalidatepage */
#include <linux/shmem_fs.h>
-#include <linux/cleancache.h>
#include <linux/rmap.h>
#include "internal.h"
@@ -264,7 +263,6 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
*/
folio_zero_range(folio, offset, length);
- cleancache_invalidate_page(folio->mapping, &folio->page);
if (folio_has_private(folio))
do_invalidatepage(&folio->page, offset, length);
if (!folio_test_large(folio))
@@ -351,7 +349,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
bool same_folio;
if (mapping_empty(mapping))
- goto out;
+ return;
/*
* 'start' and 'end' always covers the range of pages to be fully
@@ -442,9 +440,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
folio_batch_release(&fbatch);
index++;
}
-
-out:
- cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -498,10 +493,6 @@ void truncate_inode_pages_final(struct address_space *mapping)
xa_unlock_irq(&mapping->i_pages);
}
- /*
- * Cleancache needs notification even if there are no pages or shadow
- * entries.
- */
truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(truncate_inode_pages_final);
@@ -661,7 +652,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
int did_range_unmap = 0;
if (mapping_empty(mapping))
- goto out;
+ return 0;
folio_batch_init(&fbatch);
index = start;
@@ -725,8 +716,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
if (dax_mapping(mapping)) {
unmap_mapping_pages(mapping, start, end - start + 1, false);
}
-out:
- cleancache_invalidate_inode(mapping);
return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 0d3b65939016..9152fbde33b5 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -30,6 +30,14 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+/*
+ * lock ordering:
+ * page_lock
+ * pool->migrate_lock
+ * class->lock
+ * zspage->lock
+ */
+
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -57,6 +65,7 @@
#include <linux/wait.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
+#include <linux/local_lock.h>
#define ZSPAGE_MAGIC 0x58
@@ -101,15 +110,6 @@
#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
/*
- * Memory for allocating for handle keeps object position by
- * encoding <page, obj_idx> and the encoded value has a room
- * in least bit(ie, look at obj_to_location).
- * We use the bit to synchronize between object access by
- * user and migration.
- */
-#define HANDLE_PIN_BIT 0
-
-/*
* Head in allocated object should have OBJ_ALLOCATED_TAG
* to identify the object was allocated or not.
* It's okay to add the status bit in the least bit because
@@ -121,6 +121,7 @@
#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
+#define HUGE_BITS 1
#define FULLNESS_BITS 2
#define CLASS_BITS 8
#define ISOLATED_BITS 3
@@ -158,7 +159,7 @@ enum fullness_group {
NR_ZS_FULLNESS,
};
-enum zs_stat_type {
+enum class_stat_type {
CLASS_EMPTY,
CLASS_ALMOST_EMPTY,
CLASS_ALMOST_FULL,
@@ -213,22 +214,6 @@ struct size_class {
struct zs_size_stat stats;
};
-/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
-static void SetPageHugeObject(struct page *page)
-{
- SetPageOwnerPriv1(page);
-}
-
-static void ClearPageHugeObject(struct page *page)
-{
- ClearPageOwnerPriv1(page);
-}
-
-static int PageHugeObject(struct page *page)
-{
- return PageOwnerPriv1(page);
-}
-
/*
* Placed within free objects to form a singly linked list.
* For every zspage, zspage->freeobj gives head of this list.
@@ -269,15 +254,14 @@ struct zs_pool {
#ifdef CONFIG_COMPACTION
struct inode *inode;
struct work_struct free_work;
- /* A wait queue for when migration races with async_free_zspage() */
- struct wait_queue_head migration_wait;
- atomic_long_t isolated_pages;
- bool destroying;
#endif
+ /* protect page/zspage migration */
+ rwlock_t migrate_lock;
};
struct zspage {
struct {
+ unsigned int huge:HUGE_BITS;
unsigned int fullness:FULLNESS_BITS;
unsigned int class:CLASS_BITS + 1;
unsigned int isolated:ISOLATED_BITS;
@@ -293,17 +277,32 @@ struct zspage {
};
struct mapping_area {
+ local_lock_t lock;
char *vm_buf; /* copy buffer for objects that span pages */
char *vm_addr; /* address of kmap_atomic()'ed pages */
enum zs_mapmode vm_mm; /* mapping mode */
};
+/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
+static void SetZsHugePage(struct zspage *zspage)
+{
+ zspage->huge = 1;
+}
+
+static bool ZsHugePage(struct zspage *zspage)
+{
+ return zspage->huge;
+}
+
#ifdef CONFIG_COMPACTION
static int zs_register_migration(struct zs_pool *pool);
static void zs_unregister_migration(struct zs_pool *pool);
static void migrate_lock_init(struct zspage *zspage);
static void migrate_read_lock(struct zspage *zspage);
static void migrate_read_unlock(struct zspage *zspage);
+static void migrate_write_lock(struct zspage *zspage);
+static void migrate_write_lock_nested(struct zspage *zspage);
+static void migrate_write_unlock(struct zspage *zspage);
static void kick_deferred_free(struct zs_pool *pool);
static void init_deferred_free(struct zs_pool *pool);
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
@@ -315,6 +314,9 @@ static void zs_unregister_migration(struct zs_pool *pool) {}
static void migrate_lock_init(struct zspage *zspage) {}
static void migrate_read_lock(struct zspage *zspage) {}
static void migrate_read_unlock(struct zspage *zspage) {}
+static void migrate_write_lock(struct zspage *zspage) {}
+static void migrate_write_lock_nested(struct zspage *zspage) {}
+static void migrate_write_unlock(struct zspage *zspage) {}
static void kick_deferred_free(struct zs_pool *pool) {}
static void init_deferred_free(struct zs_pool *pool) {}
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
@@ -366,14 +368,10 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
kmem_cache_free(pool->zspage_cachep, zspage);
}
+/* class->lock(which owns the handle) synchronizes races */
static void record_obj(unsigned long handle, unsigned long obj)
{
- /*
- * lsb of @obj represents handle lock while other bits
- * represent object value the handle is pointing so
- * updating shouldn't do store tearing.
- */
- WRITE_ONCE(*(unsigned long *)handle, obj);
+ *(unsigned long *)handle = obj;
}
/* zpool driver */
@@ -455,12 +453,9 @@ MODULE_ALIAS("zpool-zsmalloc");
#endif /* CONFIG_ZPOOL */
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
-
-static bool is_zspage_isolated(struct zspage *zspage)
-{
- return zspage->isolated;
-}
+static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static __maybe_unused int is_first_page(struct page *page)
{
@@ -517,6 +512,12 @@ static void get_zspage_mapping(struct zspage *zspage,
*class_idx = zspage->class;
}
+static struct size_class *zspage_class(struct zs_pool *pool,
+ struct zspage *zspage)
+{
+ return pool->size_class[zspage->class];
+}
+
static void set_zspage_mapping(struct zspage *zspage,
unsigned int class_idx,
enum fullness_group fullness)
@@ -543,21 +544,21 @@ static int get_size_class_index(int size)
return min_t(int, ZS_SIZE_CLASSES - 1, idx);
}
-/* type can be of enum type zs_stat_type or fullness_group */
-static inline void zs_stat_inc(struct size_class *class,
+/* type can be of enum type class_stat_type or fullness_group */
+static inline void class_stat_inc(struct size_class *class,
int type, unsigned long cnt)
{
class->stats.objs[type] += cnt;
}
-/* type can be of enum type zs_stat_type or fullness_group */
-static inline void zs_stat_dec(struct size_class *class,
+/* type can be of enum type class_stat_type or fullness_group */
+static inline void class_stat_dec(struct size_class *class,
int type, unsigned long cnt)
{
class->stats.objs[type] -= cnt;
}
-/* type can be of enum type zs_stat_type or fullness_group */
+/* type can be of enum type class_stat_type or fullness_group */
static inline unsigned long zs_stat_get(struct size_class *class,
int type)
{
@@ -719,7 +720,7 @@ static void insert_zspage(struct size_class *class,
{
struct zspage *head;
- zs_stat_inc(class, fullness, 1);
+ class_stat_inc(class, fullness, 1);
head = list_first_entry_or_null(&class->fullness_list[fullness],
struct zspage, list);
/*
@@ -741,10 +742,9 @@ static void remove_zspage(struct size_class *class,
enum fullness_group fullness)
{
VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
- VM_BUG_ON(is_zspage_isolated(zspage));
list_del_init(&zspage->list);
- zs_stat_dec(class, fullness, 1);
+ class_stat_dec(class, fullness, 1);
}
/*
@@ -767,13 +767,9 @@ static enum fullness_group fix_fullness_group(struct size_class *class,
if (newfg == currfg)
goto out;
- if (!is_zspage_isolated(zspage)) {
- remove_zspage(class, zspage, currfg);
- insert_zspage(class, zspage, newfg);
- }
-
+ remove_zspage(class, zspage, currfg);
+ insert_zspage(class, zspage, newfg);
set_zspage_mapping(zspage, class_idx, newfg);
-
out:
return newfg;
}
@@ -824,7 +820,9 @@ static struct zspage *get_zspage(struct page *page)
static struct page *get_next_page(struct page *page)
{
- if (unlikely(PageHugeObject(page)))
+ struct zspage *zspage = get_zspage(page);
+
+ if (unlikely(ZsHugePage(zspage)))
return NULL;
return (struct page *)page->index;
@@ -844,6 +842,12 @@ static void obj_to_location(unsigned long obj, struct page **page,
*obj_idx = (obj & OBJ_INDEX_MASK);
}
+static void obj_to_page(unsigned long obj, struct page **page)
+{
+ obj >>= OBJ_TAG_BITS;
+ *page = pfn_to_page(obj >> OBJ_INDEX_BITS);
+}
+
/**
* location_to_obj - get obj value encoded from (<page>, <obj_idx>)
* @page: page object resides in zspage
@@ -865,33 +869,22 @@ static unsigned long handle_to_obj(unsigned long handle)
return *(unsigned long *)handle;
}
-static unsigned long obj_to_head(struct page *page, void *obj)
+static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
{
- if (unlikely(PageHugeObject(page))) {
+ unsigned long handle;
+ struct zspage *zspage = get_zspage(page);
+
+ if (unlikely(ZsHugePage(zspage))) {
VM_BUG_ON_PAGE(!is_first_page(page), page);
- return page->index;
+ handle = page->index;
} else
- return *(unsigned long *)obj;
-}
-
-static inline int testpin_tag(unsigned long handle)
-{
- return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
-}
-
-static inline int trypin_tag(unsigned long handle)
-{
- return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
-}
+ handle = *(unsigned long *)obj;
-static void pin_tag(unsigned long handle) __acquires(bitlock)
-{
- bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
-}
+ if (!(handle & OBJ_ALLOCATED_TAG))
+ return false;
-static void unpin_tag(unsigned long handle) __releases(bitlock)
-{
- bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
+ *phandle = handle & ~OBJ_ALLOCATED_TAG;
+ return true;
}
static void reset_page(struct page *page)
@@ -900,7 +893,6 @@ static void reset_page(struct page *page)
ClearPagePrivate(page);
set_page_private(page, 0);
page_mapcount_reset(page);
- ClearPageHugeObject(page);
page->index = 0;
}
@@ -952,7 +944,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
cache_free_zspage(pool, zspage);
- zs_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
+ class_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
atomic_long_sub(class->pages_per_zspage,
&pool->pages_allocated);
}
@@ -963,6 +955,11 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
VM_BUG_ON(get_zspage_inuse(zspage));
VM_BUG_ON(list_empty(&zspage->list));
+ /*
+ * Since zs_free() must not sleep, this function cannot call
+ * lock_page(). The page locks taken by trylock_zspage() will be
+ * released by __free_zspage().
+ */
if (!trylock_zspage(zspage)) {
kick_deferred_free(pool);
return;
@@ -1042,7 +1039,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
SetPagePrivate(page);
if (unlikely(class->objs_per_zspage == 1 &&
class->pages_per_zspage == 1))
- SetPageHugeObject(page);
+ SetZsHugePage(zspage);
} else {
prev_page->index = (unsigned long)page;
}
@@ -1246,8 +1243,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
unsigned long obj, off;
unsigned int obj_idx;
- unsigned int class_idx;
- enum fullness_group fg;
struct size_class *class;
struct mapping_area *area;
struct page *pages[2];
@@ -1260,21 +1255,26 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
*/
BUG_ON(in_interrupt());
- /* From now on, migration cannot move the object */
- pin_tag(handle);
-
+ /* Holding migrate_lock guarantees the zspage can be looked up from the handle safely */
+ read_lock(&pool->migrate_lock);
obj = handle_to_obj(handle);
obj_to_location(obj, &page, &obj_idx);
zspage = get_zspage(page);
- /* migration cannot move any subpage in this zspage */
+ /*
+ * migration cannot move any zpages in this zspage. Here, class->lock
+ * is too heavy since callers would take some time until they call
+ * zs_unmap_object API, so delegate the locking from class to zspage,
+ * which has smaller granularity.
+ */
migrate_read_lock(zspage);
+ read_unlock(&pool->migrate_lock);
- get_zspage_mapping(zspage, &class_idx, &fg);
- class = pool->size_class[class_idx];
+ class = zspage_class(pool, zspage);
off = (class->size * obj_idx) & ~PAGE_MASK;
- area = &get_cpu_var(zs_map_area);
+ local_lock(&zs_map_area.lock);
+ area = this_cpu_ptr(&zs_map_area);
area->vm_mm = mm;
if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */
@@ -1290,7 +1290,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
ret = __zs_map_object(area, pages, off, class->size);
out:
- if (likely(!PageHugeObject(page)))
+ if (likely(!ZsHugePage(zspage)))
ret += ZS_HANDLE_SIZE;
return ret;
@@ -1304,16 +1304,13 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
unsigned long obj, off;
unsigned int obj_idx;
- unsigned int class_idx;
- enum fullness_group fg;
struct size_class *class;
struct mapping_area *area;
obj = handle_to_obj(handle);
obj_to_location(obj, &page, &obj_idx);
zspage = get_zspage(page);
- get_zspage_mapping(zspage, &class_idx, &fg);
- class = pool->size_class[class_idx];
+ class = zspage_class(pool, zspage);
off = (class->size * obj_idx) & ~PAGE_MASK;
area = this_cpu_ptr(&zs_map_area);
@@ -1328,10 +1325,9 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
__zs_unmap_object(area, pages, off, class->size);
}
- put_cpu_var(zs_map_area);
+ local_unlock(&zs_map_area.lock);
migrate_read_unlock(zspage);
- unpin_tag(handle);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);
@@ -1354,17 +1350,19 @@ size_t zs_huge_class_size(struct zs_pool *pool)
}
EXPORT_SYMBOL_GPL(zs_huge_class_size);
-static unsigned long obj_malloc(struct size_class *class,
+static unsigned long obj_malloc(struct zs_pool *pool,
struct zspage *zspage, unsigned long handle)
{
int i, nr_page, offset;
unsigned long obj;
struct link_free *link;
+ struct size_class *class;
struct page *m_page;
unsigned long m_offset;
void *vaddr;
+ class = pool->size_class[zspage->class];
handle |= OBJ_ALLOCATED_TAG;
obj = get_freeobj(zspage);
@@ -1379,7 +1377,7 @@ static unsigned long obj_malloc(struct size_class *class,
vaddr = kmap_atomic(m_page);
link = (struct link_free *)vaddr + m_offset / sizeof(*link);
set_freeobj(zspage, link->next >> OBJ_TAG_BITS);
- if (likely(!PageHugeObject(m_page)))
+ if (likely(!ZsHugePage(zspage)))
/* record handle in the header of allocated chunk */
link->handle = handle;
else
@@ -1388,7 +1386,6 @@ static unsigned long obj_malloc(struct size_class *class,
kunmap_atomic(vaddr);
mod_zspage_inuse(zspage, 1);
- zs_stat_inc(class, OBJ_USED, 1);
obj = location_to_obj(m_page, obj);
@@ -1424,13 +1421,15 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
size += ZS_HANDLE_SIZE;
class = pool->size_class[get_size_class_index(size)];
+ /* class->lock effectively protects the zpage migration */
spin_lock(&class->lock);
zspage = find_get_zspage(class);
if (likely(zspage)) {
- obj = obj_malloc(class, zspage, handle);
+ obj = obj_malloc(pool, zspage, handle);
/* Now move the zspage to another fullness group, if required */
fix_fullness_group(class, zspage);
record_obj(handle, obj);
+ class_stat_inc(class, OBJ_USED, 1);
spin_unlock(&class->lock);
return handle;
@@ -1445,14 +1444,15 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
}
spin_lock(&class->lock);
- obj = obj_malloc(class, zspage, handle);
+ obj = obj_malloc(pool, zspage, handle);
newfg = get_fullness_group(class, zspage);
insert_zspage(class, zspage, newfg);
set_zspage_mapping(zspage, class->index, newfg);
record_obj(handle, obj);
atomic_long_add(class->pages_per_zspage,
&pool->pages_allocated);
- zs_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
+ class_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
+ class_stat_inc(class, OBJ_USED, 1);
/* We completely set up zspage so mark them as movable */
SetZsPageMovable(pool, zspage);
@@ -1462,7 +1462,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(zs_malloc);
-static void obj_free(struct size_class *class, unsigned long obj)
+static void obj_free(int class_size, unsigned long obj)
{
struct link_free *link;
struct zspage *zspage;
@@ -1472,18 +1472,20 @@ static void obj_free(struct size_class *class, unsigned long obj)
void *vaddr;
obj_to_location(obj, &f_page, &f_objidx);
- f_offset = (class->size * f_objidx) & ~PAGE_MASK;
+ f_offset = (class_size * f_objidx) & ~PAGE_MASK;
zspage = get_zspage(f_page);
vaddr = kmap_atomic(f_page);
/* Insert this object in containing zspage's freelist */
link = (struct link_free *)(vaddr + f_offset);
- link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
+ if (likely(!ZsHugePage(zspage)))
+ link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
+ else
+ f_page->index = 0;
kunmap_atomic(vaddr);
set_freeobj(zspage, f_objidx);
mod_zspage_inuse(zspage, -1);
- zs_stat_dec(class, OBJ_USED, 1);
}
void zs_free(struct zs_pool *pool, unsigned long handle)
@@ -1491,42 +1493,33 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
struct zspage *zspage;
struct page *f_page;
unsigned long obj;
- unsigned int f_objidx;
- int class_idx;
struct size_class *class;
enum fullness_group fullness;
- bool isolated;
if (unlikely(!handle))
return;
- pin_tag(handle);
+ /*
+ * The pool->migrate_lock protects the race with zpage's migration
+ * so it's safe to get the page from handle.
+ */
+ read_lock(&pool->migrate_lock);
obj = handle_to_obj(handle);
- obj_to_location(obj, &f_page, &f_objidx);
+ obj_to_page(obj, &f_page);
zspage = get_zspage(f_page);
-
- migrate_read_lock(zspage);
-
- get_zspage_mapping(zspage, &class_idx, &fullness);
- class = pool->size_class[class_idx];
-
+ class = zspage_class(pool, zspage);
spin_lock(&class->lock);
- obj_free(class, obj);
+ read_unlock(&pool->migrate_lock);
+
+ obj_free(class->size, obj);
+ class_stat_dec(class, OBJ_USED, 1);
fullness = fix_fullness_group(class, zspage);
- if (fullness != ZS_EMPTY) {
- migrate_read_unlock(zspage);
+ if (fullness != ZS_EMPTY)
goto out;
- }
- isolated = is_zspage_isolated(zspage);
- migrate_read_unlock(zspage);
- /* If zspage is isolated, zs_page_putback will free the zspage */
- if (likely(!isolated))
- free_zspage(pool, class, zspage);
+ free_zspage(pool, class, zspage);
out:
-
spin_unlock(&class->lock);
- unpin_tag(handle);
cache_free_handle(pool, handle);
}
EXPORT_SYMBOL_GPL(zs_free);
@@ -1601,7 +1594,6 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
static unsigned long find_alloced_obj(struct size_class *class,
struct page *page, int *obj_idx)
{
- unsigned long head;
int offset = 0;
int index = *obj_idx;
unsigned long handle = 0;
@@ -1611,13 +1603,8 @@ static unsigned long find_alloced_obj(struct size_class *class,
offset += class->size * index;
while (offset < PAGE_SIZE) {
- head = obj_to_head(page, addr + offset);
- if (head & OBJ_ALLOCATED_TAG) {
- handle = head & ~OBJ_ALLOCATED_TAG;
- if (trypin_tag(handle))
- break;
- handle = 0;
- }
+ if (obj_allocated(page, addr + offset, &handle))
+ break;
offset += class->size;
index++;
@@ -1663,25 +1650,16 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
/* Stop if there is no more space */
if (zspage_full(class, get_zspage(d_page))) {
- unpin_tag(handle);
ret = -ENOMEM;
break;
}
used_obj = handle_to_obj(handle);
- free_obj = obj_malloc(class, get_zspage(d_page), handle);
+ free_obj = obj_malloc(pool, get_zspage(d_page), handle);
zs_object_copy(class, free_obj, used_obj);
obj_idx++;
- /*
- * record_obj updates handle's value to free_obj and it will
- * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which
- * breaks synchronization using pin_tag(e,g, zs_free) so
- * let's keep the lock bit.
- */
- free_obj |= BIT(HANDLE_PIN_BIT);
record_obj(handle, free_obj);
- unpin_tag(handle);
- obj_free(class, used_obj);
+ obj_free(class->size, used_obj);
}
/* Remember last position in this iteration */
@@ -1706,7 +1684,6 @@ static struct zspage *isolate_zspage(struct size_class *class, bool source)
zspage = list_first_entry_or_null(&class->fullness_list[fg[i]],
struct zspage, list);
if (zspage) {
- VM_BUG_ON(is_zspage_isolated(zspage));
remove_zspage(class, zspage, fg[i]);
return zspage;
}
@@ -1727,8 +1704,6 @@ static enum fullness_group putback_zspage(struct size_class *class,
{
enum fullness_group fullness;
- VM_BUG_ON(is_zspage_isolated(zspage));
-
fullness = get_fullness_group(class, zspage);
insert_zspage(class, zspage, fullness);
set_zspage_mapping(zspage, class->index, fullness);
@@ -1797,6 +1772,11 @@ static void migrate_write_lock(struct zspage *zspage)
write_lock(&zspage->lock);
}
+static void migrate_write_lock_nested(struct zspage *zspage)
+{
+ write_lock_nested(&zspage->lock, SINGLE_DEPTH_NESTING);
+}
+
static void migrate_write_unlock(struct zspage *zspage)
{
write_unlock(&zspage->lock);
@@ -1810,35 +1790,10 @@ static void inc_zspage_isolation(struct zspage *zspage)
static void dec_zspage_isolation(struct zspage *zspage)
{
+ VM_BUG_ON(zspage->isolated == 0);
zspage->isolated--;
}
-static void putback_zspage_deferred(struct zs_pool *pool,
- struct size_class *class,
- struct zspage *zspage)
-{
- enum fullness_group fg;
-
- fg = putback_zspage(class, zspage);
- if (fg == ZS_EMPTY)
- schedule_work(&pool->free_work);
-
-}
-
-static inline void zs_pool_dec_isolated(struct zs_pool *pool)
-{
- VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
- atomic_long_dec(&pool->isolated_pages);
- /*
- * Checking pool->destroying must happen after atomic_long_dec()
- * for pool->isolated_pages above. Paired with the smp_mb() in
- * zs_unregister_migration().
- */
- smp_mb__after_atomic();
- if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
- wake_up_all(&pool->migration_wait);
-}
-
static void replace_sub_page(struct size_class *class, struct zspage *zspage,
struct page *newpage, struct page *oldpage)
{
@@ -1857,19 +1812,14 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
create_page_chain(class, zspage, pages);
set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
- if (unlikely(PageHugeObject(oldpage)))
+ if (unlikely(ZsHugePage(zspage)))
newpage->index = oldpage->index;
__SetPageMovable(newpage, page_mapping(oldpage));
}
static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
{
- struct zs_pool *pool;
- struct size_class *class;
- int class_idx;
- enum fullness_group fullness;
struct zspage *zspage;
- struct address_space *mapping;
/*
* Page is locked so zspage couldn't be destroyed. For detail, look at
@@ -1879,41 +1829,9 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
VM_BUG_ON_PAGE(PageIsolated(page), page);
zspage = get_zspage(page);
-
- /*
- * Without class lock, fullness could be stale while class_idx is okay
- * because class_idx is constant unless page is freed so we should get
- * fullness again under class lock.
- */
- get_zspage_mapping(zspage, &class_idx, &fullness);
- mapping = page_mapping(page);
- pool = mapping->private_data;
- class = pool->size_class[class_idx];
-
- spin_lock(&class->lock);
- if (get_zspage_inuse(zspage) == 0) {
- spin_unlock(&class->lock);
- return false;
- }
-
- /* zspage is isolated for object migration */
- if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
- spin_unlock(&class->lock);
- return false;
- }
-
- /*
- * If this is first time isolation for the zspage, isolate zspage from
- * size_class to prevent further object allocation from the zspage.
- */
- if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
- get_zspage_mapping(zspage, &class_idx, &fullness);
- atomic_long_inc(&pool->isolated_pages);
- remove_zspage(class, zspage, fullness);
- }
-
+ migrate_write_lock(zspage);
inc_zspage_isolation(zspage);
- spin_unlock(&class->lock);
+ migrate_write_unlock(zspage);
return true;
}
@@ -1923,16 +1841,13 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
{
struct zs_pool *pool;
struct size_class *class;
- int class_idx;
- enum fullness_group fullness;
struct zspage *zspage;
struct page *dummy;
void *s_addr, *d_addr, *addr;
- int offset, pos;
- unsigned long handle, head;
+ int offset;
+ unsigned long handle;
unsigned long old_obj, new_obj;
unsigned int obj_idx;
- int ret = -EAGAIN;
/*
* We cannot support the _NO_COPY case here, because copy needs to
@@ -1945,35 +1860,25 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
- zspage = get_zspage(page);
-
- /* Concurrent compactor cannot migrate any subpage in zspage */
- migrate_write_lock(zspage);
- get_zspage_mapping(zspage, &class_idx, &fullness);
pool = mapping->private_data;
- class = pool->size_class[class_idx];
- offset = get_first_obj_offset(page);
+ /*
+ * The pool migrate_lock protects the race between zpage migration
+ * and zs_free.
+ */
+ write_lock(&pool->migrate_lock);
+ zspage = get_zspage(page);
+ class = zspage_class(pool, zspage);
+
+ /*
+ * the class lock protects zpage alloc/free in the zspage.
+ */
spin_lock(&class->lock);
- if (!get_zspage_inuse(zspage)) {
- /*
- * Set "offset" to end of the page so that every loops
- * skips unnecessary object scanning.
- */
- offset = PAGE_SIZE;
- }
+ /* the migrate_write_lock protects zpage access via zs_map_object */
+ migrate_write_lock(zspage);
- pos = offset;
+ offset = get_first_obj_offset(page);
s_addr = kmap_atomic(page);
- while (pos < PAGE_SIZE) {
- head = obj_to_head(page, s_addr + pos);
- if (head & OBJ_ALLOCATED_TAG) {
- handle = head & ~OBJ_ALLOCATED_TAG;
- if (!trypin_tag(handle))
- goto unpin_objects;
- }
- pos += class->size;
- }
/*
* Here, any user cannot access all objects in the zspage so let's move.
@@ -1982,42 +1887,30 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
memcpy(d_addr, s_addr, PAGE_SIZE);
kunmap_atomic(d_addr);
- for (addr = s_addr + offset; addr < s_addr + pos;
+ for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE;
addr += class->size) {
- head = obj_to_head(page, addr);
- if (head & OBJ_ALLOCATED_TAG) {
- handle = head & ~OBJ_ALLOCATED_TAG;
- BUG_ON(!testpin_tag(handle));
+ if (obj_allocated(page, addr, &handle)) {
old_obj = handle_to_obj(handle);
obj_to_location(old_obj, &dummy, &obj_idx);
new_obj = (unsigned long)location_to_obj(newpage,
obj_idx);
- new_obj |= BIT(HANDLE_PIN_BIT);
record_obj(handle, new_obj);
}
}
+ kunmap_atomic(s_addr);
replace_sub_page(class, zspage, newpage, page);
- get_page(newpage);
-
- dec_zspage_isolation(zspage);
-
/*
- * Page migration is done so let's putback isolated zspage to
- * the list if @page is final isolated subpage in the zspage.
+ * Since we complete the data copy and set up new zspage structure,
+ * it's okay to release migration_lock.
*/
- if (!is_zspage_isolated(zspage)) {
- /*
- * We cannot race with zs_destroy_pool() here because we wait
- * for isolation to hit zero before we start destroying.
- * Also, we ensure that everyone can see pool->destroying before
- * we start waiting.
- */
- putback_zspage_deferred(pool, class, zspage);
- zs_pool_dec_isolated(pool);
- }
+ write_unlock(&pool->migrate_lock);
+ spin_unlock(&class->lock);
+ dec_zspage_isolation(zspage);
+ migrate_write_unlock(zspage);
+ get_page(newpage);
if (page_zone(newpage) != page_zone(page)) {
dec_zone_page_state(page, NR_ZSPAGES);
inc_zone_page_state(newpage, NR_ZSPAGES);
@@ -2025,55 +1918,21 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
reset_page(page);
put_page(page);
- page = newpage;
-
- ret = MIGRATEPAGE_SUCCESS;
-unpin_objects:
- for (addr = s_addr + offset; addr < s_addr + pos;
- addr += class->size) {
- head = obj_to_head(page, addr);
- if (head & OBJ_ALLOCATED_TAG) {
- handle = head & ~OBJ_ALLOCATED_TAG;
- BUG_ON(!testpin_tag(handle));
- unpin_tag(handle);
- }
- }
- kunmap_atomic(s_addr);
- spin_unlock(&class->lock);
- migrate_write_unlock(zspage);
- return ret;
+ return MIGRATEPAGE_SUCCESS;
}
static void zs_page_putback(struct page *page)
{
- struct zs_pool *pool;
- struct size_class *class;
- int class_idx;
- enum fullness_group fg;
- struct address_space *mapping;
struct zspage *zspage;
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
zspage = get_zspage(page);
- get_zspage_mapping(zspage, &class_idx, &fg);
- mapping = page_mapping(page);
- pool = mapping->private_data;
- class = pool->size_class[class_idx];
-
- spin_lock(&class->lock);
+ migrate_write_lock(zspage);
dec_zspage_isolation(zspage);
- if (!is_zspage_isolated(zspage)) {
- /*
- * Due to page_lock, we cannot free zspage immediately
- * so let's defer.
- */
- putback_zspage_deferred(pool, class, zspage);
- zs_pool_dec_isolated(pool);
- }
- spin_unlock(&class->lock);
+ migrate_write_unlock(zspage);
}
static const struct address_space_operations zsmalloc_aops = {
@@ -2095,36 +1954,8 @@ static int zs_register_migration(struct zs_pool *pool)
return 0;
}
-static bool pool_isolated_are_drained(struct zs_pool *pool)
-{
- return atomic_long_read(&pool->isolated_pages) == 0;
-}
-
-/* Function for resolving migration */
-static void wait_for_isolated_drain(struct zs_pool *pool)
-{
-
- /*
- * We're in the process of destroying the pool, so there are no
- * active allocations. zs_page_isolate() fails for completely free
- * zspages, so we need only wait for the zs_pool's isolated
- * count to hit zero.
- */
- wait_event(pool->migration_wait,
- pool_isolated_are_drained(pool));
-}
-
static void zs_unregister_migration(struct zs_pool *pool)
{
- pool->destroying = true;
- /*
- * We need a memory barrier here to ensure global visibility of
- * pool->destroying. Thus pool->isolated pages will either be 0 in which
- * case we don't care, or it will be > 0 and pool->destroying will
- * ensure that we wake up once isolation hits 0.
- */
- smp_mb();
- wait_for_isolated_drain(pool); /* This can block */
flush_work(&pool->free_work);
iput(pool->inode);
}
@@ -2154,7 +1985,6 @@ static void async_free_zspage(struct work_struct *work)
spin_unlock(&class->lock);
}
-
list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
list_del(&zspage->list);
lock_zspage(zspage);
@@ -2218,8 +2048,13 @@ static unsigned long __zs_compact(struct zs_pool *pool,
struct zspage *dst_zspage = NULL;
unsigned long pages_freed = 0;
+ /* protect the race between zpage migration and zs_free */
+ write_lock(&pool->migrate_lock);
+ /* protect zpage allocation/free */
spin_lock(&class->lock);
while ((src_zspage = isolate_zspage(class, true))) {
+ /* protect someone accessing the zspage(i.e., zs_map_object) */
+ migrate_write_lock(src_zspage);
if (!zs_can_compact(class))
break;
@@ -2228,6 +2063,8 @@ static unsigned long __zs_compact(struct zs_pool *pool,
cc.s_page = get_first_page(src_zspage);
while ((dst_zspage = isolate_zspage(class, false))) {
+ migrate_write_lock_nested(dst_zspage);
+
cc.d_page = get_first_page(dst_zspage);
/*
* If there is no more space in dst_page, resched
@@ -2237,6 +2074,10 @@ static unsigned long __zs_compact(struct zs_pool *pool,
break;
putback_zspage(class, dst_zspage);
+ migrate_write_unlock(dst_zspage);
+ dst_zspage = NULL;
+ if (rwlock_is_contended(&pool->migrate_lock))
+ break;
}
/* Stop if we couldn't find slot */
@@ -2244,19 +2085,28 @@ static unsigned long __zs_compact(struct zs_pool *pool,
break;
putback_zspage(class, dst_zspage);
+ migrate_write_unlock(dst_zspage);
+
if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
+ migrate_write_unlock(src_zspage);
free_zspage(pool, class, src_zspage);
pages_freed += class->pages_per_zspage;
- }
+ } else
+ migrate_write_unlock(src_zspage);
spin_unlock(&class->lock);
+ write_unlock(&pool->migrate_lock);
cond_resched();
+ write_lock(&pool->migrate_lock);
spin_lock(&class->lock);
}
- if (src_zspage)
+ if (src_zspage) {
putback_zspage(class, src_zspage);
+ migrate_write_unlock(src_zspage);
+ }
spin_unlock(&class->lock);
+ write_unlock(&pool->migrate_lock);
return pages_freed;
}
@@ -2362,15 +2212,12 @@ struct zs_pool *zs_create_pool(const char *name)
return NULL;
init_deferred_free(pool);
+ rwlock_init(&pool->migrate_lock);
pool->name = kstrdup(name, GFP_KERNEL);
if (!pool->name)
goto err;
-#ifdef CONFIG_COMPACTION
- init_waitqueue_head(&pool->migration_wait);
-#endif
-
if (create_cache(pool))
goto err;
diff --git a/mm/zswap.c b/mm/zswap.c
index 7944e3e57e78..cdf6950fcb2e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1378,7 +1378,7 @@ static void zswap_frontswap_init(unsigned type)
zswap_trees[type] = tree;
}
-static struct frontswap_ops zswap_frontswap_ops = {
+static const struct frontswap_ops zswap_frontswap_ops = {
.store = zswap_frontswap_store,
.load = zswap_frontswap_load,
.invalidate_page = zswap_frontswap_invalidate_page,
@@ -1475,11 +1475,15 @@ static int __init init_zswap(void)
if (!shrink_wq)
goto fallback_fail;
- frontswap_register_ops(&zswap_frontswap_ops);
+ ret = frontswap_register_ops(&zswap_frontswap_ops);
+ if (ret)
+ goto destroy_wq;
if (zswap_debugfs_init())
pr_warn("debugfs initialization failed\n");
return 0;
+destroy_wq:
+ destroy_workqueue(shrink_wq);
fallback_fail:
if (pool)
zswap_pool_destroy(pool);
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 4369ffa3302a..9bf736290e48 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -108,7 +108,7 @@ out:
static inline void *vcc_walk(struct seq_file *seq, loff_t l)
{
struct vcc_state *state = seq->private;
- int family = (uintptr_t)(PDE_DATA(file_inode(seq->file)));
+ int family = (uintptr_t)(pde_data(file_inode(seq->file)));
return __vcc_walk(&state->sk, family, &state->bucket, l) ?
state : NULL;
@@ -324,7 +324,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
page = get_zeroed_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
- dev = PDE_DATA(file_inode(file));
+ dev = pde_data(file_inode(file));
if (!dev->ops->proc_read)
length = -EINVAL;
else {
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1661979b6a6e..ee319779781e 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -611,7 +611,7 @@ EXPORT_SYMBOL(bt_sock_wait_ready);
static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(seq->private->l->lock)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
read_lock(&l->lock);
return seq_hlist_start_head(&l->head, *pos);
@@ -619,7 +619,7 @@ static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
return seq_hlist_next(v, &l->head, pos);
}
@@ -627,14 +627,14 @@ static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void bt_seq_stop(struct seq_file *seq, void *v)
__releases(seq->private->l->lock)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
read_unlock(&l->lock);
}
static int bt_seq_show(struct seq_file *seq, void *v)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
if (v == SEQ_START_TOKEN) {
seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent");
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a52ad81596b7..55f47cadb114 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -615,6 +615,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
err = dev_set_allmulti(dev, 1);
if (err) {
br_multicast_del_port(p);
+ dev_put_track(dev, &p->dev_tracker);
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}
@@ -724,10 +725,10 @@ err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
br_multicast_del_port(p);
+ dev_put_track(dev, &p->dev_tracker);
kobject_put(&p->kobj);
dev_set_allmulti(dev, -1);
err1:
- dev_put(dev);
return err;
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 84ba456a78cc..1402d5ca242d 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -560,10 +560,10 @@ static bool __allowed_ingress(const struct net_bridge *br,
!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
if (*state == BR_STATE_FORWARDING) {
*state = br_vlan_get_pvid_state(vg);
- return br_vlan_state_allowed(*state, true);
- } else {
- return true;
+ if (!br_vlan_state_allowed(*state, true))
+ goto drop;
}
+ return true;
}
}
v = br_vlan_find(vg, *vid);
@@ -2020,7 +2020,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto out_err;
}
err = br_vlan_dump_dev(dev, skb, cb, dump_flags);
- if (err && err != -EMSGSIZE)
+ /* if the dump completed without an error we return 0 here */
+ if (err != -EMSGSIZE)
goto out_err;
} else {
for_each_netdev_rcu(net, dev) {
diff --git a/net/can/bcm.c b/net/can/bcm.c
index bc88d901a1c0..95d209b52e6a 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -193,7 +193,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
{
char ifname[IFNAMSIZ];
struct net *net = m->private;
- struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode);
+ struct sock *sk = (struct sock *)pde_data(m->file->f_inode);
struct bcm_sock *bo = bcm_sk(sk);
struct bcm_op *op;
diff --git a/net/can/proc.c b/net/can/proc.c
index b3099f0a3cb8..bbce97825f13 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -305,7 +305,7 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
static int can_rcvlist_proc_show(struct seq_file *m, void *v)
{
/* double cast to prevent GCC warning */
- int idx = (int)(long)PDE_DATA(m->file->f_inode);
+ int idx = (int)(long)pde_data(m->file->f_inode);
struct net_device *dev;
struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 9441b4a4912b..ecc400a0b7bb 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -190,14 +190,14 @@ int ceph_compare_options(struct ceph_options *new_opt,
}
EXPORT_SYMBOL(ceph_compare_options);
-static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid)
{
int i = 0;
char tmp[3];
int err = -EINVAL;
int d;
- dout("parse_fsid '%s'\n", str);
+ dout("%s '%s'\n", __func__, str);
tmp[2] = 0;
while (*str && i < 16) {
if (ispunct(*str)) {
@@ -217,9 +217,10 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
if (i == 16)
err = 0;
- dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
+ dout("%s ret %d got fsid %pU\n", __func__, err, fsid);
return err;
}
+EXPORT_SYMBOL(ceph_parse_fsid);
/*
* ceph options
@@ -395,14 +396,14 @@ out:
}
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
- struct fc_log *l)
+ struct fc_log *l, char delim)
{
struct p_log log = {.prefix = "libceph", .log = l};
int ret;
- /* ip1[:port1][,ip2[:port2]...] */
+ /* ip1[:port1][<delim>ip2[:port2]...] */
ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON,
- &opt->num_mon);
+ &opt->num_mon, delim);
if (ret) {
error_plog(&log, "Failed to parse monitor IPs: %d", ret);
return ret;
@@ -428,8 +429,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_ip:
err = ceph_parse_ips(param->string,
param->string + param->size,
- &opt->my_addr,
- 1, NULL);
+ &opt->my_addr, 1, NULL, ',');
if (err) {
error_plog(&log, "Failed to parse ip: %d", err);
return err;
@@ -438,7 +438,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
break;
case Opt_fsid:
- err = parse_fsid(param->string, &opt->fsid);
+ err = ceph_parse_fsid(param->string, &opt->fsid);
if (err) {
error_plog(&log, "Failed to parse fsid: %d", err);
return err;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 7b891be799d2..45eba2dcb67a 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1267,30 +1267,31 @@ static int ceph_parse_server_name(const char *name, size_t namelen,
*/
int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
- int max_count, int *count)
+ int max_count, int *count, char delim)
{
int i, ret = -EINVAL;
const char *p = c;
dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) {
+ char cur_delim = delim;
const char *ipend;
int port;
- char delim = ',';
if (*p == '[') {
- delim = ']';
+ cur_delim = ']';
p++;
}
- ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
+ ret = ceph_parse_server_name(p, end - p, &addr[i], cur_delim,
+ &ipend);
if (ret)
goto bad;
ret = -EINVAL;
p = ipend;
- if (delim == ']') {
+ if (cur_delim == ']') {
if (*p != ']') {
dout("missing matching ']'\n");
goto bad;
@@ -1326,11 +1327,11 @@ int ceph_parse_ips(const char *c, const char *end,
addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
addr[i].nonce = 0;
- dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
+ dout("%s got %s\n", __func__, ceph_pr_addr(&addr[i]));
if (p == end)
break;
- if (*p != ',')
+ if (*p != delim)
goto bad;
p++;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 84a0d9542fe9..1baab07820f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8981,6 +8981,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
goto out_unlock;
}
old_prog = link->prog;
+ if (old_prog->type != new_prog->type ||
+ old_prog->expected_attach_type != new_prog->expected_attach_type) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
if (old_prog == new_prog) {
/* no-op, don't disturb drivers */
bpf_prog_put(new_prog);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 213cb7b26b7a..6c2016f7f3d1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3364,7 +3364,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
if (*pos == 0)
@@ -3381,7 +3381,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -3401,7 +3401,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index d8b9dbabd4a4..88cc0ad7d386 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -190,12 +190,23 @@ static const struct seq_operations softnet_seq_ops = {
.show = softnet_seq_show,
};
-static void *ptype_get_idx(loff_t pos)
+static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
{
+ struct list_head *ptype_list = NULL;
struct packet_type *pt = NULL;
+ struct net_device *dev;
loff_t i = 0;
int t;
+ for_each_netdev_rcu(seq_file_net(seq), dev) {
+ ptype_list = &dev->ptype_all;
+ list_for_each_entry_rcu(pt, ptype_list, list) {
+ if (i == pos)
+ return pt;
+ ++i;
+ }
+ }
+
list_for_each_entry_rcu(pt, &ptype_all, list) {
if (i == pos)
return pt;
@@ -216,22 +227,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
rcu_read_lock();
- return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net_device *dev;
struct packet_type *pt;
struct list_head *nxt;
int hash;
++*pos;
if (v == SEQ_START_TOKEN)
- return ptype_get_idx(0);
+ return ptype_get_idx(seq, 0);
pt = v;
nxt = pt->list.next;
+ if (pt->dev) {
+ if (nxt != &pt->dev->ptype_all)
+ goto found;
+
+ dev = pt->dev;
+ for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
+ if (!list_empty(&dev->ptype_all)) {
+ nxt = dev->ptype_all.next;
+ goto found;
+ }
+ }
+
+ nxt = ptype_all.next;
+ goto ptype_all;
+ }
+
if (pt->type == htons(ETH_P_ALL)) {
+ptype_all:
if (nxt != &ptype_all)
goto found;
hash = 0;
@@ -260,7 +289,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_puts(seq, "Type Device Function\n");
- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9b7171c40434..a5b5bb99c644 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -164,8 +164,10 @@ static void ops_exit_list(const struct pernet_operations *ops,
{
struct net *net;
if (ops->exit) {
- list_for_each_entry(net, net_exit_list, exit_list)
+ list_for_each_entry(net, net_exit_list, exit_list) {
ops->exit(net);
+ cond_resched();
+ }
}
if (ops->exit_batch)
ops->exit_batch(net_exit_list);
diff --git a/net/core/of_net.c b/net/core/of_net.c
index 95a64c813ae5..f1a9bf7578e7 100644
--- a/net/core/of_net.c
+++ b/net/core/of_net.c
@@ -61,7 +61,7 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
{
struct platform_device *pdev = of_find_device_by_node(np);
struct nvmem_cell *cell;
- const void *buf;
+ const void *mac;
size_t len;
int ret;
@@ -78,32 +78,21 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
if (IS_ERR(cell))
return PTR_ERR(cell);
- buf = nvmem_cell_read(cell, &len);
+ mac = nvmem_cell_read(cell, &len);
nvmem_cell_put(cell);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
-
- ret = 0;
- if (len == ETH_ALEN) {
- if (is_valid_ether_addr(buf))
- memcpy(addr, buf, ETH_ALEN);
- else
- ret = -EINVAL;
- } else if (len == 3 * ETH_ALEN - 1) {
- u8 mac[ETH_ALEN];
-
- if (mac_pton(buf, mac))
- memcpy(addr, mac, ETH_ALEN);
- else
- ret = -EINVAL;
- } else {
- ret = -EINVAL;
+ if (IS_ERR(mac))
+ return PTR_ERR(mac);
+
+ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+ kfree(mac);
+ return -EINVAL;
}
- kfree(buf);
+ memcpy(addr, mac, ETH_ALEN);
+ kfree(mac);
- return ret;
+ return 0;
}
/**
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 560a5e712dc3..84b62cd7bc57 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -546,7 +546,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
static int pgctrl_open(struct inode *inode, struct file *file)
{
- return single_open(file, pgctrl_show, PDE_DATA(inode));
+ return single_open(file, pgctrl_show, pde_data(inode));
}
static const struct proc_ops pktgen_proc_ops = {
@@ -1811,7 +1811,7 @@ static ssize_t pktgen_if_write(struct file *file,
static int pktgen_if_open(struct inode *inode, struct file *file)
{
- return single_open(file, pktgen_if_show, PDE_DATA(inode));
+ return single_open(file, pktgen_if_show, pde_data(inode));
}
static const struct proc_ops pktgen_if_proc_ops = {
@@ -1948,7 +1948,7 @@ out:
static int pktgen_thread_open(struct inode *inode, struct file *file)
{
- return single_open(file, pktgen_thread_show, PDE_DATA(inode));
+ return single_open(file, pktgen_thread_show, pde_data(inode));
}
static const struct proc_ops pktgen_thread_proc_ops = {
diff --git a/net/core/sock.c b/net/core/sock.c
index e21485ab285d..4ff806d71921 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -844,6 +844,8 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
}
num = ethtool_get_phc_vclocks(dev, &vclock_index);
+ dev_put(dev);
+
for (i = 0; i < num; i++) {
if (*(vclock_index + i) == phc_index) {
match = true;
@@ -2047,6 +2049,9 @@ void sk_destruct(struct sock *sk)
{
bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
+ WARN_ON_ONCE(!llist_empty(&sk->defer_list));
+ sk_defer_free_flush(sk);
+
if (rcu_access_pointer(sk->sk_reuseport_cb)) {
reuseport_detach_sock(sk);
use_call_rcu = true;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 828de171708f..b4589861b84c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/netlink.h>
+#include <linux/hash.h>
#include <net/arp.h>
#include <net/ip.h>
@@ -51,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
+static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;
#define DEVINDEX_HASHBITS 8
@@ -249,7 +251,6 @@ void free_fib_info(struct fib_info *fi)
pr_warn("Freeing alive fib_info %p\n", fi);
return;
}
- fib_info_cnt--;
call_rcu(&fi->rcu, free_fib_info_rcu);
}
@@ -260,6 +261,10 @@ void fib_release_info(struct fib_info *fi)
spin_lock_bh(&fib_info_lock);
if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
+
+ /* Paired with READ_ONCE() in fib_create_info(). */
+ WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
+
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
if (fi->nh) {
@@ -316,11 +321,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
- unsigned int mask = DEVINDEX_HASHSIZE - 1;
+ return hash_32(val, DEVINDEX_HASHBITS);
+}
+
+static struct hlist_head *
+fib_info_devhash_bucket(const struct net_device *dev)
+{
+ u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
- return (val ^
- (val >> DEVINDEX_HASHBITS) ^
- (val >> (DEVINDEX_HASHBITS * 2))) & mask;
+ return &fib_info_devhash[fib_devindex_hashfn(val)];
}
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -430,12 +439,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
- unsigned int hash;
spin_lock(&fib_info_lock);
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(dev);
+
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev &&
nh->fib_nh_gw4 == gw &&
@@ -1240,13 +1248,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}
-static inline unsigned int fib_laddr_hashfn(__be32 val)
+static struct hlist_head *
+fib_info_laddrhash_bucket(const struct net *net, __be32 val)
{
- unsigned int mask = (fib_info_hash_size - 1);
+ u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
+ fib_info_hash_bits);
- return ((__force u32)val ^
- ((__force u32)val >> 7) ^
- ((__force u32)val >> 14)) & mask;
+ return &fib_info_laddrhash[slot];
}
static struct hlist_head *fib_info_hash_alloc(int bytes)
@@ -1282,6 +1290,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
+ fib_info_hash_bits = ilog2(new_size);
for (i = 0; i < old_size; i++) {
struct hlist_head *head = &fib_info_hash[i];
@@ -1299,21 +1308,20 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
}
fib_info_hash = new_info_hash;
+ fib_info_laddrhash = new_laddrhash;
for (i = 0; i < old_size; i++) {
- struct hlist_head *lhead = &fib_info_laddrhash[i];
+ struct hlist_head *lhead = &old_laddrhash[i];
struct hlist_node *n;
struct fib_info *fi;
hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
struct hlist_head *ldest;
- unsigned int new_hash;
- new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
- ldest = &new_laddrhash[new_hash];
+ ldest = fib_info_laddrhash_bucket(fi->fib_net,
+ fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, ldest);
}
}
- fib_info_laddrhash = new_laddrhash;
spin_unlock_bh(&fib_info_lock);
@@ -1430,7 +1438,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
#endif
err = -ENOBUFS;
- if (fib_info_cnt >= fib_info_hash_size) {
+
+ /* Paired with WRITE_ONCE() in fib_release_info() */
+ if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
unsigned int new_size = fib_info_hash_size << 1;
struct hlist_head *new_info_hash;
struct hlist_head *new_laddrhash;
@@ -1462,7 +1472,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
return ERR_PTR(err);
}
- fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
fi->fib_scope = cfg->fc_scope;
@@ -1591,12 +1600,13 @@ link_it:
refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock_bh(&fib_info_lock);
+ fib_info_cnt++;
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
if (fi->fib_prefsrc) {
struct hlist_head *head;
- head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
+ head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, head);
}
if (fi->nh) {
@@ -1604,12 +1614,10 @@ link_it:
} else {
change_nexthops(fi) {
struct hlist_head *head;
- unsigned int hash;
if (!nexthop_nh->fib_nh_dev)
continue;
- hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
hlist_add_head(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
@@ -1870,16 +1878,16 @@ nla_put_failure:
*/
int fib_sync_down_addr(struct net_device *dev, __be32 local)
{
- int ret = 0;
- unsigned int hash = fib_laddr_hashfn(local);
- struct hlist_head *head = &fib_info_laddrhash[hash];
int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
struct net *net = dev_net(dev);
+ struct hlist_head *head;
struct fib_info *fi;
+ int ret = 0;
if (!fib_info_laddrhash || local == 0)
return 0;
+ head = fib_info_laddrhash_bucket(net, local);
hlist_for_each_entry(fi, head, fib_lhash) {
if (!net_eq(fi->fib_net, net) ||
fi->fib_tb_id != tb_id)
@@ -1961,8 +1969,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
- struct hlist_head *head = &fib_info_devhash[hash];
+ struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -1981,12 +1988,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
- int ret = 0;
- int scope = RT_SCOPE_NOWHERE;
+ struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_info *prev_fi = NULL;
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
- struct hlist_head *head = &fib_info_devhash[hash];
+ int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
+ int ret = 0;
if (force)
scope = -1;
@@ -2131,7 +2137,6 @@ out:
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct fib_info *prev_fi;
- unsigned int hash;
struct hlist_head *head;
struct fib_nh *nh;
int ret;
@@ -2147,8 +2152,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}
prev_fi = NULL;
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(dev);
ret = 0;
hlist_for_each_entry(nh, head, nh_hash) {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 05cd198d7a6b..341096807100 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -235,9 +235,9 @@ void inet_frag_kill(struct inet_frag_queue *fq)
/* The RCU read lock provides a memory barrier
* guaranteeing that if fqdir->dead is false then
* the hash table destruction will not start until
- * after we unlock. Paired with inet_frags_exit_net().
+ * after we unlock. Paired with fqdir_pre_exit().
*/
- if (!fqdir->dead) {
+ if (!READ_ONCE(fqdir->dead)) {
rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
fqdir->f->rhash_params);
refcount_dec(&fq->refcnt);
@@ -352,9 +352,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
{
+ /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
+ long high_thresh = READ_ONCE(fqdir->high_thresh);
struct inet_frag_queue *fq = NULL, *prev;
- if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
+ if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
return NULL;
rcu_read_lock();
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cfeb8890f94e..fad803d2d711 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t)
rcu_read_lock();
- if (qp->q.fqdir->dead)
+ /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(qp->q.fqdir->dead))
goto out_rcu_unlock;
spin_lock(&qp->q.lock);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2ac2b95c5694..99db2e41ed10 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -604,8 +604,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
key = &info->key;
ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
- tunnel_id_to_key32(key->tun_id), key->tos, 0,
- skb->mark, skb_get_hash(skb));
+ tunnel_id_to_key32(key->tun_id),
+ key->tos & ~INET_ECN_MASK, 0, skb->mark,
+ skb_get_hash(skb));
rt = ip_route_output_key(dev_net(dev), &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 57c1d8431386..139cec29ed06 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- if (ip_dont_fragment(sk, &rt->dst)) {
+ /* Do not bother generating IPID for small packets (eg SYNACK) */
+ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) {
iph->frag_off = htons(IP_DF);
iph->id = 0;
} else {
iph->frag_off = 0;
- __ip_select_ident(net, iph, 1);
+ /* TCP packets here are SYNACK with fat IPv4/TCP options.
+ * Avoid using the hashed IP ident generator.
+ */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ iph->id = (__force __be16)prandom_u32();
+ else
+ __ip_select_ident(net, iph, 1);
}
if (opt && opt->opt.optlen) {
@@ -825,15 +832,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
/* Everything is OK. Generate! */
ip_fraglist_init(skb, iph, hlen, &iter);
- if (iter.frag)
- ip_options_fragment(iter.frag);
-
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (iter.frag) {
+ bool first_frag = (iter.offset == 0);
+
IPCB(iter.frag)->flags = IPCB(skb)->flags;
ip_fraglist_prepare(skb, &iter);
+ if (first_frag && IPCB(skb)->opt.optlen) {
+ /* ipcb->opt is not populated for frags
+ * coming from __ip_make_skb(),
+ * ip_options_fragment() needs optlen
+ */
+ IPCB(iter.frag)->opt.optlen =
+ IPCB(skb)->opt.optlen;
+ ip_options_fragment(iter.frag);
+ ip_send_check(iter.iph);
+ }
}
skb->tstamp = tstamp;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index b518f20c9a24..f8e176c77d1c 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -776,7 +776,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- struct clusterip_config *c = PDE_DATA(inode);
+ struct clusterip_config *c = pde_data(inode);
sf->private = c;
@@ -788,7 +788,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
static int clusterip_proc_release(struct inode *inode, struct file *file)
{
- struct clusterip_config *c = PDE_DATA(inode);
+ struct clusterip_config *c = pde_data(inode);
int ret;
ret = seq_release(inode, file);
@@ -802,7 +802,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *ofs)
{
- struct clusterip_config *c = PDE_DATA(file_inode(file));
+ struct clusterip_config *c = pde_data(file_inode(file));
#define PROC_WRITELEN 10
char buffer[PROC_WRITELEN+1];
unsigned long nodenum;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 0e56df3a45e2..bcf7bc71cb56 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -220,7 +220,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
continue;
}
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != inet_sdif(skb))
continue;
sock_hold(sk);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a53f256bf9d3..9f97b9cbf7b3 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -722,6 +722,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret = -EINVAL;
int chk_addr_ret;
+ lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
@@ -741,7 +742,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
sk_dst_reset(sk);
ret = 0;
-out: return ret;
+out:
+ release_sock(sk);
+ return ret;
}
/*
@@ -971,7 +974,7 @@ struct proto raw_prot = {
static struct sock *raw_get_first(struct seq_file *seq)
{
struct sock *sk;
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
@@ -987,7 +990,7 @@ found:
static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
{
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
do {
@@ -1016,7 +1019,7 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
void *raw_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(&h->lock)
{
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
read_lock(&h->lock);
return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -1039,7 +1042,7 @@ EXPORT_SYMBOL_GPL(raw_seq_next);
void raw_seq_stop(struct seq_file *seq, void *v)
__releases(&h->lock)
{
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
read_unlock(&h->lock);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3b75836db19b..78e81465f5f3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -842,6 +842,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
release_sock(sk);
+ sk_defer_free_flush(sk);
if (spliced)
return spliced;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b3f34e366b27..fec656f5a39e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2095,7 +2095,7 @@ process:
nf_reset_ct(skb);
if (tcp_filter(sk, skb)) {
- drop_reason = SKB_DROP_REASON_TCP_FILTER;
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto discard_and_relse;
}
th = (const struct tcphdr *)skb->data;
@@ -3002,7 +3002,7 @@ static unsigned short seq_file_family(const struct seq_file *seq)
#endif
/* Iterated from proc fs */
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
return afinfo->family;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 464590ea922e..090360939401 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2960,7 +2960,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
++state->bucket) {
@@ -2993,7 +2993,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
do {
sk = sk_next(sk);
@@ -3050,7 +3050,7 @@ void udp_seq_stop(struct seq_file *seq, void *v)
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
if (state->bucket <= afinfo->udp_table->mask)
spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3eee17790a82..f927c199a93c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2589,7 +2589,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
__u32 valid_lft, u32 prefered_lft)
{
struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
- int create = 0;
+ int create = 0, update_lft = 0;
if (!ifp && valid_lft) {
int max_addresses = in6_dev->cnf.max_addresses;
@@ -2633,19 +2633,32 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
unsigned long now;
u32 stored_lft;
- /* Update lifetime (RFC4862 5.5.3 e)
- * We deviate from RFC4862 by honoring all Valid Lifetimes to
- * improve the reaction of SLAAC to renumbering events
- * (draft-gont-6man-slaac-renum-06, Section 4.2)
- */
+ /* update lifetime (RFC2462 5.5.3 e) */
spin_lock_bh(&ifp->lock);
now = jiffies;
if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
else
stored_lft = 0;
-
if (!create && stored_lft) {
+ const u32 minimum_lft = min_t(u32,
+ stored_lft, MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
+ }
+
+ if (update_lft) {
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
ifp->tstamp = now;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 463c37dea449..413f66781e50 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
fn = rcu_dereference_protected(f6i->fib6_node,
lockdep_is_held(&f6i->fib6_table->tb6_lock));
if (fn)
- fn->fn_sernum = fib6_new_sernum(net);
+ WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
}
/*
@@ -590,12 +590,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
spin_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = READ_ONCE(w->root->fn_sernum);
}
} else {
- if (cb->args[5] != w->root->fn_sernum) {
+ int sernum = READ_ONCE(w->root->fn_sernum);
+ if (cb->args[5] != sernum) {
/* Begin at the root if the tree changed */
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = sernum;
w->state = FWS_INIT;
w->node = w->root;
w->skip = w->count;
@@ -1345,7 +1346,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
/* paired with smp_rmb() in fib6_get_cookie_safe() */
smp_wmb();
while (fn) {
- fn->fn_sernum = sernum;
+ WRITE_ONCE(fn->fn_sernum, sernum);
fn = rcu_dereference_protected(fn->parent,
lockdep_is_held(&rt->fib6_table->tb6_lock));
}
@@ -2174,8 +2175,8 @@ static int fib6_clean_node(struct fib6_walker *w)
};
if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
- w->node->fn_sernum != c->sernum)
- w->node->fn_sernum = c->sernum;
+ READ_ONCE(w->node->fn_sernum) != c->sernum)
+ WRITE_ONCE(w->node->fn_sernum, c->sernum);
if (!c->func) {
WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
@@ -2543,7 +2544,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
iter->w.args = iter;
- iter->sernum = iter->w.root->fn_sernum;
+ iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
INIT_LIST_HEAD(&iter->w.lh);
fib6_walker_link(net, &iter->w);
}
@@ -2571,8 +2572,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
{
- if (iter->sernum != iter->w.root->fn_sernum) {
- iter->sernum = iter->w.root->fn_sernum;
+ int sernum = READ_ONCE(iter->w.root->fn_sernum);
+
+ if (iter->sernum != sernum) {
+ iter->sernum = sernum;
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
WARN_ON(iter->w.skip);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index fe786df4f849..97ade833f58c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Local address not yet configured!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
+ p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
true, 0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
+ p->name);
else
ret = 1;
rcu_read_unlock();
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e6de94203c13..f4884cda13b9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (from) {
fn = rcu_dereference(from->fib6_node);
if (fn && (rt->rt6i_flags & RTF_DEFAULT))
- fn->fn_sernum = -1;
+ WRITE_ONCE(fn->fn_sernum, -1);
}
}
rcu_read_unlock();
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a618dce7e0bc..c0b138c20992 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -956,7 +956,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
}
- if (rt->rt_type != RTN_UNICAST) {
+ if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
ip_rt_put(rt);
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 86ad15abf897..750f9f9b4daf 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -285,7 +285,7 @@ static void __mctp_route_test_init(struct kunit *test,
struct mctp_test_route **rtp,
struct socket **sockp)
{
- struct sockaddr_mctp addr;
+ struct sockaddr_mctp addr = {0};
struct mctp_test_route *rt;
struct mctp_test_dev *dev;
struct socket *sock;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 75af1f701e1d..782b1d452269 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -478,6 +478,20 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
return NULL;
}
+static struct mptcp_pm_addr_entry *
+__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
+ bool lookup_by_id)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) ||
+ (lookup_by_id && entry->addr.id == info->id))
+ return entry;
+ }
+ return NULL;
+}
+
static int
lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
{
@@ -777,7 +791,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
removed = true;
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
- __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
+ __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
if (!removed)
continue;
@@ -1763,18 +1777,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
}
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
- (lookup_by_id && entry->addr.id == addr.addr.id)) {
- mptcp_nl_addr_backup(net, &entry->addr, bkup);
-
- if (bkup)
- entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
- else
- entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
- }
+ spin_lock_bh(&pernet->lock);
+ entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
+ if (!entry) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
}
+ if (bkup)
+ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ addr = *entry;
+ spin_unlock_bh(&pernet->lock);
+
+ mptcp_nl_addr_backup(net, &addr.addr, bkup);
return 0;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0e6b42c76ea0..85317ce38e3f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -408,7 +408,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
- char reset_start[0];
+ struct_group(reset,
unsigned long avg_pacing_rate; /* protected by msk socket lock */
u64 local_key;
@@ -458,7 +458,7 @@ struct mptcp_subflow_context {
long delegated_status;
- char reset_end[0];
+ );
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
@@ -494,7 +494,7 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
static inline void
mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
- memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start);
+ memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
}
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 7121ce2a47c0..78814417d753 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -608,7 +608,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
bitmap = &ncf->bitmap;
spin_lock_irqsave(&nc->lock, flags);
- index = find_next_bit(bitmap, ncf->n_vids, 0);
+ index = find_first_bit(bitmap, ncf->n_vids);
if (index >= ncf->n_vids) {
spin_unlock_irqrestore(&nc->lock, flags);
return -1;
@@ -667,7 +667,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return -1;
}
- index = find_next_zero_bit(bitmap, ncf->n_vids, 0);
+ index = find_first_zero_bit(bitmap, ncf->n_vids);
if (index < 0 || index >= ncf->n_vids) {
netdev_err(ndp->ndev.dev,
"Channel %u already has all VLAN filters set\n",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 894a325d39f2..d6aa5b47031e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1924,15 +1924,17 @@ repeat:
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_ct_put(ct);
skb->_nfct = 0;
- NF_CT_STAT_INC_ATOMIC(state->net, invalid);
- if (ret == -NF_DROP)
- NF_CT_STAT_INC_ATOMIC(state->net, drop);
/* Special case: TCP tracker reports an attempt to reopen a
* closed/aborted connection. We have to go back and create a
* fresh conntrack.
*/
if (ret == -NF_REPEAT)
goto repeat;
+
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+ if (ret == -NF_DROP)
+ NF_CT_STAT_INC_ATOMIC(state->net, drop);
+
ret = -ret;
goto out;
}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 7f19ee259609..55415f011943 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -20,13 +20,14 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#define HELPER_NAME "netbios-ns"
#define NMBD_PORT 137
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_conntrack_netbios_ns");
-MODULE_ALIAS_NFCT_HELPER("netbios_ns");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
static unsigned int timeout __read_mostly = 3;
module_param(timeout, uint, 0400);
@@ -44,7 +45,7 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
}
static struct nf_conntrack_helper helper __read_mostly = {
- .name = "netbios-ns",
+ .name = HELPER_NAME,
.tuple.src.l3num = NFPROTO_IPV4,
.tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 77938b1042f3..cf454f8ca2b0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8264,14 +8264,12 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
void *data, *data_boundary;
struct nft_rule_dp *prule;
struct nft_rule *rule;
- int i;
/* already handled or inactive chain? */
if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
- i = 0;
data_size = 0;
list_for_each_entry_continue(rule, &chain->rules, list) {
@@ -8301,7 +8299,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
return -ENOMEM;
size = 0;
- track.last = last;
+ track.last = nft_expr_last(rule);
nft_rule_for_each_expr(expr, last, rule) {
track.cur = expr;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 58dcafe8bf79..3362417ebfdb 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -62,6 +62,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
{
bool invert = false;
u32 flags, limit;
+ int err;
if (!tb[NFTA_CONNLIMIT_COUNT])
return -EINVAL;
@@ -84,7 +85,15 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
priv->limit = limit;
priv->invert = invert;
- return nf_ct_netns_get(ctx->net, ctx->family);
+ err = nf_ct_netns_get(ctx->net, ctx->family);
+ if (err < 0)
+ goto err_netns;
+
+ return 0;
+err_netns:
+ kfree(priv->list);
+
+ return err;
}
static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
@@ -206,7 +215,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_connlimit *priv_src = nft_expr_priv(src);
priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
- if (priv_dst->list)
+ if (!priv_dst->list)
return -ENOMEM;
nf_conncount_list_init(priv_dst->list);
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 5ee33d0ccd4e..4f745a409d34 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -106,7 +106,7 @@ static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
- if (priv_dst->last)
+ if (!priv_dst->last)
return -ENOMEM;
return 0;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index f04be5be73a0..c4f308460dd1 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -145,7 +145,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
priv_dst->invert = priv_src->invert;
priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
- if (priv_dst->limit)
+ if (!priv_dst->limit)
return -ENOMEM;
spin_lock_init(&priv_dst->limit->lock);
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0484aef74273..f394a0b562f6 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -237,7 +237,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_quota *priv_dst = nft_expr_priv(dst);
priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
- if (priv_dst->consumed)
+ if (!priv_dst->consumed)
return -ENOMEM;
atomic64_set(priv_dst->consumed, 0);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 25524e393349..54a489f16b17 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1517,7 +1517,7 @@ EXPORT_SYMBOL_GPL(xt_unregister_table);
#ifdef CONFIG_PROC_FS
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
{
- u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
+ u8 af = (unsigned long)pde_data(file_inode(seq->file));
struct net *net = seq_file_net(seq);
struct xt_pernet *xt_net;
@@ -1529,7 +1529,7 @@ static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
+ u8 af = (unsigned long)pde_data(file_inode(seq->file));
struct net *net = seq_file_net(seq);
struct xt_pernet *xt_net;
@@ -1540,7 +1540,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void xt_table_seq_stop(struct seq_file *seq, void *v)
{
- u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
+ u_int8_t af = (unsigned long)pde_data(file_inode(seq->file));
mutex_unlock(&xt[af].mutex);
}
@@ -1584,7 +1584,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
[MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE,
};
- uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
+ uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
if (ppos != NULL)
@@ -1633,7 +1633,7 @@ static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
{
- uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
+ uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
switch (trav->class) {
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9c5cfd74a0ee..0859b8f76764 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1052,7 +1052,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
static void *dl_seq_start(struct seq_file *s, loff_t *pos)
__acquires(htable->lock)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket;
spin_lock_bh(&htable->lock);
@@ -1069,7 +1069,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
*pos = ++(*bucket);
@@ -1083,7 +1083,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void dl_seq_stop(struct seq_file *s, void *v)
__releases(htable->lock)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
if (!IS_ERR(bucket))
@@ -1125,7 +1125,7 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1140,7 +1140,7 @@ static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1155,7 +1155,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1169,7 +1169,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_show_v2(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = (unsigned int *)v;
struct dsthash_ent *ent;
@@ -1183,7 +1183,7 @@ static int dl_seq_show_v2(struct seq_file *s, void *v)
static int dl_seq_show_v1(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
struct dsthash_ent *ent;
@@ -1197,7 +1197,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v)
static int dl_seq_show(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
struct dsthash_ent *ent;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 0446307516cd..7ddb9a78e3fc 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -551,7 +551,7 @@ static int recent_seq_open(struct inode *inode, struct file *file)
if (st == NULL)
return -ENOMEM;
- st->table = PDE_DATA(inode);
+ st->table = pde_data(inode);
return 0;
}
@@ -559,7 +559,7 @@ static ssize_t
recent_mt_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *loff)
{
- struct recent_table *t = PDE_DATA(file_inode(file));
+ struct recent_table *t = pde_data(file_inode(file));
struct recent_entry *e;
char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
const char *c = buf;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 6cfd30fc0798..0b93a17b9f11 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -789,6 +789,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
+ if (!llcp_sock->local) {
+ release_sock(sk);
+ return -ENODEV;
+ }
+
if (sk->sk_type == SOCK_DGRAM) {
DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr,
msg->msg_name);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5bd409ab4cc2..85ea7ddb48db 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1774,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.af_packet_net = read_pnet(&match->net);
match->prot_hook.id_match = match_fanout_group;
match->max_num_members = args->max_num_members;
list_add(&match->list, &fanout_list);
@@ -3353,6 +3354,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
po->prot_hook.func = packet_rcv_spkt;
po->prot_hook.af_packet_priv = sk;
+ po->prot_hook.af_packet_net = sock_net(sk);
if (proto) {
po->prot_hook.type = proto;
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 6be2672a65ea..df864e692267 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
{
struct sk_buff *skb;
- unsigned long resend_at, rto_j;
+ unsigned long resend_at;
rxrpc_seq_t cursor, seq, top;
ktime_t now, max_age, oldest, ack_ts;
int ix;
@@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
- rto_j = call->peer->rto_j;
-
now = ktime_get_real();
- max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
+ max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j));
spin_lock_bh(&call->lock);
@@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
}
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rto_j;
+ resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans);
WRITE_ONCE(call->resend_at, resend_at);
if (unacked)
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 10f2bf2e9068..a45c83f22236 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -468,7 +468,7 @@ done:
if (call->peer->rtt_count > 1) {
unsigned long nowj = jiffies, ack_lost_at;
- ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans);
+ ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
ack_lost_at += nowj;
WRITE_ONCE(call->ack_lost_at, ack_lost_at);
rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c9c6f49f9c28..179825a3b2fd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1062,7 +1062,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
qdisc_offload_graft_root(dev, new, old, extack);
- if (new && new->ops->attach)
+ if (new && new->ops->attach && !ingress)
goto skip;
for (i = 0; i < num_q; i++) {
@@ -1204,7 +1204,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
err = -ENOENT;
if (!ops) {
- NL_SET_ERR_MSG(extack, "Specified qdisc not found");
+ NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
goto err_out;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b07bd1c7330f..f893d9a81b01 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1529,6 +1529,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
{
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
+ r->mpu = conf->mpu;
r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9267922ea9c3..23a9d6242429 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1810,6 +1810,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!hopt->rate.rate || !hopt->ceil.rate)
goto failure;
+ if (q->offload) {
+ /* Options not supported by the offload. */
+ if (hopt->rate.overhead || hopt->ceil.overhead) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
+ goto failure;
+ }
+ if (hopt->rate.mpu || hopt->ceil.mpu) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
+ goto failure;
+ }
+ if (hopt->quantum) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
+ goto failure;
+ }
+ if (hopt->prio) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
+ goto failure;
+ }
+ }
+
/* Keeping backward compatible with rate_table based iproute2 tc */
if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index aa3bcaaeabf7..d5ea62b82bb8 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -566,12 +566,17 @@ static void smc_stat_fallback(struct smc_sock *smc)
mutex_unlock(&net->smc.mutex_fback_rsn);
}
-static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
+static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
wait_queue_head_t *smc_wait = sk_sleep(&smc->sk);
- wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk);
+ wait_queue_head_t *clc_wait;
unsigned long flags;
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
@@ -586,18 +591,30 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
* smc socket->wq, which should be removed
* to clcsocket->wq during the fallback.
*/
+ clc_wait = sk_sleep(smc->clcsock->sk);
spin_lock_irqsave(&smc_wait->lock, flags);
spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
list_splice_init(&smc_wait->head, &clc_wait->head);
spin_unlock(&clc_wait->lock);
spin_unlock_irqrestore(&smc_wait->lock, flags);
}
+ mutex_unlock(&smc->clcsock_release_lock);
+ return 0;
}
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
- smc_switch_to_fallback(smc, reason_code);
+ struct net *net = sock_net(&smc->sk);
+ int rc = 0;
+
+ rc = smc_switch_to_fallback(smc, reason_code);
+ if (rc) { /* fallback fails */
+ this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
+ return rc;
+ }
smc_copy_sock_settings_to_clc(smc);
smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
@@ -634,9 +651,13 @@ static void smc_conn_abort(struct smc_sock *smc, int local_first)
{
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
+ bool lgr_valid = false;
+
+ if (smc_conn_lgr_valid(conn))
+ lgr_valid = true;
smc_conn_free(conn);
- if (local_first)
+ if (local_first && lgr_valid)
smc_lgr_cleanup_early(lgr);
}
@@ -1514,11 +1535,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
{
/* RDMA setup failed, switch back to TCP */
smc_conn_abort(new_smc, local_first);
- if (reason_code < 0) { /* error, no fallback possible */
+ if (reason_code < 0 ||
+ smc_switch_to_fallback(new_smc, reason_code)) {
+ /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
}
- smc_switch_to_fallback(new_smc, reason_code);
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
smc_listen_out_err(new_smc);
@@ -1960,8 +1982,11 @@ static void smc_listen_work(struct work_struct *work)
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
- smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
- smc_listen_out_connected(new_smc);
+ rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
+ if (rc)
+ smc_listen_out_err(new_smc);
+ else
+ smc_listen_out_connected(new_smc);
return;
}
@@ -2250,7 +2275,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_FASTOPEN) {
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ if (rc)
+ goto out;
} else {
rc = -EINVAL;
goto out;
@@ -2443,6 +2470,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
/* generic setsockopts reaching us here always apply to the
* CLC socket
*/
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
if (unlikely(!smc->clcsock->ops->setsockopt))
rc = -EOPNOTSUPP;
else
@@ -2452,6 +2484,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_err = smc->clcsock->sk->sk_err;
sk_error_report(sk);
}
+ mutex_unlock(&smc->clcsock_release_lock);
if (optlen < sizeof(int))
return -EINVAL;
@@ -2468,7 +2501,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
} else {
rc = -EINVAL;
}
@@ -2511,13 +2544,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
struct smc_sock *smc;
+ int rc;
smc = smc_sk(sock->sk);
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
/* socket options apply to the CLC socket */
- if (unlikely(!smc->clcsock->ops->getsockopt))
+ if (unlikely(!smc->clcsock->ops->getsockopt)) {
+ mutex_unlock(&smc->clcsock_release_lock);
return -EOPNOTSUPP;
- return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
- optval, optlen);
+ }
+ rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
+ optval, optlen);
+ mutex_unlock(&smc->clcsock_release_lock);
+ return rc;
}
static int smc_ioctl(struct socket *sock, unsigned int cmd,
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 1a4fc1c6c4ab..3d0b8e300deb 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -221,6 +221,7 @@ struct smc_connection {
*/
u64 peer_token; /* SMC-D token of peer */
u8 killed : 1; /* abnormal termination */
+ u8 freed : 1; /* normal termination */
u8 out_of_sync : 1; /* out of sync with peer */
};
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 84c8a4374fdd..9d5a97168969 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -197,7 +197,8 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
int rc;
- if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown))
+ if (!smc_conn_lgr_valid(conn) ||
+ (conn->lgr->is_smcd && conn->lgr->peer_shutdown))
return -EPIPE;
if (conn->lgr->is_smcd) {
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 6be95a2a7b25..ce27399b38b1 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -774,7 +774,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
SMC_FIRST_CONTACT_MASK : 0;
- if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) &&
+ if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) &&
smc_ib_is_valid_local_systemid())
memcpy(dclc.id_for_peer, local_systemid,
sizeof(local_systemid));
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 8935ef4811b0..29525d03b253 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -211,14 +211,13 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
- if (!lgr)
+ if (!smc_conn_lgr_valid(conn))
return;
write_lock_bh(&lgr->conns_lock);
if (conn->alert_token_local) {
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
- conn->lgr = NULL;
}
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
@@ -749,9 +748,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
}
get_device(&lnk->smcibdev->ibdev->dev);
atomic_inc(&lnk->smcibdev->lnk_cnt);
+ refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
+ lnk->clearing = 0;
lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
+ smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
lnk->link_idx = link_idx;
smc_ibdev_cnt_inc(lnk);
smcr_copy_dev_info_to_link(lnk);
@@ -806,6 +808,7 @@ out:
lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&smcibdev->lnk_cnt))
wake_up(&smcibdev->lnks_deleted);
+ smc_lgr_put(lgr); /* lgr_hold above */
return rc;
}
@@ -844,6 +847,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->terminating = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
+ refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
mutex_init(&lgr->sndbufs_lock);
mutex_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
@@ -996,8 +1000,12 @@ void smc_switch_link_and_count(struct smc_connection *conn,
struct smc_link *to_lnk)
{
atomic_dec(&conn->lnk->conn_cnt);
+ /* link_hold in smc_conn_create() */
+ smcr_link_put(conn->lnk);
conn->lnk = to_lnk;
atomic_inc(&conn->lnk->conn_cnt);
+ /* link_put in smc_conn_free() */
+ smcr_link_hold(conn->lnk);
}
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
@@ -1130,8 +1138,19 @@ void smc_conn_free(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
- if (!lgr)
+ if (!lgr || conn->freed)
+ /* Connection has never been registered in a
+ * link group, or has already been freed.
+ */
return;
+
+ conn->freed = 1;
+ if (!smc_conn_lgr_valid(conn))
+ /* Connection has already unregistered from
+ * link group.
+ */
+ goto lgr_put;
+
if (lgr->is_smcd) {
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
@@ -1148,6 +1167,10 @@ void smc_conn_free(struct smc_connection *conn)
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
+lgr_put:
+ if (!lgr->is_smcd)
+ smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
+ smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
}
/* unregister a link from a buf_desc */
@@ -1203,13 +1226,29 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
}
}
-/* must be called under lgr->llc_conf_mutex lock */
-void smcr_link_clear(struct smc_link *lnk, bool log)
+static void __smcr_link_clear(struct smc_link *lnk)
{
+ struct smc_link_group *lgr = lnk->lgr;
struct smc_ib_device *smcibdev;
- if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
+ smc_wr_free_link_mem(lnk);
+ smc_ibdev_cnt_dec(lnk);
+ put_device(&lnk->smcibdev->ibdev->dev);
+ smcibdev = lnk->smcibdev;
+ memset(lnk, 0, sizeof(struct smc_link));
+ lnk->state = SMC_LNK_UNUSED;
+ if (!atomic_dec_return(&smcibdev->lnk_cnt))
+ wake_up(&smcibdev->lnks_deleted);
+ smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_clear(struct smc_link *lnk, bool log)
+{
+ if (!lnk->lgr || lnk->clearing ||
+ lnk->state == SMC_LNK_UNUSED)
return;
+ lnk->clearing = 1;
lnk->peer_qpn = 0;
smc_llc_link_clear(lnk, log);
smcr_buf_unmap_lgr(lnk);
@@ -1218,14 +1257,18 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
- smc_wr_free_link_mem(lnk);
- smc_ibdev_cnt_dec(lnk);
- put_device(&lnk->smcibdev->ibdev->dev);
- smcibdev = lnk->smcibdev;
- memset(lnk, 0, sizeof(struct smc_link));
- lnk->state = SMC_LNK_UNUSED;
- if (!atomic_dec_return(&smcibdev->lnk_cnt))
- wake_up(&smcibdev->lnks_deleted);
+ smcr_link_put(lnk); /* theoretically last link_put */
+}
+
+void smcr_link_hold(struct smc_link *lnk)
+{
+ refcount_inc(&lnk->refcnt);
+}
+
+void smcr_link_put(struct smc_link *lnk)
+{
+ if (refcount_dec_and_test(&lnk->refcnt))
+ __smcr_link_clear(lnk);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
@@ -1290,6 +1333,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
__smc_lgr_free_bufs(lgr, true);
}
+/* won't be freed until no one accesses lgr anymore */
+static void __smc_lgr_free(struct smc_link_group *lgr)
+{
+ smc_lgr_free_bufs(lgr);
+ if (lgr->is_smcd) {
+ if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
+ wake_up(&lgr->smcd->lgrs_deleted);
+ } else {
+ smc_wr_free_lgr_mem(lgr);
+ if (!atomic_dec_return(&lgr_cnt))
+ wake_up(&lgrs_deleted);
+ }
+ kfree(lgr);
+}
+
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
@@ -1305,19 +1363,23 @@ static void smc_lgr_free(struct smc_link_group *lgr)
smc_llc_lgr_clear(lgr);
}
- smc_lgr_free_bufs(lgr);
destroy_workqueue(lgr->tx_wq);
if (lgr->is_smcd) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
- if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
- wake_up(&lgr->smcd->lgrs_deleted);
- } else {
- smc_wr_free_lgr_mem(lgr);
- if (!atomic_dec_return(&lgr_cnt))
- wake_up(&lgrs_deleted);
}
- kfree(lgr);
+ smc_lgr_put(lgr); /* theoretically last lgr_put */
+}
+
+void smc_lgr_hold(struct smc_link_group *lgr)
+{
+ refcount_inc(&lgr->refcnt);
+}
+
+void smc_lgr_put(struct smc_link_group *lgr)
+{
+ if (refcount_dec_and_test(&lgr->refcnt))
+ __smc_lgr_free(lgr);
}
static void smc_sk_wake_ups(struct smc_sock *smc)
@@ -1469,16 +1531,11 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd)
/* Called when an SMCR device is removed or the smc module is unloaded.
* If smcibdev is given, all SMCR link groups using this device are terminated.
* If smcibdev is NULL, all SMCR link groups are terminated.
- *
- * We must wait here for QPs been destroyed before we destroy the CQs,
- * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus
- * smc_sock cannot be released.
*/
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
- LIST_HEAD(lgr_linkdown_list);
int i;
spin_lock_bh(&smc_lgr_list.lock);
@@ -1490,7 +1547,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].smcibdev == smcibdev)
- list_move_tail(&lgr->list, &lgr_linkdown_list);
+ smcr_link_down_cond_sched(&lgr->lnk[i]);
}
}
}
@@ -1502,16 +1559,6 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
__smc_lgr_terminate(lgr, false);
}
- list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
- for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (lgr->lnk[i].smcibdev == smcibdev) {
- mutex_lock(&lgr->llc_conf_mutex);
- smcr_link_down_cond(&lgr->lnk[i]);
- mutex_unlock(&lgr->llc_conf_mutex);
- }
- }
- }
-
if (smcibdev) {
if (atomic_read(&smcibdev->lnk_cnt))
wait_event(smcibdev->lnks_deleted,
@@ -1856,6 +1903,10 @@ create:
goto out;
}
}
+ smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
+ if (!conn->lgr->is_smcd)
+ smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
+ conn->freed = 0;
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
@@ -2240,14 +2291,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
- if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
+ !smc_link_active(conn->lnk))
return;
smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
- if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
+ !smc_link_active(conn->lnk))
return;
smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
@@ -2256,7 +2309,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
int i;
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
return;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&conn->lgr->lnk[i]))
@@ -2270,7 +2323,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
int i;
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
return;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&conn->lgr->lnk[i]))
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 521c64a3d8d3..4cb03e942364 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -137,6 +137,8 @@ struct smc_link {
u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
u8 link_idx; /* index in lgr link array */
u8 link_is_asym; /* is link asymmetric? */
+ u8 clearing : 1; /* link is being cleared */
+ refcount_t refcnt; /* link reference count */
struct smc_link_group *lgr; /* parent link group */
struct work_struct link_down_wrk; /* wrk to bring link down */
char ibname[IB_DEVICE_NAME_MAX]; /* ib device name */
@@ -249,6 +251,7 @@ struct smc_link_group {
u8 terminating : 1;/* lgr is terminating */
u8 freeing : 1; /* lgr is being freed */
+ refcount_t refcnt; /* lgr reference count */
bool is_smcd; /* SMC-R or SMC-D */
u8 smc_version;
u8 negotiated_eid[SMC_MAX_EID_LEN];
@@ -409,6 +412,11 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
+static inline bool smc_conn_lgr_valid(struct smc_connection *conn)
+{
+ return conn->lgr && conn->alert_token_local;
+}
+
/*
* Returns true if the specified link is usable.
*
@@ -487,6 +495,8 @@ struct smc_clc_msg_accept_confirm;
void smc_lgr_cleanup_early(struct smc_link_group *lgr);
void smc_lgr_terminate_sched(struct smc_link_group *lgr);
+void smc_lgr_hold(struct smc_link_group *lgr);
+void smc_lgr_put(struct smc_link_group *lgr);
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
@@ -518,6 +528,8 @@ void smc_core_exit(void);
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini);
void smcr_link_clear(struct smc_link *lnk, bool log);
+void smcr_link_hold(struct smc_link *lnk);
+void smcr_link_put(struct smc_link *lnk);
void smc_switch_link_and_count(struct smc_connection *conn,
struct smc_link *to_lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 7c8dad28c18d..b8898c787d23 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -89,7 +89,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
r->diag_state = sk->sk_state;
if (smc->use_fallback)
r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP;
- else if (smc->conn.lgr && smc->conn.lgr->is_smcd)
+ else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd)
r->diag_mode = SMC_DIAG_MODE_SMCD;
else
r->diag_mode = SMC_DIAG_MODE_SMCR;
@@ -142,7 +142,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
- if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
+ if (smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_link *link = smc->conn.lnk;
@@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
goto errout;
}
- if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
+ if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_connection *conn = &smc->conn;
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index db9825c01e0a..291f1484a1b7 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -369,7 +369,8 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
new_pe->ndev = ndev;
- netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
+ if (ndev)
+ netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
rc = -EEXIST;
new_netdev = true;
write_lock(&pnettable->lock);
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 47512ccce5ef..a54e90a1110f 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -125,10 +125,6 @@ int smc_wr_tx_v2_send(struct smc_link *link,
int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
unsigned long timeout);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
-void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter,
- smc_wr_tx_dismisser dismisser,
- unsigned long data);
void smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index fe97f3106536..4a4082bb22ad 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -222,10 +222,8 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
if (ret)
return ret;
- if (!ret) {
- *buf_in = buf;
- *body_size = toksize;
- }
+ *buf_in = buf;
+ *body_size = toksize;
return ret;
}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index b87565b64928..c2ba9d4cd2c7 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1433,7 +1433,7 @@ static bool use_gss_proxy(struct net *net)
static ssize_t write_gssp(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct net *net = PDE_DATA(file_inode(file));
+ struct net *net = pde_data(file_inode(file));
char tbuf[20];
unsigned long i;
int res;
@@ -1461,7 +1461,7 @@ static ssize_t write_gssp(struct file *file, const char __user *buf,
static ssize_t read_gssp(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct net *net = PDE_DATA(file_inode(file));
+ struct net *net = pde_data(file_inode(file));
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
unsigned long p = *ppos;
char tbuf[10];
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 59641803472c..bb1177395b99 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1536,7 +1536,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_read(filp, buf, count, ppos, cd);
}
@@ -1544,14 +1544,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_write(filp, buf, count, ppos, cd);
}
static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_poll(filp, wait, cd);
}
@@ -1560,21 +1560,21 @@ static long cache_ioctl_procfs(struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_ioctl(inode, filp, cmd, arg, cd);
}
static int cache_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_open(inode, filp, cd);
}
static int cache_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_release(inode, filp, cd);
}
@@ -1591,14 +1591,14 @@ static const struct proc_ops cache_channel_proc_ops = {
static int content_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return content_open(inode, filp, cd);
}
static int content_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return content_release(inode, filp, cd);
}
@@ -1612,14 +1612,14 @@ static const struct proc_ops content_proc_ops = {
static int open_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return open_flush(inode, filp, cd);
}
static int release_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return release_flush(inode, filp, cd);
}
@@ -1627,7 +1627,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp)
static ssize_t read_flush_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return read_flush(filp, buf, count, ppos, cd);
}
@@ -1636,7 +1636,7 @@ static ssize_t write_flush_procfs(struct file *filp,
const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return write_flush(filp, buf, count, ppos, cd);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a312ea2bc440..c83fe618767c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2900,7 +2900,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
unsigned long connect_timeout;
unsigned long reconnect_timeout;
unsigned char resvport, reuseport;
- int ret = 0;
+ int ret = 0, ident;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
@@ -2914,8 +2914,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
reuseport = xprt->reuseport;
connect_timeout = xprt->connect_timeout;
reconnect_timeout = xprt->max_reconnect_timeout;
+ ident = xprt->xprt_class->ident;
rcu_read_unlock();
+ if (!xprtargs->ident)
+ xprtargs->ident = ident;
xprt = xprt_create_transport(xprtargs);
if (IS_ERR(xprt)) {
ret = PTR_ERR(xprt);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ee5336d73fdd..35588f0afa86 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_rmdir(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_rmdir(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
@@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_unlink(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_unlink(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index c964b48eaaba..52908f9e6eab 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -66,7 +66,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
static int rpc_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, rpc_proc_show, PDE_DATA(inode));
+ return single_open(file, rpc_proc_show, pde_data(inode));
}
static const struct proc_ops rpc_proc_ops = {
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 2766dd21935b..b64a0286b182 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -295,8 +295,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
online = 1;
else if (!strncmp(buf, "remove", 6))
remove = 1;
- else
- return -EINVAL;
+ else {
+ count = -EINVAL;
+ goto out_put;
+ }
if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
count = -EINTR;
@@ -307,25 +309,28 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
goto release_tasks;
}
if (offline) {
- set_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive--;
- spin_unlock(&xps->xps_lock);
+ if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive--;
+ spin_unlock(&xps->xps_lock);
+ }
} else if (online) {
- clear_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive++;
- spin_unlock(&xps->xps_lock);
+ if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive++;
+ spin_unlock(&xps->xps_lock);
+ }
} else if (remove) {
if (test_bit(XPRT_OFFLINE, &xprt->state)) {
- set_bit(XPRT_REMOVE, &xprt->state);
- xprt_force_disconnect(xprt);
- if (test_bit(XPRT_CONNECTED, &xprt->state)) {
- if (!xprt->sending.qlen &&
- !xprt->pending.qlen &&
- !xprt->backlog.qlen &&
- !atomic_long_read(&xprt->queuelen))
- rpc_xprt_switch_remove_xprt(xps, xprt);
+ if (!test_and_set_bit(XPRT_REMOVE, &xprt->state)) {
+ xprt_force_disconnect(xprt);
+ if (test_bit(XPRT_CONNECTED, &xprt->state)) {
+ if (!xprt->sending.qlen &&
+ !xprt->pending.qlen &&
+ !xprt->backlog.qlen &&
+ !atomic_long_read(&xprt->queuelen))
+ rpc_xprt_switch_remove_xprt(xps, xprt);
+ }
}
} else {
count = -EINVAL;
@@ -422,6 +427,7 @@ static struct attribute *rpc_sysfs_xprt_attrs[] = {
&rpc_sysfs_xprt_change_state.attr,
NULL,
};
+ATTRIBUTE_GROUPS(rpc_sysfs_xprt);
static struct kobj_attribute rpc_sysfs_xprt_switch_info =
__ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL);
@@ -430,6 +436,7 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = {
&rpc_sysfs_xprt_switch_info.attr,
NULL,
};
+ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch);
static struct kobj_type rpc_sysfs_client_type = {
.release = rpc_sysfs_client_release,
@@ -439,14 +446,14 @@ static struct kobj_type rpc_sysfs_client_type = {
static struct kobj_type rpc_sysfs_xprt_switch_type = {
.release = rpc_sysfs_xprt_switch_release,
- .default_attrs = rpc_sysfs_xprt_switch_attrs,
+ .default_groups = rpc_sysfs_xprt_switch_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_switch_namespace,
};
static struct kobj_type rpc_sysfs_xprt_type = {
.release = rpc_sysfs_xprt_release,
- .default_attrs = rpc_sysfs_xprt_attrs,
+ .default_groups = rpc_sysfs_xprt_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_namespace,
};
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 17f174d6ea3b..faba7136dd9a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -13,10 +13,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
#undef RPCRDMA_BACKCHANNEL_DEBUG
/**
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ff699307e820..515dd7a66a04 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -45,10 +45,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
static void frwr_cid_init(struct rpcrdma_ep *ep,
struct rpcrdma_mr *mr)
{
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 8035a983c8ce..281ddb87ac8d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -54,10 +54,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/* Returns size of largest RPC-over-RDMA header in a Call message
*
* The largest Call header contains a full-size Read list and a
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 16e5696314a4..42e375dbdadb 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -60,10 +60,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/*
* tunables
*/
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3d3673ba9e1e..f172d1298013 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -63,17 +63,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-/*
- * Globals/Macros
- */
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
-/*
- * internal functions
- */
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
@@ -274,8 +263,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_connect_status = -ENETUNREACH;
goto wake_connect_worker;
case RDMA_CM_EVENT_REJECTED:
- dprintk("rpcrdma: connection to %pISpc rejected: %s\n",
- sap, rdma_reject_msg(id, event->status));
ep->re_connect_status = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
ep->re_connect_status = -ENOTCONN;
@@ -291,8 +278,6 @@ disconnected:
break;
}
- dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap,
- ep->re_id->device->name, rdma_event_msg(event->event));
return 0;
}
@@ -419,14 +404,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep->re_attr.qp_type = IB_QPT_RC;
ep->re_attr.port_num = ~0;
- dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
- "iovs: send %d recv %d\n",
- __func__,
- ep->re_attr.cap.max_send_wr,
- ep->re_attr.cap.max_recv_wr,
- ep->re_attr.cap.max_send_sge,
- ep->re_attr.cap.max_recv_sge);
-
ep->re_send_batch = ep->re_max_requests >> 3;
ep->re_send_count = ep->re_send_batch;
init_waitqueue_head(&ep->re_connect_wait);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d8ee06a9650a..69b6ee5a5fd1 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1910,7 +1910,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
int ret;
- if (RPC_IS_ASYNC(task)) {
+ if (RPC_IS_ASYNC(task)) {
/*
* We want the AF_LOCAL connect to be resolved in the
* filesystem namespace of the process making the rpc
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 95e774f1b91f..efc84845bb6b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2059,6 +2059,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
splice_read_end:
release_sock(sk);
+ sk_defer_free_flush(sk);
return copied ? : err;
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 12e2ddaf887f..d45d5366115a 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -192,8 +192,11 @@ void wait_for_unix_gc(void)
{
/* If number of inflight sockets is insane,
* force a garbage collect right now.
+ * Paired with the WRITE_ONCE() in unix_inflight(),
+ * unix_notinflight() and unix_gc().
*/
- if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
unix_gc();
wait_event(unix_gc_wait, gc_in_progress == false);
}
@@ -213,7 +216,9 @@ void unix_gc(void)
if (gc_in_progress)
goto out;
- gc_in_progress = true;
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, true);
+
/* First, select candidates for garbage collection. Only
* in-flight sockets are considered, and from those only ones
* which don't have any external reference.
@@ -299,7 +304,10 @@ void unix_gc(void)
/* All candidates should have been detached by now. */
BUG_ON(!list_empty(&gc_candidates));
- gc_in_progress = false;
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, false);
+
wake_up(&unix_gc_wait);
out:
diff --git a/net/unix/scm.c b/net/unix/scm.c
index 052ae709ce28..aa27a02478dc 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp)
} else {
BUG_ON(list_empty(&u->link));
}
- unix_tot_inflight++;
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
user->unix_inflight++;
spin_unlock(&unix_gc_lock);
@@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
if (atomic_long_dec_and_test(&u->inflight))
list_del_init(&u->link);
- unix_tot_inflight--;
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
}
user->unix_inflight--;
spin_unlock(&unix_gc_lock);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index dccb8f3318ef..04d1ce9b510f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -31,6 +31,7 @@
#include <linux/if_tunnel.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/gre.h>
@@ -3295,7 +3296,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
fl4->flowi4_proto = iph->protocol;
fl4->daddr = reverse ? iph->saddr : iph->daddr;
fl4->saddr = reverse ? iph->daddr : iph->saddr;
- fl4->flowi4_tos = iph->tos;
+ fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index 4866afd054da..eb4d94742e6b 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -113,11 +113,11 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta)
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c
index f6d593e47037..8fdd2c9c56b2 100644
--- a/samples/bpf/test_overhead_kprobe_kern.c
+++ b/samples/bpf/test_overhead_kprobe_kern.c
@@ -6,6 +6,7 @@
*/
#include <linux/version.h>
#include <linux/ptrace.h>
+#include <linux/sched.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -22,17 +23,17 @@ int prog(struct pt_regs *ctx)
{
struct signal_struct *signal;
struct task_struct *tsk;
- char oldcomm[16] = {};
- char newcomm[16] = {};
+ char oldcomm[TASK_COMM_LEN] = {};
+ char newcomm[TASK_COMM_LEN] = {};
u16 oom_score_adj;
u32 pid;
tsk = (void *)PT_REGS_PARM1(ctx);
pid = _(tsk->pid);
- bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm);
- bpf_probe_read_kernel(newcomm, sizeof(newcomm),
- (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm);
+ bpf_probe_read_kernel_str(newcomm, sizeof(newcomm),
+ (void *)PT_REGS_PARM2(ctx));
signal = _(tsk->signal);
oom_score_adj = _(signal->oom_score_adj);
return 0;
diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c
index eaa32693f8fc..80edadacb692 100644
--- a/samples/bpf/test_overhead_tp_kern.c
+++ b/samples/bpf/test_overhead_tp_kern.c
@@ -4,6 +4,7 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
+#include <linux/sched.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
@@ -11,8 +12,8 @@
struct task_rename {
__u64 pad;
__u32 pid;
- char oldcomm[16];
- char newcomm[16];
+ char oldcomm[TASK_COMM_LEN];
+ char newcomm[TASK_COMM_LEN];
__u16 oom_score_adj;
};
SEC("tracepoint/task/task_rename")
diff --git a/scripts/Makefile b/scripts/Makefile
index ecd3acacd0ec..ce5aa9030b74 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -25,7 +25,7 @@ HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include
HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED
endif
-ifdef CONFIG_DYNAMIC_FTRACE
+ifdef CONFIG_BUILDTIME_MCOUNT_SORT
HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED
endif
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 9e2092fd5206..7099c603ff0a 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -8,7 +8,6 @@ ubsan-cflags-$(CONFIG_UBSAN_LOCAL_BOUNDS) += -fsanitize=local-bounds
ubsan-cflags-$(CONFIG_UBSAN_SHIFT) += -fsanitize=shift
ubsan-cflags-$(CONFIG_UBSAN_DIV_ZERO) += -fsanitize=integer-divide-by-zero
ubsan-cflags-$(CONFIG_UBSAN_UNREACHABLE) += -fsanitize=unreachable
-ubsan-cflags-$(CONFIG_UBSAN_OBJECT_SIZE) += -fsanitize=object-size
ubsan-cflags-$(CONFIG_UBSAN_BOOL) += -fsanitize=bool
ubsan-cflags-$(CONFIG_UBSAN_ENUM) += -fsanitize=enum
ubsan-cflags-$(CONFIG_UBSAN_TRAP) += -fsanitize-undefined-trap-on-error
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 1784921c645d..b01c36a15d9d 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3172,7 +3172,7 @@ sub process {
length($line) > 75 &&
!($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ ||
# file delta changes
- $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ ||
+ $line =~ /^\s*(?:[\w\.\-\+]*\/)++[\w\.\-\+]+:/ ||
# filename then :
$line =~ /^\s*(?:Fixes:|Link:|$signature_tags)/i ||
# A Fixes: or Link: line or signature tag line
@@ -3479,47 +3479,47 @@ sub process {
# Kconfig supports named choices), so use a word boundary
# (\b) rather than a whitespace character (\s)
$line =~ /^\+\s*(?:config|menuconfig|choice)\b/) {
- my $length = 0;
- my $cnt = $realcnt;
- my $ln = $linenr + 1;
- my $f;
- my $is_start = 0;
- my $is_end = 0;
- for (; $cnt > 0 && defined $lines[$ln - 1]; $ln++) {
- $f = $lines[$ln - 1];
- $cnt-- if ($lines[$ln - 1] !~ /^-/);
- $is_end = $lines[$ln - 1] =~ /^\+/;
+ my $ln = $linenr;
+ my $needs_help = 0;
+ my $has_help = 0;
+ my $help_length = 0;
+ while (defined $lines[$ln]) {
+ my $f = $lines[$ln++];
next if ($f =~ /^-/);
- last if (!$file && $f =~ /^\@\@/);
+ last if ($f !~ /^[\+ ]/); # !patch context
- if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) {
- $is_start = 1;
- } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
- $length = -1;
+ if ($f =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) {
+ $needs_help = 1;
+ next;
+ }
+ if ($f =~ /^\+\s*help\s*$/) {
+ $has_help = 1;
+ next;
}
- $f =~ s/^.//;
- $f =~ s/#.*//;
- $f =~ s/^\s+//;
- next if ($f =~ /^$/);
+ $f =~ s/^.//; # strip patch context [+ ]
+ $f =~ s/#.*//; # strip # directives
+ $f =~ s/^\s+//; # strip leading blanks
+ next if ($f =~ /^$/); # skip blank lines
+ # At the end of this Kconfig block:
# This only checks context lines in the patch
# and so hopefully shouldn't trigger false
# positives, even though some of these are
# common words in help texts
- if ($f =~ /^\s*(?:config|menuconfig|choice|endchoice|
- if|endif|menu|endmenu|source)\b/x) {
- $is_end = 1;
+ if ($f =~ /^(?:config|menuconfig|choice|endchoice|
+ if|endif|menu|endmenu|source)\b/x) {
last;
}
- $length++;
+ $help_length++ if ($has_help);
}
- if ($is_start && $is_end && $length < $min_conf_desc_length) {
+ if ($needs_help &&
+ $help_length < $min_conf_desc_length) {
+ my $stat_real = get_stat_real($linenr, $ln - 1);
WARN("CONFIG_DESCRIPTION",
- "please write a paragraph that describes the config symbol fully\n" . $herecurr);
+ "please write a help paragraph that fully describes the config symbol\n" . "$here\n$stat_real\n");
}
- #print "is_start<$is_start> is_end<$is_end> length<$length>\n";
}
# check MAINTAINERS entries
diff --git a/scripts/const_structs.checkpatch b/scripts/const_structs.checkpatch
index 3980985205a0..1eeb7b42c5b9 100644
--- a/scripts/const_structs.checkpatch
+++ b/scripts/const_structs.checkpatch
@@ -12,19 +12,27 @@ driver_info
drm_connector_funcs
drm_encoder_funcs
drm_encoder_helper_funcs
+dvb_frontend_ops
+dvb_tuner_ops
ethtool_ops
extent_io_ops
+fb_ops
file_lock_operations
file_operations
hv_ops
+hwmon_ops
+ib_device_ops
ide_dma_ops
ide_port_ops
+ieee80211_ops
+iio_buffer_setup_ops
inode_operations
intel_dvo_dev_ops
irq_domain_ops
item_operations
iwl_cfg
iwl_ops
+kernel_param_ops
kgdb_arch
kgdb_io
kset_uevent_ops
@@ -32,25 +40,33 @@ lock_manager_operations
machine_desc
microcode_ops
mlxsw_reg_info
+mtd_ooblayout_ops
mtrr_ops
+nand_controller_ops
neigh_ops
net_device_ops
+nft_expr_ops
nlmsvc_binding
nvkm_device_chip
of_device_id
pci_raw_ops
phy_ops
+pinconf_ops
pinctrl_ops
pinmux_ops
pipe_buf_operations
platform_hibernation_ops
platform_suspend_ops
+proc_ops
proto_ops
+pwm_ops
regmap_access_table
regulator_ops
+reset_control_ops
rpc_pipe_ops
rtc_class_ops
sd_desc
+sdhci_ops
seq_operations
sirfsoc_padmux
snd_ac97_build_ops
@@ -67,6 +83,13 @@ uart_ops
usb_mon_operations
v4l2_ctrl_ops
v4l2_ioctl_ops
+v4l2_subdev_core_ops
+v4l2_subdev_internal_ops
+v4l2_subdev_ops
+v4l2_subdev_pad_ops
+v4l2_subdev_video_ops
+vb2_ops
vm_operations_struct
wacom_features
+watchdog_ops
wd_ops
diff --git a/scripts/dtc/dtx_diff b/scripts/dtc/dtx_diff
index d3422ee15e30..f2bbde4bba86 100755
--- a/scripts/dtc/dtx_diff
+++ b/scripts/dtc/dtx_diff
@@ -59,12 +59,8 @@ Otherwise DTx is treated as a dts source file (aka .dts).
or '/include/' to be processed.
If DTx_1 and DTx_2 are in different architectures, then this script
- may not work since \${ARCH} is part of the include path. Two possible
- workarounds:
-
- `basename $0` \\
- <(ARCH=arch_of_dtx_1 `basename $0` DTx_1) \\
- <(ARCH=arch_of_dtx_2 `basename $0` DTx_2)
+ may not work since \${ARCH} is part of the include path. The following
+ workaround can be used:
`basename $0` ARCH=arch_of_dtx_1 DTx_1 >tmp_dtx_1.dts
`basename $0` ARCH=arch_of_dtx_2 DTx_2 >tmp_dtx_2.dts
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 2075db0c08b8..6bd5221d37b8 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -1718,7 +1718,7 @@ sub vcs_exists {
%VCS_cmds = %VCS_cmds_hg;
return 2 if eval $VCS_cmds{"available"};
%VCS_cmds = ();
- if (!$printed_novcs) {
+ if (!$printed_novcs && $email_git) {
warn("$P: No supported VCS found. Add --nogit to options?\n");
warn("Using a git repository produces better results.\n");
warn("Try Linus Torvalds' latest git repository using:\n");
diff --git a/security/security.c b/security/security.c
index 3d4eb474f35b..22261d79f333 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1048,8 +1048,19 @@ int security_dentry_init_security(struct dentry *dentry, int mode,
const char **xattr_name, void **ctx,
u32 *ctxlen)
{
- return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode,
- name, xattr_name, ctx, ctxlen);
+ struct security_hook_list *hp;
+ int rc;
+
+ /*
+ * Only one module will provide a security context.
+ */
+ hlist_for_each_entry(hp, &security_hook_heads.dentry_init_security, list) {
+ rc = hp->hook.dentry_init_security(dentry, mode, name,
+ xattr_name, ctx, ctxlen);
+ if (rc != LSM_RET_DEFAULT(dentry_init_security))
+ return rc;
+ }
+ return LSM_RET_DEFAULT(dentry_init_security);
}
EXPORT_SYMBOL(security_dentry_init_security);
diff --git a/sound/core/info.c b/sound/core/info.c
index a451b24199c3..782fba87cc04 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -234,7 +234,7 @@ static int snd_info_entry_mmap(struct file *file, struct vm_area_struct *vma)
static int snd_info_entry_open(struct inode *inode, struct file *file)
{
- struct snd_info_entry *entry = PDE_DATA(inode);
+ struct snd_info_entry *entry = pde_data(inode);
struct snd_info_private_data *data;
int mode, err;
@@ -365,7 +365,7 @@ static int snd_info_seq_show(struct seq_file *seq, void *p)
static int snd_info_text_entry_open(struct inode *inode, struct file *file)
{
- struct snd_info_entry *entry = PDE_DATA(inode);
+ struct snd_info_entry *entry = pde_data(inode);
struct snd_info_private_data *data;
int err;
diff --git a/sound/core/init.c b/sound/core/init.c
index ac335f5906c6..31ba7024e3ad 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -1111,29 +1111,14 @@ EXPORT_SYMBOL(snd_card_file_remove);
*/
int snd_power_ref_and_wait(struct snd_card *card)
{
- wait_queue_entry_t wait;
- int result = 0;
-
snd_power_ref(card);
- /* fastpath */
if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0)
return 0;
- init_waitqueue_entry(&wait, current);
- add_wait_queue(&card->power_sleep, &wait);
- while (1) {
- if (card->shutdown) {
- result = -ENODEV;
- break;
- }
- if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0)
- break;
- snd_power_unref(card);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(30 * HZ);
- snd_power_ref(card);
- }
- remove_wait_queue(&card->power_sleep, &wait);
- return result;
+ wait_event_cmd(card->power_sleep,
+ card->shutdown ||
+ snd_power_get_state(card) == SNDRV_CTL_POWER_D0,
+ snd_power_unref(card), snd_power_ref(card));
+ return card->shutdown ? -ENODEV : 0;
}
EXPORT_SYMBOL_GPL(snd_power_ref_and_wait);
diff --git a/sound/core/misc.c b/sound/core/misc.c
index 3579dd7a161f..50e4aaa6270d 100644
--- a/sound/core/misc.c
+++ b/sound/core/misc.c
@@ -112,7 +112,7 @@ snd_pci_quirk_lookup_id(u16 vendor, u16 device,
{
const struct snd_pci_quirk *q;
- for (q = list; q->subvendor; q++) {
+ for (q = list; q->subvendor || q->subdevice; q++) {
if (q->subvendor != vendor)
continue;
if (!q->subdevice ||
diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c
index 30b40d865863..718595380868 100644
--- a/sound/pci/hda/cs35l41_hda.c
+++ b/sound/pci/hda/cs35l41_hda.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
//
-// cs35l41.c -- CS35l41 ALSA HDA audio driver
+// CS35l41 ALSA HDA audio driver
//
// Copyright 2021 Cirrus Logic, Inc.
//
@@ -17,19 +17,19 @@
#include "cs35l41_hda.h"
static const struct reg_sequence cs35l41_hda_config[] = {
- { CS35L41_PLL_CLK_CTRL, 0x00000430 }, //3200000Hz, BCLK Input, PLL_REFCLK_EN = 1
- { CS35L41_GLOBAL_CLK_CTRL, 0x00000003 }, //GLOBAL_FS = 48 kHz
- { CS35L41_SP_ENABLES, 0x00010000 }, //ASP_RX1_EN = 1
- { CS35L41_SP_RATE_CTRL, 0x00000021 }, //ASP_BCLK_FREQ = 3.072 MHz
- { CS35L41_SP_FORMAT, 0x20200200 }, //24 bits, I2S, BCLK Slave, FSYNC Slave
- { CS35L41_DAC_PCM1_SRC, 0x00000008 }, //DACPCM1_SRC = ASPRX1
- { CS35L41_AMP_DIG_VOL_CTRL, 0x00000000 }, //AMP_VOL_PCM 0.0 dB
- { CS35L41_AMP_GAIN_CTRL, 0x00000084 }, //AMP_GAIN_PCM 4.5 dB
- { CS35L41_PWR_CTRL2, 0x00000001 }, //AMP_EN = 1
+ { CS35L41_PLL_CLK_CTRL, 0x00000430 }, // 3200000Hz, BCLK Input, PLL_REFCLK_EN = 1
+ { CS35L41_GLOBAL_CLK_CTRL, 0x00000003 }, // GLOBAL_FS = 48 kHz
+ { CS35L41_SP_ENABLES, 0x00010000 }, // ASP_RX1_EN = 1
+ { CS35L41_SP_RATE_CTRL, 0x00000021 }, // ASP_BCLK_FREQ = 3.072 MHz
+ { CS35L41_SP_FORMAT, 0x20200200 }, // 24 bits, I2S, BCLK Slave, FSYNC Slave
+ { CS35L41_DAC_PCM1_SRC, 0x00000008 }, // DACPCM1_SRC = ASPRX1
+ { CS35L41_AMP_DIG_VOL_CTRL, 0x00000000 }, // AMP_VOL_PCM 0.0 dB
+ { CS35L41_AMP_GAIN_CTRL, 0x00000084 }, // AMP_GAIN_PCM 4.5 dB
+ { CS35L41_PWR_CTRL2, 0x00000001 }, // AMP_EN = 1
};
static const struct reg_sequence cs35l41_hda_start_bst[] = {
- { CS35L41_PWR_CTRL2, 0x00000021 }, //BST_EN = 10, AMP_EN = 1
+ { CS35L41_PWR_CTRL2, 0x00000021 }, // BST_EN = 10, AMP_EN = 1
{ CS35L41_PWR_CTRL1, 0x00000001, 3000}, // set GLOBAL_EN = 1
};
@@ -60,7 +60,7 @@ static const struct reg_sequence cs35l41_stop_ext_vspk[] = {
{ 0x00000040, 0x00000055 },
{ 0x00000040, 0x000000AA },
{ 0x00007438, 0x00585941 },
- { 0x00002014, 0x00000000, 3000}, //set GLOBAL_EN = 0
+ { 0x00002014, 0x00000000, 3000}, // set GLOBAL_EN = 0
{ 0x0000742C, 0x00000009 },
{ 0x00007438, 0x00580941 },
{ 0x00011008, 0x00000001 },
@@ -78,7 +78,7 @@ static const struct reg_sequence cs35l41_safe_to_active[] = {
{ 0x0000742C, 0x0000000F },
{ 0x0000742C, 0x00000079 },
{ 0x00007438, 0x00585941 },
- { CS35L41_PWR_CTRL1, 0x00000001, 2000 }, //GLOBAL_EN = 1
+ { CS35L41_PWR_CTRL1, 0x00000001, 2000 }, // GLOBAL_EN = 1
{ 0x0000742C, 0x000000F9 },
{ 0x00007438, 0x00580941 },
{ 0x00000040, 0x000000CC },
@@ -89,8 +89,8 @@ static const struct reg_sequence cs35l41_active_to_safe[] = {
{ 0x00000040, 0x00000055 },
{ 0x00000040, 0x000000AA },
{ 0x00007438, 0x00585941 },
- { CS35L41_AMP_DIG_VOL_CTRL, 0x0000A678 }, //AMP_VOL_PCM Mute
- { CS35L41_PWR_CTRL2, 0x00000000 }, //AMP_EN = 0
+ { CS35L41_AMP_DIG_VOL_CTRL, 0x0000A678 }, // AMP_VOL_PCM Mute
+ { CS35L41_PWR_CTRL2, 0x00000000 }, // AMP_EN = 0
{ CS35L41_PWR_CTRL1, 0x00000000 },
{ 0x0000742C, 0x00000009, 2000 },
{ 0x00007438, 0x00580941 },
@@ -161,11 +161,13 @@ static void cs35l41_hda_playback_hook(struct device *dev, int action)
if (reg_seq->close)
ret = regmap_multi_reg_write(reg, reg_seq->close, reg_seq->num_close);
break;
+ default:
+ ret = -EINVAL;
+ break;
}
if (ret)
dev_warn(cs35l41->dev, "Failed to apply multi reg write: %d\n", ret);
-
}
static int cs35l41_hda_channel_map(struct device *dev, unsigned int tx_num, unsigned int *tx_slot,
@@ -182,20 +184,19 @@ static int cs35l41_hda_bind(struct device *dev, struct device *master, void *mas
struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
struct hda_component *comps = master_data;
- if (comps && cs35l41->index >= 0 && cs35l41->index < HDA_MAX_COMPONENTS)
- comps = &comps[cs35l41->index];
- else
+ if (!comps || cs35l41->index < 0 || cs35l41->index >= HDA_MAX_COMPONENTS)
return -EINVAL;
- if (!comps->dev) {
- comps->dev = dev;
- strscpy(comps->name, dev_name(dev), sizeof(comps->name));
- comps->playback_hook = cs35l41_hda_playback_hook;
- comps->set_channel_map = cs35l41_hda_channel_map;
- return 0;
- }
+ comps = &comps[cs35l41->index];
+ if (comps->dev)
+ return -EBUSY;
+
+ comps->dev = dev;
+ strscpy(comps->name, dev_name(dev), sizeof(comps->name));
+ comps->playback_hook = cs35l41_hda_playback_hook;
+ comps->set_channel_map = cs35l41_hda_channel_map;
- return -EBUSY;
+ return 0;
}
static void cs35l41_hda_unbind(struct device *dev, struct device *master, void *master_data)
@@ -227,6 +228,8 @@ static int cs35l41_hda_apply_properties(struct cs35l41_hda *cs35l41,
internal_boost = true;
switch (hw_cfg->gpio1_func) {
+ case CS35L41_NOT_USED:
+ break;
case CS35l41_VSPK_SWITCH:
regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL,
CS35L41_GPIO1_CTRL_MASK, 1 << CS35L41_GPIO1_CTRL_SHIFT);
@@ -235,13 +238,21 @@ static int cs35l41_hda_apply_properties(struct cs35l41_hda *cs35l41,
regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL,
CS35L41_GPIO1_CTRL_MASK, 2 << CS35L41_GPIO1_CTRL_SHIFT);
break;
+ default:
+ dev_err(cs35l41->dev, "Invalid function %d for GPIO1\n", hw_cfg->gpio1_func);
+ return -EINVAL;
}
switch (hw_cfg->gpio2_func) {
+ case CS35L41_NOT_USED:
+ break;
case CS35L41_INTERRUPT:
regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL,
CS35L41_GPIO2_CTRL_MASK, 2 << CS35L41_GPIO2_CTRL_SHIFT);
break;
+ default:
+ dev_err(cs35l41->dev, "Invalid function %d for GPIO2\n", hw_cfg->gpio2_func);
+ return -EINVAL;
}
if (internal_boost) {
@@ -256,11 +267,7 @@ static int cs35l41_hda_apply_properties(struct cs35l41_hda *cs35l41,
cs35l41->reg_seq = &cs35l41_hda_reg_seq_ext_bst;
}
- ret = cs35l41_hda_channel_map(cs35l41->dev, 0, NULL, 1, (unsigned int *)&hw_cfg->spk_pos);
- if (ret)
- return ret;
-
- return 0;
+ return cs35l41_hda_channel_map(cs35l41->dev, 0, NULL, 1, (unsigned int *)&hw_cfg->spk_pos);
}
static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *cs35l41,
@@ -269,7 +276,7 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
struct cs35l41_hda_hw_config *hw_cfg;
u32 values[HDA_MAX_COMPONENTS];
struct acpi_device *adev;
- struct device *acpi_dev;
+ struct device *physdev;
char *property;
size_t nval;
int i, ret;
@@ -280,11 +287,11 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
return ERR_PTR(-ENODEV);
}
- acpi_dev = get_device(acpi_get_first_physical_node(adev));
+ physdev = get_device(acpi_get_first_physical_node(adev));
acpi_dev_put(adev);
property = "cirrus,dev-index";
- ret = device_property_count_u32(acpi_dev, property);
+ ret = device_property_count_u32(physdev, property);
if (ret <= 0)
goto no_acpi_dsd;
@@ -294,7 +301,7 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
}
nval = ret;
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret)
goto err;
@@ -311,7 +318,9 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
goto err;
}
- /* No devm_ version as CLSA0100, in no_acpi_dsd case, can't use devm version */
+ /* To use the same release code for all laptop variants we can't use devm_ version of
+ * gpiod_get here, as CLSA010* don't have a fully functional bios with an _DSD node
+ */
cs35l41->reset_gpio = fwnode_gpiod_get_index(&adev->fwnode, "reset", cs35l41->index,
GPIOD_OUT_LOW, "cs35l41-reset");
@@ -322,46 +331,46 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
}
property = "cirrus,speaker-position";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret)
goto err_free;
hw_cfg->spk_pos = values[cs35l41->index];
property = "cirrus,gpio1-func";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret)
goto err_free;
hw_cfg->gpio1_func = values[cs35l41->index];
property = "cirrus,gpio2-func";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret)
goto err_free;
hw_cfg->gpio2_func = values[cs35l41->index];
property = "cirrus,boost-peak-milliamp";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret == 0)
hw_cfg->bst_ipk = values[cs35l41->index];
property = "cirrus,boost-ind-nanohenry";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret == 0)
hw_cfg->bst_ind = values[cs35l41->index];
property = "cirrus,boost-cap-microfarad";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret == 0)
hw_cfg->bst_cap = values[cs35l41->index];
- put_device(acpi_dev);
+ put_device(physdev);
return hw_cfg;
err_free:
kfree(hw_cfg);
err:
- put_device(acpi_dev);
+ put_device(physdev);
dev_err(cs35l41->dev, "Failed property %s: %d\n", property, ret);
return ERR_PTR(ret);
@@ -370,18 +379,18 @@ no_acpi_dsd:
/*
* Device CLSA0100 doesn't have _DSD so a gpiod_get by the label reset won't work.
* And devices created by i2c-multi-instantiate don't have their device struct pointing to
- * the correct fwnode, so acpi_dev must be used here
+ * the correct fwnode, so physdev must be used here.
* And devm functions expect that the device requesting the resource has the correct
- * fwnode
+ * fwnode.
*/
if (strncmp(hid, "CLSA0100", 8) != 0)
return ERR_PTR(-EINVAL);
/* check I2C address to assign the index */
cs35l41->index = id == 0x40 ? 0 : 1;
- cs35l41->reset_gpio = gpiod_get_index(acpi_dev, NULL, 0, GPIOD_OUT_HIGH);
+ cs35l41->reset_gpio = gpiod_get_index(physdev, NULL, 0, GPIOD_OUT_HIGH);
cs35l41->vspk_always_on = true;
- put_device(acpi_dev);
+ put_device(physdev);
return NULL;
}
@@ -416,8 +425,7 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
if (ret == -EBUSY) {
dev_info(cs35l41->dev, "Reset line busy, assuming shared reset\n");
} else {
- if (ret != -EPROBE_DEFER)
- dev_err(cs35l41->dev, "Failed to get reset GPIO: %d\n", ret);
+ dev_err_probe(cs35l41->dev, ret, "Failed to get reset GPIO: %d\n", ret);
goto err;
}
}
@@ -437,7 +445,8 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
ret = regmap_read(cs35l41->regmap, CS35L41_IRQ1_STATUS3, &int_sts);
if (ret || (int_sts & CS35L41_OTP_BOOT_ERR)) {
- dev_err(cs35l41->dev, "OTP Boot error\n");
+ dev_err(cs35l41->dev, "OTP Boot status %x error: %d\n",
+ int_sts & CS35L41_OTP_BOOT_ERR, ret);
ret = -EIO;
goto err;
}
@@ -463,6 +472,10 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
goto err;
}
+ ret = cs35l41_test_key_unlock(cs35l41->dev, cs35l41->regmap);
+ if (ret)
+ goto err;
+
ret = cs35l41_register_errata_patch(cs35l41->dev, cs35l41->regmap, reg_revid);
if (ret)
goto err;
@@ -473,6 +486,10 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
goto err;
}
+ ret = cs35l41_test_key_lock(cs35l41->dev, cs35l41->regmap);
+ if (ret)
+ goto err;
+
ret = cs35l41_hda_apply_properties(cs35l41, acpi_hw_cfg);
if (ret)
goto err;
@@ -480,8 +497,8 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
acpi_hw_cfg = NULL;
if (cs35l41->reg_seq->probe) {
- ret = regmap_register_patch(cs35l41->regmap, cs35l41->reg_seq->probe,
- cs35l41->reg_seq->num_probe);
+ ret = regmap_multi_reg_write(cs35l41->regmap, cs35l41->reg_seq->probe,
+ cs35l41->reg_seq->num_probe);
if (ret) {
dev_err(cs35l41->dev, "Fail to apply probe reg patch: %d\n", ret);
goto err;
@@ -506,9 +523,9 @@ err:
return ret;
}
-EXPORT_SYMBOL_GPL(cs35l41_hda_probe);
+EXPORT_SYMBOL_NS_GPL(cs35l41_hda_probe, SND_HDA_SCODEC_CS35L41);
-int cs35l41_hda_remove(struct device *dev)
+void cs35l41_hda_remove(struct device *dev)
{
struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
@@ -517,11 +534,8 @@ int cs35l41_hda_remove(struct device *dev)
if (!cs35l41->vspk_always_on)
gpiod_set_value_cansleep(cs35l41->reset_gpio, 0);
gpiod_put(cs35l41->reset_gpio);
-
- return 0;
}
-EXPORT_SYMBOL_GPL(cs35l41_hda_remove);
-
+EXPORT_SYMBOL_NS_GPL(cs35l41_hda_remove, SND_HDA_SCODEC_CS35L41);
MODULE_DESCRIPTION("CS35L41 HDA Driver");
MODULE_AUTHOR("Lucas Tanure, Cirrus Logic Inc, <tanureal@opensource.cirrus.com>");
diff --git a/sound/pci/hda/cs35l41_hda.h b/sound/pci/hda/cs35l41_hda.h
index 76c69a8a22f6..74951001501c 100644
--- a/sound/pci/hda/cs35l41_hda.h
+++ b/sound/pci/hda/cs35l41_hda.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
- * cs35l41_hda.h -- CS35L41 ALSA HDA audio driver
+ * CS35L41 ALSA HDA audio driver
*
* Copyright 2021 Cirrus Logic, Inc.
*
@@ -64,6 +64,6 @@ struct cs35l41_hda {
int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int irq,
struct regmap *regmap);
-int cs35l41_hda_remove(struct device *dev);
+void cs35l41_hda_remove(struct device *dev);
#endif /*__CS35L41_HDA_H__*/
diff --git a/sound/pci/hda/cs35l41_hda_i2c.c b/sound/pci/hda/cs35l41_hda_i2c.c
index 4a9462fb5c14..e810b278fb91 100644
--- a/sound/pci/hda/cs35l41_hda_i2c.c
+++ b/sound/pci/hda/cs35l41_hda_i2c.c
@@ -32,7 +32,9 @@ static int cs35l41_hda_i2c_probe(struct i2c_client *clt, const struct i2c_device
static int cs35l41_hda_i2c_remove(struct i2c_client *clt)
{
- return cs35l41_hda_remove(&clt->dev);
+ cs35l41_hda_remove(&clt->dev);
+
+ return 0;
}
static const struct i2c_device_id cs35l41_hda_i2c_id[] = {
@@ -58,9 +60,9 @@ static struct i2c_driver cs35l41_i2c_driver = {
.probe = cs35l41_hda_i2c_probe,
.remove = cs35l41_hda_i2c_remove,
};
-
module_i2c_driver(cs35l41_i2c_driver);
MODULE_DESCRIPTION("HDA CS35L41 driver");
+MODULE_IMPORT_NS(SND_HDA_SCODEC_CS35L41);
MODULE_AUTHOR("Lucas Tanure <tanureal@opensource.cirrus.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/pci/hda/cs35l41_hda_spi.c b/sound/pci/hda/cs35l41_hda_spi.c
index 77426e96c58f..9f8123893cc8 100644
--- a/sound/pci/hda/cs35l41_hda_spi.c
+++ b/sound/pci/hda/cs35l41_hda_spi.c
@@ -30,7 +30,9 @@ static int cs35l41_hda_spi_probe(struct spi_device *spi)
static int cs35l41_hda_spi_remove(struct spi_device *spi)
{
- return cs35l41_hda_remove(&spi->dev);
+ cs35l41_hda_remove(&spi->dev);
+
+ return 0;
}
static const struct spi_device_id cs35l41_hda_spi_id[] = {
@@ -55,9 +57,9 @@ static struct spi_driver cs35l41_spi_driver = {
.probe = cs35l41_hda_spi_probe,
.remove = cs35l41_hda_spi_remove,
};
-
module_spi_driver(cs35l41_spi_driver);
MODULE_DESCRIPTION("HDA CS35L41 driver");
+MODULE_IMPORT_NS(SND_HDA_SCODEC_CS35L41);
MODULE_AUTHOR("Lucas Tanure <tanureal@opensource.cirrus.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c
index df0b4522babf..2d1fa706327b 100644
--- a/sound/pci/hda/patch_cs8409-tables.c
+++ b/sound/pci/hda/patch_cs8409-tables.c
@@ -490,6 +490,8 @@ const struct snd_pci_quirk cs8409_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0ADC, "Warlock", CS8409_WARLOCK),
SND_PCI_QUIRK(0x1028, 0x0AF4, "Warlock", CS8409_WARLOCK),
SND_PCI_QUIRK(0x1028, 0x0AF5, "Warlock", CS8409_WARLOCK),
+ SND_PCI_QUIRK(0x1028, 0x0BB5, "Warlock N3 15 TGL-U Nuvoton EC", CS8409_WARLOCK),
+ SND_PCI_QUIRK(0x1028, 0x0BB6, "Warlock V3 15 TGL-U Nuvoton EC", CS8409_WARLOCK),
SND_PCI_QUIRK(0x1028, 0x0A77, "Cyborg", CS8409_CYBORG),
SND_PCI_QUIRK(0x1028, 0x0A78, "Cyborg", CS8409_CYBORG),
SND_PCI_QUIRK(0x1028, 0x0A79, "Cyborg", CS8409_CYBORG),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index eef973661b0a..668274e52674 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6948,6 +6948,7 @@ enum {
ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE,
ALC287_FIXUP_LEGION_16ACHG6,
ALC287_FIXUP_CS35L41_I2C_2,
+ ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -8698,6 +8699,16 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = cs35l41_fixup_i2c_two,
},
+ [ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED] = {
+ .type = HDA_FIXUP_VERBS,
+ .v.verbs = (const struct hda_verb[]) {
+ { 0x20, AC_VERB_SET_COEF_INDEX, 0x19 },
+ { 0x20, AC_VERB_SET_PROC_COEF, 0x8e11 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_HP_MUTE_LED,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8911,6 +8922,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index 5d391f62351b..96991ddf5055 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -431,6 +431,14 @@ static const struct usbmix_name_map aorus_master_alc1220vb_map[] = {
{}
};
+/* MSI MPG X570S Carbon Max Wifi with ALC4080 */
+static const struct usbmix_name_map msi_mpg_x570s_carbon_max_wifi_alc4080_map[] = {
+ { 29, "Speaker Playback" },
+ { 30, "Front Headphone Playback" },
+ { 32, "IEC958 Playback" },
+ {}
+};
+
/*
* Control map entries
*/
@@ -577,6 +585,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
.map = trx40_mobo_map,
.connector_map = trx40_mobo_connector_map,
},
+ { /* MSI MPG X570S Carbon Max Wifi */
+ .id = USB_ID(0x0db0, 0x419c),
+ .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map,
+ },
{ /* MSI TRX40 */
.id = USB_ID(0x0db0, 0x543d),
.map = trx40_mobo_map,
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 5ef1c15e88ad..11e86739456d 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t)
"RECLAIM %12s%15s%15s\n"
" %15llu%15llu%15llums\n"
"THRASHING%12s%15s%15s\n"
+ " %15llu%15llu%15llums\n"
+ "COMPACT %12s%15s%15s\n"
" %15llu%15llu%15llums\n",
"count", "real total", "virtual total",
"delay total", "delay average",
@@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t)
"count", "delay total", "delay average",
(unsigned long long)t->thrashing_count,
(unsigned long long)t->thrashing_delay_total,
- average_ms(t->thrashing_delay_total, t->thrashing_count));
+ average_ms(t->thrashing_delay_total, t->thrashing_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->compact_count,
+ (unsigned long long)t->compact_delay_total,
+ average_ms(t->compact_delay_total, t->compact_count));
}
static void task_context_switch_counts(struct taskstats *t)
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 2da3316bb559..bf6e96011dfe 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -452,6 +452,9 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+/* attributes for system fd (group 0) */
+#define KVM_X86_XCOMP_GUEST_SUPP 0
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h
index 754a07856817..500b96e71f18 100644
--- a/tools/arch/x86/include/uapi/asm/prctl.h
+++ b/tools/arch/x86/include/uapi/asm/prctl.h
@@ -2,20 +2,22 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
-#define ARCH_GET_CPUID 0x1011
-#define ARCH_SET_CPUID 0x1012
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
-#define ARCH_GET_XCOMP_SUPP 0x1021
-#define ARCH_GET_XCOMP_PERM 0x1022
-#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_SUPP 0x1021
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
-#define ARCH_MAP_VDSO_X32 0x2001
-#define ARCH_MAP_VDSO_32 0x2002
-#define ARCH_MAP_VDSO_64 0x2003
+#define ARCH_MAP_VDSO_X32 0x2001
+#define ARCH_MAP_VDSO_32 0x2002
+#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
index d9b420972934..f70702fcb224 100644
--- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -71,8 +71,8 @@ int iter(struct bpf_iter__task_file *ctx)
e.pid = task->tgid;
e.id = get_obj_id(file->private_data, obj_type);
- bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
- task->group_leader->comm);
+ bpf_probe_read_kernel_str(&e.comm, sizeof(e.comm),
+ task->group_leader->comm);
bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
return 0;
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index ab9353f2fd46..9a5c1f008fe6 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx)
*/
struct task_struct *prev = (struct task_struct *)ctx[1];
struct task_struct *next = (struct task_struct *)ctx[2];
- struct event event = {};
+ struct runq_event event = {};
u64 *tsp, delta_us;
long state;
u32 pid;
diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c
index 2414cc764461..d78f4148597f 100644
--- a/tools/bpf/runqslower/runqslower.c
+++ b/tools/bpf/runqslower/runqslower.c
@@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void)
void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
- const struct event *e = data;
+ const struct runq_event *e = data;
struct tm *tm;
char ts[32];
time_t t;
diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h
index 9db225425e5f..4f70f07200c2 100644
--- a/tools/bpf/runqslower/runqslower.h
+++ b/tools/bpf/runqslower/runqslower.h
@@ -4,7 +4,7 @@
#define TASK_COMM_LEN 16
-struct event {
+struct runq_event {
char task[TASK_COMM_LEN];
__u64 delta_us;
pid_t pid;
diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
index 5d2ab38965cc..9ab313e93555 100644
--- a/tools/include/asm-generic/bitops.h
+++ b/tools/include/asm-generic/bitops.h
@@ -18,7 +18,6 @@
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#ifndef _TOOLS_LINUX_BITOPS_H_
#error only <linux/bitops.h> can be included directly
diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h
deleted file mode 100644
index 6481fd11012a..000000000000
--- a/tools/include/asm-generic/bitops/find.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
-#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
-
-extern unsigned long _find_next_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long nbits,
- unsigned long start, unsigned long invert, unsigned long le);
-extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
-extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
-extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
-
-#ifndef find_next_bit
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The bitmap size in bits
- *
- * Returns the bit number for the next set bit
- * If no bits are set, returns @size.
- */
-static inline
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val;
-
- if (unlikely(offset >= size))
- return size;
-
- val = *addr & GENMASK(size - 1, offset);
- return val ? __ffs(val) : size;
- }
-
- return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
-}
-#endif
-
-#ifndef find_next_and_bit
-/**
- * find_next_and_bit - find the next set bit in both memory regions
- * @addr1: The first address to base the search on
- * @addr2: The second address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The bitmap size in bits
- *
- * Returns the bit number for the next set bit
- * If no bits are set, returns @size.
- */
-static inline
-unsigned long find_next_and_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long size,
- unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val;
-
- if (unlikely(offset >= size))
- return size;
-
- val = *addr1 & *addr2 & GENMASK(size - 1, offset);
- return val ? __ffs(val) : size;
- }
-
- return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
-}
-#endif
-
-#ifndef find_next_zero_bit
-/**
- * find_next_zero_bit - find the next cleared bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The bitmap size in bits
- *
- * Returns the bit number of the next zero bit
- * If no bits are zero, returns @size.
- */
-static inline
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val;
-
- if (unlikely(offset >= size))
- return size;
-
- val = *addr | ~GENMASK(size - 1, offset);
- return val == ~0UL ? size : ffz(val);
- }
-
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
-}
-#endif
-
-#ifndef find_first_bit
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum number of bits to search
- *
- * Returns the bit number of the first set bit.
- * If no bits are set, returns @size.
- */
-static inline
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
-{
- if (small_const_nbits(size)) {
- unsigned long val = *addr & GENMASK(size - 1, 0);
-
- return val ? __ffs(val) : size;
- }
-
- return _find_first_bit(addr, size);
-}
-
-#endif /* find_first_bit */
-
-#ifndef find_first_zero_bit
-
-/**
- * find_first_zero_bit - find the first cleared bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum number of bits to search
- *
- * Returns the bit number of the first cleared bit.
- * If no bits are zero, returns @size.
- */
-static inline
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
-{
- if (small_const_nbits(size)) {
- unsigned long val = *addr | ~GENMASK(size - 1, 0);
-
- return val == ~0UL ? size : ffz(val);
- }
-
- return _find_first_zero_bit(addr, size);
-}
-#endif
-
-#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 95611df1d26e..ea97804d04d4 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -1,9 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PERF_BITOPS_H
-#define _PERF_BITOPS_H
+#ifndef _TOOLS_LINUX_BITMAP_H
+#define _TOOLS_LINUX_BITMAP_H
#include <string.h>
#include <linux/bitops.h>
+#include <linux/find.h>
#include <stdlib.h>
#include <linux/kernel.h>
@@ -181,4 +182,4 @@ static inline int bitmap_intersects(const unsigned long *src1,
return __bitmap_intersects(src1, src2, nbits);
}
-#endif /* _PERF_BITOPS_H */
+#endif /* _TOOLS_LINUX_BITMAP_H */
diff --git a/include/asm-generic/bitops/find.h b/tools/include/linux/find.h
index 0d132ee2a291..47e2bd6c5174 100644
--- a/include/asm-generic/bitops/find.h
+++ b/tools/include/linux/find.h
@@ -1,11 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_GENERIC_BITOPS_FIND_H_
-#define _ASM_GENERIC_BITOPS_FIND_H_
+#ifndef _TOOLS_LINUX_FIND_H_
+#define _TOOLS_LINUX_FIND_H_
+
+#ifndef _TOOLS_LINUX_BITMAP_H
+#error tools: only <linux/bitmap.h> can be included directly
+#endif
+
+#include <linux/bitops.h>
extern unsigned long _find_next_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long nbits,
unsigned long start, unsigned long invert, unsigned long le);
extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size);
extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
@@ -95,8 +103,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
}
#endif
-#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
-
+#ifndef find_first_bit
/**
* find_first_bit - find the first set bit in a memory region
* @addr: The address to start the search at
@@ -116,7 +123,34 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
return _find_first_bit(addr, size);
}
+#endif
+#ifndef find_first_and_bit
+/**
+ * find_first_and_bit - find the first set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the first set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
+
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_first_and_bit(addr1, addr2, size);
+}
+#endif
+
+#ifndef find_first_zero_bit
/**
* find_first_zero_bit - find the first cleared bit in a memory region
* @addr: The address to start the search at
@@ -136,16 +170,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
return _find_first_zero_bit(addr, size);
}
-#else /* CONFIG_GENERIC_FIND_FIRST_BIT */
-
-#ifndef find_first_bit
-#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
#endif
-#ifndef find_first_zero_bit
-#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
-#endif
-
-#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
#ifndef find_last_bit
/**
@@ -185,4 +210,5 @@ extern unsigned long find_next_clump8(unsigned long *clump,
#define find_first_clump8(clump, bits, size) \
find_next_clump8((clump), (bits), (size), 0)
-#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
+
+#endif /*_TOOLS_LINUX_FIND_H_ */
diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h
index ad6fa21d977b..38edaa08f862 100644
--- a/tools/include/linux/hash.h
+++ b/tools/include/linux/hash.h
@@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val)
return val * GOLDEN_RATIO_32;
}
-#ifndef HAVE_ARCH_HASH_32
-#define hash_32 hash_32_generic
-#endif
-static inline u32 hash_32_generic(u32 val, unsigned int bits)
+static inline u32 hash_32(u32 val, unsigned int bits)
{
/* High bits are more random, so use them. */
return __hash_32(val) >> (32 - bits);
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 4557a8b6086f..1c48b0ae3ba3 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
#define __NR_futex_waitv 449
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+#define __NR_set_mempolicy_home_node 450
+__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+
#undef __NR_syscalls
-#define __NR_syscalls 450
+#define __NR_syscalls 451
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index f066637ee206..b46bcdb0cab1 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1131,7 +1131,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
-#define KVM_CAP_XSAVE2 207
+#define KVM_CAP_VM_GPA_BITS 207
+#define KVM_CAP_XSAVE2 208
+#define KVM_CAP_SYS_ATTRIBUTES 209
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1163,11 +1165,20 @@ struct kvm_irq_routing_hv_sint {
__u32 sint;
};
+struct kvm_irq_routing_xen_evtchn {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+};
+
+#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1))
+
/* gsi routing entry types */
#define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
+#define KVM_IRQ_ROUTING_XEN_EVTCHN 5
struct kvm_irq_routing_entry {
__u32 gsi;
@@ -1179,6 +1190,7 @@ struct kvm_irq_routing_entry {
struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint;
+ struct kvm_irq_routing_xen_evtchn xen_evtchn;
__u32 pad[8];
} u;
};
@@ -1209,6 +1221,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
struct kvm_xen_hvm_config {
__u32 flags;
@@ -1552,8 +1565,6 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_XSAVE */
#define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave)
#define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave)
-/* Available with KVM_CAP_XSAVE2 */
-#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
/* Available with KVM_CAP_XCRS */
#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs)
#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs)
@@ -1613,6 +1624,9 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
+
struct kvm_s390_pv_sec_parm {
__u64 origin;
__u64 length;
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
index 109aa7ffcf97..ba4b8d94e004 100644
--- a/tools/lib/find_bit.c
+++ b/tools/lib/find_bit.c
@@ -96,6 +96,26 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
}
#endif
+#ifndef find_first_and_bit
+/*
+ * Find the first set bit in two memory regions.
+ */
+unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ unsigned long idx, val;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+ val = addr1[idx] & addr2[idx];
+ if (val)
+ return min(idx * BITS_PER_LONG + __ffs(val), size);
+ }
+
+ return size;
+}
+#endif
+
#ifndef find_first_zero_bit
/*
* Find the first cleared bit in a memory region.
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 7ea86a44eae5..210ea7c06ce8 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -141,7 +141,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
}
if (evsel->fd == NULL &&
- perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
+ perf_evsel__alloc_fd(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
return -ENOMEM;
perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
@@ -384,7 +384,7 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
int err = 0, i;
- for (i = 0; i < evsel->cpus->nr && !err; i++)
+ for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++)
err = perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_SET_FILTER,
(void *)filter, i);
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index e2c481fcede6..3f1886ad9d80 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -364,3 +364,4 @@
# 447 reserved for memfd_secret
448 n64 process_mrelease sys_process_mrelease
449 n64 futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 15109af9d075..2600b4237292 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -529,3 +529,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index ed9c5c2eafad..799147658dee 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -452,3 +452,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index fe8f8dd157b4..c84d12608cd2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,6 +371,7 @@
447 common memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index 1a17ec83d3c4..740ae764537e 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -333,7 +333,7 @@ int bench_epoll_ctl(int argc, const char **argv)
/* default to the number of CPUs */
if (!nthreads)
- nthreads = cpu->nr;
+ nthreads = perf_cpu_map__nr(cpu);
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 0d1dd8879197..37de970c9743 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -452,7 +452,7 @@ int bench_epoll_wait(int argc, const char **argv)
/* default to the number of CPUs and leave one for the writer pthread */
if (!nthreads)
- nthreads = cpu->nr - 1;
+ nthreads = perf_cpu_map__nr(cpu) - 1;
worker = calloc(nthreads, sizeof(*worker));
if (!worker) {
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
index 482738e9bdad..de56601f69ee 100644
--- a/tools/perf/bench/evlist-open-close.c
+++ b/tools/perf/bench/evlist-open-close.c
@@ -71,7 +71,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist)
int cnt = 0;
evlist__for_each_entry(evlist, evsel)
- cnt += evsel->core.threads->nr * evsel->core.cpus->nr;
+ cnt += evsel->core.threads->nr * perf_cpu_map__nr(evsel->core.cpus);
return cnt;
}
@@ -151,7 +151,7 @@ static int bench_evlist_open_close__run(char *evstr)
init_stats(&time_stats);
- printf(" Number of cpus:\t%d\n", evlist->core.cpus->nr);
+ printf(" Number of cpus:\t%d\n", perf_cpu_map__nr(evlist->core.cpus));
printf(" Number of threads:\t%d\n", evlist->core.threads->nr);
printf(" Number of events:\t%d (%d fds)\n",
evlist->core.nr_entries, evlist__count_evsel_fds(evlist));
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 9627b6ab8670..dbcecec4eeda 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -150,7 +150,7 @@ int bench_futex_hash(int argc, const char **argv)
}
if (!params.nthreads) /* default to the number of CPUs */
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index a512a320df74..6fc9a3d55c1f 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -173,7 +173,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index aca47ce8b1e7..2f59d5d1c509 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -175,7 +175,7 @@ int bench_futex_requeue(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 888ee6037945..861deb934745 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -252,7 +252,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "calloc");
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
/* some sanity checks */
if (params.nwakes > params.nthreads ||
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index aa82db51c0ab..cfda48bef1d7 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -151,7 +151,7 @@ int bench_futex_wake(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 71452599f87d..dec24dc0e767 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -281,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap)
int ret;
int last_cpu;
- last_cpu = perf_cpu_map__cpu(cpumap, cpumap->nr - 1).cpu;
+ last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu;
mask_size = last_cpu / 4 + 2; /* one more byte for EOS */
mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 409b721666cb..fbf43a454cba 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -535,12 +535,9 @@ static int perf_event__repipe_exit(struct perf_tool *tool,
static int perf_event__repipe_tracing_data(struct perf_session *session,
union perf_event *event)
{
- int err;
-
perf_event__repipe_synth(session->tool, event);
- err = perf_event__process_tracing_data(session, event);
- return err;
+ return perf_event__process_tracing_data(session, event);
}
static int dso__read_build_id(struct dso *dso)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ecd4f99a6c14..abae8184e171 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -515,7 +515,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
return -EINVAL;
if (PRINT_FIELD(PHYS_ADDR) &&
- evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(DATA_PAGE_SIZE) &&
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 973ade18b72a..3f98689dd687 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -230,11 +230,12 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b)
if (!a->core.cpus || !b->core.cpus)
return false;
- if (a->core.cpus->nr != b->core.cpus->nr)
+ if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
return false;
- for (int i = 0; i < a->core.cpus->nr; i++) {
- if (a->core.cpus->map[i].cpu != b->core.cpus->map[i].cpu)
+ for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
+ if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
+ perf_cpu_map__cpu(b->core.cpus, i).cpu)
return false;
}
@@ -788,7 +789,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
struct evlist_cpu_iterator evlist_cpu_itr;
- struct affinity affinity;
+ struct affinity saved_affinity, *affinity = NULL;
int err;
bool second_pass = false;
@@ -803,8 +804,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (group)
evlist__set_leader(evsel_list);
- if (affinity__setup(&affinity) < 0)
- return -1;
+ if (!cpu_map__is_dummy(evsel_list->core.cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return -1;
+ affinity = &saved_affinity;
+ }
evlist__for_each_entry(evsel_list, counter) {
if (bpf_counter__load(counter, &target))
@@ -813,7 +817,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
all_counters_use_bpf = false;
}
- evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) {
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
/*
@@ -869,7 +873,7 @@ try_again:
*/
/* First close errored or weak retry */
- evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) {
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
if (!counter->reset_group && !counter->errored)
@@ -878,7 +882,7 @@ try_again:
perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
}
/* Now reopen weak */
- evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) {
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
if (!counter->reset_group && !counter->errored)
@@ -904,7 +908,7 @@ try_again_reset:
counter->supported = true;
}
}
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evsel_list, counter) {
if (!counter->supported) {
diff --git a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
index 73089c682f80..41bac1c6a008 100644
--- a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
+++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
@@ -19,6 +19,22 @@
"EdgeDetect": "0"
},
{
+ "Unit": "CBO",
+ "EventCode": "0xE0",
+ "UMask": "0x00",
+ "EventName": "event-hyphen",
+ "BriefDescription": "UNC_CBO_HYPHEN",
+ "PublicDescription": "UNC_CBO_HYPHEN"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0xC0",
+ "UMask": "0x00",
+ "EventName": "event-two-hyph",
+ "BriefDescription": "UNC_CBO_TWO_HYPH",
+ "PublicDescription": "UNC_CBO_TWO_HYPH"
+ },
+ {
"EventCode": "0x7",
"EventName": "uncore_hisi_l3c.rd_hit_cpipe",
"BriefDescription": "Total read hits",
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 0bf399c49849..4965dd666956 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -17,8 +17,8 @@ static unsigned long *get_bitmap(const char *str, int nbits)
bm = bitmap_zalloc(nbits);
if (map && bm) {
- for (i = 0; i < map->nr; i++)
- set_bit(map->map[i].cpu, bm);
+ for (i = 0; i < perf_cpu_map__nr(map); i++)
+ set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
}
if (map)
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index 16b6d6f47f38..78db4d704e76 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -75,10 +75,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
TEST_ASSERT_VAL("wrong id", ev->id == 123);
TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS);
- TEST_ASSERT_VAL("wrong cpus", map->nr == 3);
- TEST_ASSERT_VAL("wrong cpus", map->map[0].cpu == 1);
- TEST_ASSERT_VAL("wrong cpus", map->map[1].cpu == 2);
- TEST_ASSERT_VAL("wrong cpus", map->map[2].cpu == 3);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__nr(map) == 3);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 0).cpu == 1);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 1).cpu == 2);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 2).cpu == 3);
perf_cpu_map__put(map);
return 0;
}
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index f4a4aba33f76..4c96829510c9 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -25,14 +25,15 @@ static unsigned long *get_bitmap(const char *str, int nbits)
{
struct perf_cpu_map *map = perf_cpu_map__new(str);
unsigned long *bm = NULL;
- int i;
bm = bitmap_zalloc(nbits);
if (map && bm) {
- for (i = 0; i < map->nr; i++) {
- set_bit(map->map[i].cpu, bm);
- }
+ struct perf_cpu cpu;
+ int i;
+
+ perf_cpu_map__for_each_cpu(cpu, i, map)
+ set_bit(cpu.cpu, bm);
}
if (map)
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 0ad62914b4d7..c3c17600f29c 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -59,11 +59,12 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
}
CPU_ZERO(&cpu_set);
- CPU_SET(cpus->map[0].cpu, &cpu_set);
+ CPU_SET(perf_cpu_map__cpu(cpus, 0).cpu, &cpu_set);
sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
pr_debug("sched_setaffinity() failed on CPU %d: %s ",
- cpus->map[0].cpu, str_error_r(errno, sbuf, sizeof(sbuf)));
+ perf_cpu_map__cpu(cpus, 0).cpu,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_free_cpus;
}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index a508f1dbcb2a..e71efadb24f5 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -2069,6 +2069,31 @@ static int test_event(struct evlist_test *e)
return ret;
}
+/*
+ * Parse one event string with perf_pmu__fake passed to __parse_events().
+ *
+ * Returns 0 when the string parses, -ENOMEM when no evlist could be
+ * allocated, and otherwise the non-zero value __parse_events() returned
+ * (after logging it and printing the parse error).
+ */
+static int test_event_fake_pmu(const char *str)
+{
+	struct evlist *evlist = evlist__new();
+	struct parse_events_error err;
+	int ret;
+
+	if (evlist == NULL)
+		return -ENOMEM;
+
+	parse_events_error__init(&err);
+	perf_pmu__test_parse_init();
+
+	ret = __parse_events(evlist, str, &err, &perf_pmu__fake);
+	if (ret != 0) {
+		pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+			 str, ret, err.str);
+		parse_events_error__print(&err, str);
+	}
+
+	/* Both teardown calls run whether or not parsing succeeded. */
+	parse_events_error__exit(&err);
+	evlist__delete(evlist);
+
+	return ret;
+}
+
static int test_events(struct evlist_test *events, unsigned cnt)
{
int ret1, ret2 = 0;
@@ -2276,6 +2301,26 @@ static int test_pmu_events_alias(char *event, char *alias)
return test_event(&e);
}
+/*
+ * Run test_event_fake_pmu() on each hyphenated event name in turn.
+ *
+ * Stops at the first name that fails, logs it, and returns that failure
+ * code; returns 0 when every name parses.
+ */
+static int test_pmu_events_alias2(void)
+{
+	static const char events[][30] = {
+		"event-hyphen",
+		"event-two-hyph",
+	};
+	unsigned long idx = 0;
+
+	while (idx < ARRAY_SIZE(events)) {
+		const char *name = events[idx];
+		int err = test_event_fake_pmu(name);
+
+		if (err) {
+			pr_err("check_parse_fake %s failed\n", name);
+			return err;
+		}
+		idx++;
+	}
+
+	return 0;
+}
+
static int test__parse_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
int ret1, ret2 = 0;
@@ -2313,6 +2358,10 @@ do { \
return ret;
}
+ ret1 = test_pmu_events_alias2();
+ if (!ret2)
+ ret2 = ret1;
+
ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms));
if (!ret2)
ret2 = ret1;
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index df1c9a3cc05b..1c695fb5a79c 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -143,6 +143,34 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = {
.matching_pmu = "uncore_cbox_0",
};
+static const struct perf_pmu_test_event uncore_hyphen = { /* test fixture for the one-hyphen event name "event-hyphen" */
+ .event = {
+ .name = "event-hyphen",
+ .event = "umask=0x00,event=0xe0",
+ .desc = "Unit: uncore_cbox UNC_CBO_HYPHEN",
+ .topic = "uncore",
+ .long_desc = "UNC_CBO_HYPHEN",
+ .pmu = "uncore_cbox",
+ },
+ .alias_str = "umask=0,event=0xe0", /* NOTE(review): same fields as .event but umask written as 0 rather than 0x00 - presumably the normalized alias form; confirm */
+ .alias_long_desc = "UNC_CBO_HYPHEN",
+ .matching_pmu = "uncore_cbox_0", /* NOTE(review): presumably the PMU instance name expected to match .pmu; confirm */
+};
+
+static const struct perf_pmu_test_event uncore_two_hyph = { /* test fixture for the two-hyphen event name "event-two-hyph" */
+ .event = {
+ .name = "event-two-hyph",
+ .event = "umask=0x00,event=0xc0",
+ .desc = "Unit: uncore_cbox UNC_CBO_TWO_HYPH",
+ .topic = "uncore",
+ .long_desc = "UNC_CBO_TWO_HYPH",
+ .pmu = "uncore_cbox",
+ },
+ .alias_str = "umask=0,event=0xc0", /* NOTE(review): same fields as .event but umask written as 0 rather than 0x00 - presumably the normalized alias form; confirm */
+ .alias_long_desc = "UNC_CBO_TWO_HYPH",
+ .matching_pmu = "uncore_cbox_0", /* NOTE(review): presumably the PMU instance name expected to match .pmu; confirm */
+};
+
static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = {
.event = {
.name = "uncore_hisi_l3c.rd_hit_cpipe",
@@ -188,6 +216,8 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = {
static const struct perf_pmu_test_event *uncore_events[] = {
&uncore_hisi_ddrc_flux_wcmd,
&unc_cbo_xsnp_response_miss_eviction,
+ &uncore_hyphen,
+ &uncore_two_hyph,
&uncore_hisi_l3c_rd_hit_cpipe,
&uncore_imc_free_running_cache_miss,
&uncore_imc_cache_hits,
@@ -654,6 +684,8 @@ static struct perf_pmu_test_pmu test_pmus[] = {
},
.aliases = {
&unc_cbo_xsnp_response_miss_eviction,
+ &uncore_hyphen,
+ &uncore_two_hyph,
},
},
{
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index c4ef0c7002f1..ee1e3dcbc0bd 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -122,44 +122,48 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}
// Test that CPU ID contains socket, die, core and CPU
- for (i = 0; i < map->nr; i++) {
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL);
- TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", map->map[i].cpu == id.cpu.cpu);
+ TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match",
+ perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu);
TEST_ASSERT_VAL("Cpu map - Core ID doesn't match",
- session->header.env.cpu[map->map[i].cpu].core_id == id.core);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i].cpu].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Cpu map - Die ID doesn't match",
- session->header.env.cpu[map->map[i].cpu].die_id == id.die);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1);
}
// Test that core ID contains socket, die and core
- for (i = 0; i < map->nr; i++) {
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Core map - Core ID doesn't match",
- session->header.env.cpu[map->map[i].cpu].core_id == id.core);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i].cpu].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Core map - Die ID doesn't match",
- session->header.env.cpu[map->map[i].cpu].die_id == id.die);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1);
}
// Test that die ID contains socket and die
- for (i = 0; i < map->nr; i++) {
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i].cpu].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Die map - Die ID doesn't match",
- session->header.env.cpu[map->map[i].cpu].die_id == id.die);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
@@ -168,10 +172,11 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}
// Test that socket ID contains only socket
- for (i = 0; i < map->nr; i++) {
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i].cpu].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1);
@@ -181,10 +186,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}
// Test that node ID contains only node
- for (i = 0; i < map->nr; i++) {
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Node map - Node ID doesn't match",
- cpu__get_node(map->map[i]) == id.node);
+ cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node);
TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index f1e30d566db3..4d216c0dc425 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -62,7 +62,7 @@ void affinity__set(struct affinity *a, int cpu)
clear_bit(cpu, a->sched_cpus);
}
-void affinity__cleanup(struct affinity *a)
+static void __affinity__cleanup(struct affinity *a)
{
int cpu_set_size = get_cpu_set_size();
@@ -71,3 +71,9 @@ void affinity__cleanup(struct affinity *a)
zfree(&a->sched_cpus);
zfree(&a->orig_cpus);
}
+
+/* NULL-tolerant entry point: forwards to __affinity__cleanup() unless a is NULL. */
+void affinity__cleanup(struct affinity *a)
+{
+	if (!a)
+		return;
+
+	__affinity__cleanup(a);
+}
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 5632efc44738..825336304a37 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
mp->idx = idx;
if (per_cpu) {
- mp->cpu = evlist->core.cpus->map[idx];
+ mp->cpu = perf_cpu_map__cpu(evlist->core.cpus, idx);
if (evlist->core.threads)
mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
else
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index 2b81707b9dba..7a447d918458 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -61,7 +61,7 @@ int evsel__alloc_counts(struct evsel *evsel)
struct perf_cpu_map *cpus = evsel__cpus(evsel);
int nthreads = perf_thread_map__nr(evsel->core.threads);
- evsel->counts = perf_counts__new(cpus ? cpus->nr : 1, nthreads);
+ evsel->counts = perf_counts__new(perf_cpu_map__nr(cpus), nthreads);
return evsel->counts != NULL ? 0 : -ENOMEM;
}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 0d3c2006a15d..703ae6d3386e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -4,7 +4,6 @@
#include <stdbool.h>
#include <stdio.h>
-#include <stdbool.h>
#include <internal/cpumap.h>
#include <perf/cpumap.h>
@@ -57,7 +56,7 @@ struct perf_cpu cpu__max_present_cpu(void);
*/
static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
{
- return cpus->nr == 1 && cpus->map[0].cpu == -1;
+ return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
}
/**
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index e20b835a1194..d275d843c155 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -325,7 +325,7 @@ struct numa_topology *numa_topology__new(void)
if (!node_map)
goto out;
- nr = (u32) node_map->nr;
+ nr = (u32) perf_cpu_map__nr(node_map);
tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr);
if (!tp)
@@ -334,7 +334,7 @@ struct numa_topology *numa_topology__new(void)
tp->nr = nr;
for (i = 0; i < nr; i++) {
- if (load_numa_node(&tp->nodes[i], node_map->map[i].cpu)) {
+ if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) {
numa_topology__delete(tp);
tp = NULL;
break;
diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
index 7c554234b43d..7f234215147d 100644
--- a/tools/perf/util/evlist-hybrid.c
+++ b/tools/perf/util/evlist-hybrid.c
@@ -124,22 +124,23 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
events_nr++;
- if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 ||
- matched_cpus->nr < cpus->nr ||
- matched_cpus->nr < pmu->cpus->nr)) {
+ if (perf_cpu_map__nr(matched_cpus) > 0 &&
+ (perf_cpu_map__nr(unmatched_cpus) > 0 ||
+ perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) ||
+ perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) {
perf_cpu_map__put(evsel->core.cpus);
perf_cpu_map__put(evsel->core.own_cpus);
evsel->core.cpus = perf_cpu_map__get(matched_cpus);
evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
- if (unmatched_cpus->nr > 0) {
+ if (perf_cpu_map__nr(unmatched_cpus) > 0) {
cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
buf1, pmu->name, evsel->name);
}
}
- if (matched_cpus->nr == 0) {
+ if (perf_cpu_map__nr(matched_cpus) == 0) {
evlist__remove(evlist, evsel);
evsel__delete(evsel);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6e88d404b5b3..eaad04e1672a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -430,15 +430,19 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
struct evlist_cpu_iterator evlist_cpu_itr;
- struct affinity affinity;
+ struct affinity saved_affinity, *affinity = NULL;
bool has_imm = false;
- if (affinity__setup(&affinity) < 0)
- return;
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
/* Disable 'immediate' events last */
for (int imm = 0; imm <= 1; imm++) {
- evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
pos = evlist_cpu_itr.evsel;
if (evsel__strcmp(pos, evsel_name))
continue;
@@ -454,7 +458,7 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
break;
}
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
@@ -487,12 +491,16 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
struct evlist_cpu_iterator evlist_cpu_itr;
- struct affinity affinity;
+ struct affinity saved_affinity, *affinity = NULL;
- if (affinity__setup(&affinity) < 0)
- return;
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
- evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
pos = evlist_cpu_itr.evsel;
if (evsel__strcmp(pos, evsel_name))
continue;
@@ -500,7 +508,7 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name)
continue;
evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2f6b18af49e5..22d3267ce294 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1064,6 +1064,17 @@ void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_un
{
}
+/*
+ * Fill in the sampling rate of attr from opts: when opts->freq is non-zero,
+ * switch attr to frequency mode with that value; otherwise store
+ * opts->default_interval as the sample period.
+ */
+static void evsel__set_default_freq_period(struct record_opts *opts,
+					   struct perf_event_attr *attr)
+{
+	if (!opts->freq) {
+		attr->sample_period = opts->default_interval;
+		return;
+	}
+
+	attr->freq = 1;
+	attr->sample_freq = opts->freq;
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
@@ -1130,14 +1141,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
* We default some events to have a default interval. But keep
* it a weak assumption overridable by the user.
*/
- if (!attr->sample_period) {
- if (opts->freq) {
- attr->freq = 1;
- attr->sample_freq = opts->freq;
- } else {
- attr->sample_period = opts->default_interval;
- }
- }
+ if ((evsel->is_libpfm_event && !attr->sample_period) ||
+ (!evsel->is_libpfm_event && (!attr->sample_period ||
+ opts->user_freq != UINT_MAX ||
+ opts->user_interval != ULLONG_MAX)))
+ evsel__set_default_freq_period(opts, attr);
+
/*
* If attr->freq was set (here or earlier), ask for period
* to be sampled.
@@ -1782,7 +1791,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
nthreads = threads->nr;
if (evsel->core.fd == NULL &&
- perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0)
+ perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
return -ENOMEM;
evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
@@ -2020,9 +2029,10 @@ retry_open:
test_attr__ready();
pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
- pid, cpus->map[idx].cpu, group_fd, evsel->open_flags);
+ pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[idx].cpu,
+ fd = sys_perf_event_open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx).cpu,
group_fd, evsel->open_flags);
FD(evsel, idx, thread) = fd;
@@ -2038,7 +2048,8 @@ retry_open:
bpf_counter__install_pe(evsel, idx, fd);
if (unlikely(test_attr__enabled)) {
- test_attr__open(&evsel->core.attr, pid, cpus->map[idx],
+ test_attr__open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx),
fd, group_fd, evsel->open_flags);
}
@@ -2079,7 +2090,8 @@ try_fallback:
if (evsel__precise_ip_fallback(evsel))
goto retry_open;
- if (evsel__ignore_missing_thread(evsel, cpus->nr, idx, threads, thread, err)) {
+ if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
+ idx, threads, thread, err)) {
/* We just removed 1 thread, so lower the upper nthreads limit. */
nthreads--;
@@ -2119,7 +2131,7 @@ out_close:
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1);
+ return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
}
void evsel__close(struct evsel *evsel)
@@ -2131,8 +2143,7 @@ void evsel__close(struct evsel *evsel)
int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
{
if (cpu_map_idx == -1)
- return evsel__open_cpu(evsel, cpus, NULL, 0,
- cpus ? cpus->nr : 1);
+ return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));
return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
}
@@ -2982,7 +2993,7 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
struct perf_cpu_map *cpus = evsel->core.cpus;
struct perf_thread_map *threads = evsel->core.threads;
- if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr))
+ if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
return -ENOMEM;
return store_evsel_ids(evsel, evlist);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 5720ceebffac..041b42d33bf5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -11,6 +11,7 @@
#include <perf/evsel.h>
#include "symbol_conf.h"
#include <internal/cpumap.h>
+#include <perf/cpumap.h>
struct bpf_object;
struct cgroup;
@@ -191,7 +192,7 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
static inline int evsel__nr_cpus(struct evsel *evsel)
{
- return evsel__cpus(evsel)->nr;
+ return perf_cpu_map__nr(evsel__cpus(evsel));
}
void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 3901440aeff9..f70ba56912d4 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -16,6 +16,7 @@
#include "map_symbol.h"
#include "branch.h"
#include "mem-events.h"
+#include "path.h"
#include "srcline.h"
#include "symbol.h"
#include "sort.h"
@@ -1416,7 +1417,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
struct stat st;
/*sshfs might return bad dent->d_type, so we have to stat*/
- snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
+ path__join(path, sizeof(path), dir_name, dent->d_name);
if (stat(path, &st))
continue;
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 12261ed8c15b..0e8ff8d1e206 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -250,7 +250,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
nr_cpus = perf_cpu_map__nr(cpu_map);
for (idx = 0; idx < nr_cpus; idx++) {
- cpu = cpu_map->map[idx]; /* map c index to online cpu index */
+ cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */
if (cpu__get_node(cpu) == node)
set_bit(cpu.cpu, mask->bits);
}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index acf20ce98ce9..9739b05b999e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1697,6 +1697,15 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
}
}
}
+
+ if (parse_state->fake_pmu) {
+ if (!parse_events_add_pmu(parse_state, list, str, head,
+ true, true)) {
+ pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str);
+ ok++;
+ }
+ }
+
out_err:
if (ok)
*listp = list;
@@ -2098,8 +2107,17 @@ static void perf_pmu__parse_init(void)
pmu = NULL;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
list_for_each_entry(alias, &pmu->aliases, list) {
- if (strchr(alias->name, '-'))
+ char *tmp = strchr(alias->name, '-');
+
+ if (tmp) {
+ char *tmp2 = NULL;
+
+ tmp2 = strchr(tmp + 1, '-');
len++;
+ if (tmp2)
+ len++;
+ }
+
len++;
}
}
@@ -2119,8 +2137,20 @@ static void perf_pmu__parse_init(void)
list_for_each_entry(alias, &pmu->aliases, list) {
struct perf_pmu_event_symbol *p = perf_pmu_events_list + len;
char *tmp = strchr(alias->name, '-');
+ char *tmp2 = NULL;
- if (tmp != NULL) {
+ if (tmp)
+ tmp2 = strchr(tmp + 1, '-');
+ if (tmp2) {
+ SET_SYMBOL(strndup(alias->name, tmp - alias->name),
+ PMU_EVENT_SYMBOL_PREFIX);
+ p++;
+ tmp++;
+ SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX);
+ p++;
+ SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2);
+ len += 3;
+ } else if (tmp) {
SET_SYMBOL(strndup(alias->name, tmp - alias->name),
PMU_EVENT_SYMBOL_PREFIX);
p++;
@@ -2147,23 +2177,38 @@ err:
*/
int perf_pmu__test_parse_init(void)
{
- struct perf_pmu_event_symbol *list;
+ struct perf_pmu_event_symbol *list, *tmp, symbols[] = {
+ {(char *)"read", PMU_EVENT_SYMBOL},
+ {(char *)"event", PMU_EVENT_SYMBOL_PREFIX},
+ {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX},
+ {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX},
+ {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2},
+ };
+ unsigned long i, j;
- list = malloc(sizeof(*list) * 1);
+ tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols));
if (!list)
return -ENOMEM;
- list->type = PMU_EVENT_SYMBOL;
- list->symbol = strdup("read");
-
- if (!list->symbol) {
- free(list);
- return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) {
+ tmp->type = symbols[i].type;
+ tmp->symbol = strdup(symbols[i].symbol);
+ if (!tmp->symbol) /* test the entry just duplicated, not list[0] */
+ goto err_free;
}
perf_pmu_events_list = list;
- perf_pmu_events_list_num = 1;
+ perf_pmu_events_list_num = ARRAY_SIZE(symbols);
+
+ qsort(perf_pmu_events_list, ARRAY_SIZE(symbols),
+ sizeof(struct perf_pmu_event_symbol), comp_pmu);
return 0;
+
+err_free:
+ for (j = 0, tmp = list; j < i; j++, tmp++)
+ free(tmp->symbol);
+ free(list);
+ return -ENOMEM;
}
enum perf_pmu_event_symbol_type
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index c7fc93f54577..a38b8b160e80 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -53,6 +53,7 @@ enum perf_pmu_event_symbol_type {
PMU_EVENT_SYMBOL, /* normal style PMU event */
PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */
PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */
+ PMU_EVENT_SYMBOL_SUFFIX2, /* suffix of pre-suf2 style event */
};
struct perf_pmu_event_symbol {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4efe9872c667..5b6e4b5249cf 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -149,6 +149,8 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat
return PE_PMU_EVENT_PRE;
case PMU_EVENT_SYMBOL_SUFFIX:
return PE_PMU_EVENT_SUF;
+ case PMU_EVENT_SYMBOL_SUFFIX2:
+ return PE_PMU_EVENT_SUF2;
case PMU_EVENT_SYMBOL:
return parse_state->fake_pmu
? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT;
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 174158982fae..be8c51770051 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list,
%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
-%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%token PE_ARRAY_ALL PE_ARRAY_RANGE
%token PE_DRV_CFG_TERM
%type <num> PE_VALUE
@@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_MODIFIER_EVENT
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
-%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
%type <str> event_pmu_name
%destructor { free ($$); } <str>
@@ -372,6 +372,19 @@ PE_KERNEL_PMU_EVENT opt_pmu_config
$$ = list;
}
|
+PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc
+{
+ struct list_head *list;
+ char pmu_name[128];
+ snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5);
+ free($1);
+ free($3);
+ free($5);
+ if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0)
+ YYABORT;
+ $$ = list;
+}
+|
PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
{
struct list_head *list;
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 734d006d9a8c..c28dd50bd571 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -67,7 +67,7 @@ static bool perf_probe_api(setup_probe_fn_t fn)
cpus = perf_cpu_map__new(NULL);
if (!cpus)
return false;
- cpu = cpus->map[0];
+ cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
do {
@@ -144,7 +144,7 @@ bool perf_can_record_cpu_wide(void)
if (!cpus)
return false;
- cpu = cpus->map[0];
+ cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index b2a02c9ab8ea..a834918a0a0d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -3083,6 +3083,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
for (j = 0; j < num_matched_functions; j++) {
sym = syms[j];
+ if (sym->type != STT_FUNC)
+ continue;
+
/* There can be duplicated symbols in the map */
for (i = 0; i < j; i++)
if (sym->start == syms[i]->start) {
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index f3e5131f183c..52d8995cfd73 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -638,17 +638,17 @@ static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
{
struct pyrf_cpu_map *pcpus = (void *)obj;
- return pcpus->cpus->nr;
+ return perf_cpu_map__nr(pcpus->cpus);
}
static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
{
struct pyrf_cpu_map *pcpus = (void *)obj;
- if (i >= pcpus->cpus->nr)
+ if (i >= perf_cpu_map__nr(pcpus->cpus))
return NULL;
- return Py_BuildValue("i", pcpus->cpus->map[i]);
+ return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu);
}
static PySequenceMethods pyrf_cpu_map__sequence_methods = {
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 20461f174991..007a64681416 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -106,7 +106,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call
if (opts->group)
evlist__set_leader(evlist);
- if (evlist->core.cpus->map[0].cpu < 0)
+ if (perf_cpu_map__cpu(evlist->core.cpus, 0).cpu < 0)
opts->no_inherit = true;
use_comm_exec = perf_can_comm_exec();
@@ -248,11 +248,11 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
if (cpus)
- cpu = cpus->map[0];
+ cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
} else {
- cpu = evlist->core.cpus->map[0];
+ cpu = perf_cpu_map__cpu(evlist->core.cpus, 0);
}
while (1) {
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index f5ad0e62227a..e752e1f4a5f0 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1607,8 +1607,8 @@ static void python_process_stat(struct perf_stat_config *config,
}
for (thread = 0; thread < threads->nr; thread++) {
- for (cpu = 0; cpu < cpus->nr; cpu++) {
- process_stat(counter, cpus->map[cpu],
+ for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
perf_thread_map__pid(threads, thread), tstamp,
perf_counts(counter->counts, cpu, thread));
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f19348dddd55..2c0d30f08e78 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2537,8 +2537,8 @@ int perf_session__cpu_bitmap(struct perf_session *session,
return -1;
}
- for (i = 0; i < map->nr; i++) {
- struct perf_cpu cpu = map->map[i];
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
if (cpu.cpu >= nr_cpus) {
pr_err("Requested CPU %d too large. "
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 4c9f211249db..1e0c731fc539 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -734,8 +734,8 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
if (!m)
return -1;
- for (i = 0; i < m->nr; i++) {
- c = m->map[i];
+ for (i = 0; i < perf_cpu_map__nr(m); i++) {
+ c = perf_cpu_map__cpu(m, i);
if (c.cpu >= nr_cpus) {
ret = -1;
break;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index c9ba8050cc2b..70f095624a0b 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1186,12 +1186,12 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool,
static void synthesize_cpus(struct cpu_map_entries *cpus,
struct perf_cpu_map *map)
{
- int i;
+ int i, map_nr = perf_cpu_map__nr(map);
- cpus->nr = map->nr;
+ cpus->nr = map_nr;
- for (i = 0; i < map->nr; i++)
- cpus->cpu[i] = map->map[i].cpu;
+ for (i = 0; i < map_nr; i++)
+ cpus->cpu[i] = perf_cpu_map__cpu(map, i).cpu;
}
static void synthesize_mask(struct perf_record_record_cpu_map *mask,
@@ -1202,13 +1202,13 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask,
mask->nr = BITS_TO_LONGS(max);
mask->long_size = sizeof(long);
- for (i = 0; i < map->nr; i++)
- set_bit(map->map[i].cpu, mask->mask);
+ for (i = 0; i < perf_cpu_map__nr(map); i++)
+ set_bit(perf_cpu_map__cpu(map, i).cpu, mask->mask);
}
static size_t cpus_size(struct perf_cpu_map *map)
{
- return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+ return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16);
}
static size_t mask_size(struct perf_cpu_map *map, int *max)
@@ -1217,9 +1217,9 @@ static size_t mask_size(struct perf_cpu_map *map, int *max)
*max = 0;
- for (i = 0; i < map->nr; i++) {
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
/* bit position of the cpu is + 1 */
- int bit = map->map[i].cpu + 1;
+ int bit = perf_cpu_map__cpu(map, i).cpu + 1;
if (bit > *max)
*max = bit;
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 27945eeb0cb5..c1ebfc5d2e0c 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -95,15 +95,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
if (target->cpu_list)
ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
- top->evlist->core.cpus->nr > 1 ? "s" : "",
+ perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : "",
target->cpu_list);
else {
if (target->tid)
ret += SNPRINTF(bf + ret, size - ret, ")");
else
ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
- top->evlist->core.cpus->nr,
- top->evlist->core.cpus->nr > 1 ? "s" : "");
+ perf_cpu_map__nr(top->evlist->core.cpus),
+ perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : "");
}
perf_top__reset_sample_counters(top);
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index 16ec895bbe5f..5bd9e6e80625 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -74,7 +74,7 @@ static inline unsigned long page_to_phys(struct page *page)
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
x, y)
-#define preemptible() (1)
+#define pagefault_disabled() (0)
static inline void *kmap(struct page *page)
{
@@ -127,6 +127,7 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
#define kmemleak_free(a)
#define PageSlab(p) (0)
+#define flush_dcache_page(p)
#define MAX_ERRNO 4095
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index af798b9d232c..a3c1e67441f9 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -261,7 +261,7 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
}
ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
- "Set FPSIMD registers via %s\n", type->name);
+ "Got FPSIMD registers via %s\n", type->name);
if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
goto out;
@@ -557,7 +557,14 @@ static int do_parent(pid_t child)
}
/* prctl() flags */
- ptrace_set_get_inherit(child, &vec_types[i]);
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_set_get_inherit(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n",
+ vec_types[i].name);
+ }
/* Step through every possible VQ */
for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index 32fc5b3b5cf6..911345c526e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -10,6 +10,7 @@
#include "test_d_path.skel.h"
#include "test_d_path_check_rdonly_mem.skel.h"
+#include "test_d_path_check_types.skel.h"
static int duration;
@@ -167,6 +168,16 @@ static void test_d_path_check_rdonly_mem(void)
test_d_path_check_rdonly_mem__destroy(skel);
}
+static void test_d_path_check_types(void)
+{
+ struct test_d_path_check_types *skel;
+
+ skel = test_d_path_check_types__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type");
+
+ test_d_path_check_types__destroy(skel);
+}
+
void test_d_path(void)
{
if (test__start_subtest("basic"))
@@ -174,4 +185,7 @@ void test_d_path(void)
if (test__start_subtest("check_rdonly_mem"))
test_d_path_check_rdonly_mem();
+
+ if (test__start_subtest("check_alloc_mem"))
+ test_d_path_check_types();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 983ab0b47d30..b2b357f8c74c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -8,46 +8,47 @@
void serial_test_xdp_link(void)
{
- __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+ __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2;
struct bpf_link_info link_info;
struct bpf_prog_info prog_info;
struct bpf_link *link;
+ int err;
__u32 link_info_len = sizeof(link_info);
__u32 prog_info_len = sizeof(prog_info);
skel1 = test_xdp_link__open_and_load();
- if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n"))
+ if (!ASSERT_OK_PTR(skel1, "skel_load"))
goto cleanup;
prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
skel2 = test_xdp_link__open_and_load();
- if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n"))
+ if (!ASSERT_OK_PTR(skel2, "skel_load"))
goto cleanup;
prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
memset(&prog_info, 0, sizeof(prog_info));
err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
- if (CHECK(err, "fd_info1", "failed %d\n", -errno))
+ if (!ASSERT_OK(err, "fd_info1"))
goto cleanup;
id1 = prog_info.id;
memset(&prog_info, 0, sizeof(prog_info));
err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
- if (CHECK(err, "fd_info2", "failed %d\n", -errno))
+ if (!ASSERT_OK(err, "fd_info2"))
goto cleanup;
id2 = prog_info.id;
/* set initial prog attachment */
err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "fd_attach"))
goto cleanup;
/* validate prog ID */
err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- CHECK(err || id0 != id1, "id1_check",
- "loaded prog id %u != id1 %u, err %d", id0, id1, err);
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
+ goto cleanup;
/* BPF link is not allowed to replace prog attachment */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
@@ -62,7 +63,7 @@ void serial_test_xdp_link(void)
/* detach BPF program */
opts.old_fd = prog_fd1;
err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(err, "prog_detach", "failed %d\n", err))
+ if (!ASSERT_OK(err, "prog_detach"))
goto cleanup;
/* now BPF link should attach successfully */
@@ -73,24 +74,23 @@ void serial_test_xdp_link(void)
/* validate prog ID */
err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- if (CHECK(err || id0 != id1, "id1_check",
- "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
goto cleanup;
/* BPF prog attach is not allowed to replace BPF link */
opts.old_fd = prog_fd1;
err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
+ if (!ASSERT_ERR(err, "prog_attach_fail"))
goto cleanup;
/* Can't force-update when BPF link is active */
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
- if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
+ if (!ASSERT_ERR(err, "prog_update_fail"))
goto cleanup;
/* Can't force-detach when BPF link is active */
err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
- if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
+ if (!ASSERT_ERR(err, "prog_detach_fail"))
goto cleanup;
/* BPF link is not allowed to replace another BPF link */
@@ -110,40 +110,39 @@ void serial_test_xdp_link(void)
skel2->links.xdp_handler = link;
err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- if (CHECK(err || id0 != id2, "id2_check",
- "loaded prog id %u != id2 %u, err %d", id0, id1, err))
+ if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val"))
goto cleanup;
/* updating program under active BPF link works as expected */
err = bpf_link__update_program(link, skel1->progs.xdp_handler);
- if (CHECK(err, "link_upd", "failed: %d\n", err))
+ if (!ASSERT_OK(err, "link_upd"))
goto cleanup;
memset(&link_info, 0, sizeof(link_info));
err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
- if (CHECK(err, "link_info", "failed: %d\n", err))
+ if (!ASSERT_OK(err, "link_info"))
goto cleanup;
- CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
- "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
- CHECK(link_info.prog_id != id1, "link_prog_id",
- "got %u != exp %u\n", link_info.prog_id, id1);
- CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
- "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+ ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
+ ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex");
+
+ /* updating program under active BPF link with different type fails */
+ err = bpf_link__update_program(link, skel1->progs.tc_handler);
+ if (!ASSERT_ERR(err, "link_upd_invalid"))
+ goto cleanup;
err = bpf_link__detach(link);
- if (CHECK(err, "link_detach", "failed %d\n", err))
+ if (!ASSERT_OK(err, "link_detach"))
goto cleanup;
memset(&link_info, 0, sizeof(link_info));
err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
- if (CHECK(err, "link_info", "failed: %d\n", err))
- goto cleanup;
- CHECK(link_info.prog_id != id1, "link_prog_id",
- "got %u != exp %u\n", link_info.prog_id, id1);
+
+ ASSERT_OK(err, "link_info");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
/* ifindex should be zeroed out */
- CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
- "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+ ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex");
cleanup:
test_xdp_link__destroy(skel1);
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
new file mode 100644
index 000000000000..7e02b7361307
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ void *active;
+ u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* FAIL here! 'active' points to 'regular' memory. It
+ * cannot be submitted to ring buffer.
+ */
+ bpf_ringbuf_submit(active, 0);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index a8233e7f173b..728dbd39eff0 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#ifndef PERF_MAX_STACK_DEPTH
@@ -41,11 +41,11 @@ struct {
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c
index ce6974016f53..43bd7a20cc50 100644
--- a/tools/testing/selftests/bpf/progs/test_tracepoint.c
+++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c
@@ -1,17 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
index ee7d6ac0f615..64ff32eaae92 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_link.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -10,3 +10,9 @@ int xdp_handler(struct xdp_md *xdp)
{
return 0;
}
+
+SEC("tc")
+int tc_handler(struct __sk_buff *skb)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c
new file mode 100644
index 000000000000..b64d33e4833c
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/ringbuf.c
@@ -0,0 +1,95 @@
+{
+ "ringbuf: invalid reservation offset 1",
+ .insns = {
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* store a pointer to the reserved memory in R6 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ /* spill R6(mem) into the stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ /* fill it back in R7 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
+ /* should be able to access *(R7) = 0 */
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
+ /* submit the reserved ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* add invalid offset to reserved ringbuf memory */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xcafe),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .result = REJECT,
+ .errstr = "dereference of modified alloc_mem ptr R1",
+},
+{
+ "ringbuf: invalid reservation offset 2",
+ .insns = {
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* store a pointer to the reserved memory in R6 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ /* spill R6(mem) into the stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ /* fill it back in R7 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
+ /* add invalid offset to reserved ringbuf memory */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0xcafe),
+ /* store through the out-of-range R7 must be rejected by the verifier */
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
+ /* submit the reserved ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .result = REJECT,
+ .errstr = "R7 min value is outside of the allowed memory range",
+},
+{
+ "ringbuf: check passing rb mem to helpers",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ /* pass allocated ring buffer memory to fib lookup */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup),
+ /* submit the ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 2 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 1a8eb9672bd1..8cfc5349d2a8 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -84,7 +84,7 @@
},
.fixup_map_ringbuf = { 1 },
.result = REJECT,
- .errstr = "R0 pointer arithmetic on mem_or_null prohibited",
+ .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited",
},
{
"check corrupted spill/fill",
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 8c129961accf..dce7de7755e6 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -8,11 +8,12 @@
/s390x/memop
/s390x/resets
/s390x/sync_regs_test
+/x86_64/amx_test
+/x86_64/cpuid_test
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
/x86_64/emulator_error_test
-/x86_64/get_cpuid_test
/x86_64/get_msr_index_features
/x86_64/kvm_clock_test
/x86_64/kvm_pv_test
@@ -22,6 +23,7 @@
/x86_64/mmio_warning_test
/x86_64/mmu_role_test
/x86_64/platform_info_test
+/x86_64/pmu_event_filter_test
/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
/x86_64/sev_migrate_tests
@@ -36,6 +38,7 @@
/x86_64/vmx_apic_access_test
/x86_64/vmx_close_while_nested_test
/x86_64/vmx_dirty_log_test
+/x86_64/vmx_exception_with_invalid_guest_state
/x86_64/vmx_invalid_nested_guest_state
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index ee8cf2149824..0e4926bc9a58 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -43,11 +43,11 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handler
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
-TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
+TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
-TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
@@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
@@ -69,6 +70,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
@@ -83,6 +85,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
+TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 66775de26952..4ed6aa049a91 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -345,7 +345,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
* guest_code - The vCPU's entry point
*/
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
-void vm_xsave_req_perm(void);
bool vm_is_unrestricted_guest(struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index e94ba0fc67d8..8a470da7b71a 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -364,6 +364,24 @@ static inline unsigned long get_xmm(int n)
}
bool is_intel_cpu(void);
+bool is_amd_cpu(void);
+
+static inline unsigned int x86_family(unsigned int eax)
+{
+ unsigned int x86;
+
+ x86 = (eax >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (eax >> 20) & 0xff;
+
+ return x86;
+}
+
+static inline unsigned int x86_model(unsigned int eax)
+{
+ return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
+}
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
@@ -375,6 +393,8 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index);
struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_cpuid2 *cpuid);
void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_cpuid2 *cpuid);
@@ -419,6 +439,11 @@ void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
uint64_t pte);
/*
+ * get_cpuid() - find matching CPUID entry and return pointer to it.
+ */
+struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function,
+ uint32_t index);
+/*
* set_cpuid() - overwrites a matching cpuid entry with the provided value.
* matches based on ent->function && ent->index. returns true
* if a match was found and successfully overwritten.
@@ -433,6 +458,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+void vm_xsave_req_perm(int bit);
enum x86_page_size {
X86_PAGE_SIZE_4K = 0,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 4a645dc77f34..d8cf851ab119 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -393,11 +393,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
struct kvm_vm *vm;
int i;
- /*
- * Permission needs to be requested before KVM_SET_CPUID2.
- */
- vm_xsave_req_perm();
-
/* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
@@ -497,9 +492,11 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
uint64_t first_page, uint32_t num_pages)
{
- struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
- .first_page = first_page,
- .num_pages = num_pages };
+ struct kvm_clear_dirty_log args = {
+ .dirty_bitmap = log, .slot = slot,
+ .first_page = first_page,
+ .num_pages = num_pages
+ };
int ret;
ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index babb0f28575c..9f000dfb5594 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -665,16 +665,31 @@ static bool is_xfd_supported(void)
return !!(eax & CPUID_XFD_BIT);
}
-void vm_xsave_req_perm(void)
+void vm_xsave_req_perm(int bit)
{
- unsigned long bitmask;
+ int kvm_fd;
+ u64 bitmask;
long rc;
+ struct kvm_device_attr attr = {
+ .group = 0,
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
+ .addr = (unsigned long) &bitmask
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ close(kvm_fd);
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+ exit(KSFT_SKIP);
+ TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
+ if (!(bitmask & (1ULL << bit)))
+ exit(KSFT_SKIP);
if (!is_xfd_supported())
- return;
+ exit(KSFT_SKIP);
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
- rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
- XSTATE_XTILE_DATA_BIT);
/*
* The older kernel version(<5.15) can't support
* ARCH_REQ_XCOMP_GUEST_PERM and directly return.
@@ -684,7 +699,7 @@ void vm_xsave_req_perm(void)
rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
- TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK,
+ TEST_ASSERT(bitmask & (1ULL << bit),
"prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
bitmask);
}
@@ -886,6 +901,17 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
return entry;
}
+
+int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_cpuid2 *cpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ return ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+}
+
/*
* VM VCPU CPUID Set
*
@@ -903,12 +929,9 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
void vcpu_set_cpuid(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int rc;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+ rc = __vcpu_set_cpuid(vm, vcpuid, cpuid);
TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
rc, errno);
@@ -1136,25 +1159,25 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+ r);
state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
+ r);
r = vcpu_save_xsave_state(vm, vcpu, state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
+ r);
if (kvm_check_cap(KVM_CAP_XCRS)) {
r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
@@ -1163,17 +1186,17 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
}
r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
+ r);
if (nested_size) {
state->nested.size = sizeof(state->nested_);
r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
- r);
+ r);
TEST_ASSERT(state->nested.size <= nested_size,
- "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
- state->nested.size, nested_size);
+ "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+ state->nested.size, nested_size);
} else
state->nested.size = 0;
@@ -1181,12 +1204,12 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
for (i = 0; i < nmsrs; i++)
state->msrs.entries[i].index = list->indices[i];
r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
- TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
- r, r == nmsrs ? -1 : list->indices[r]);
+ TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
+ r, r == nmsrs ? -1 : list->indices[r]);
r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
+ r);
free(list);
return state;
@@ -1199,7 +1222,7 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
- r);
+ r);
r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
TEST_ASSERT(r == state->msrs.nmsrs,
@@ -1214,28 +1237,28 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
- r);
+ r);
r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
+ r);
if (state->nested.size) {
r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
+ r);
}
}
@@ -1245,10 +1268,10 @@ void kvm_x86_state_cleanup(struct kvm_x86_state *state)
free(state);
}
-bool is_intel_cpu(void)
+static bool cpu_vendor_string_is(const char *vendor)
{
+ const uint32_t *chunk = (const uint32_t *)vendor;
int eax, ebx, ecx, edx;
- const uint32_t *chunk;
const int leaf = 0;
__asm__ __volatile__(
@@ -1257,10 +1280,22 @@ bool is_intel_cpu(void)
"=c"(ecx), "=d"(edx)
: /* input */ "0"(leaf), "2"(0));
- chunk = (const uint32_t *)("GenuineIntel");
return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
+bool is_intel_cpu(void)
+{
+ return cpu_vendor_string_is("GenuineIntel");
+}
+
+/*
+ * Exclude early K5 samples with a vendor string of "AMDisbetter!"
+ */
+bool is_amd_cpu(void)
+{
+ return cpu_vendor_string_is("AuthenticAMD");
+}
+
uint32_t kvm_get_cpuid_max_basic(void)
{
return kvm_get_supported_cpuid_entry(0)->eax;
@@ -1384,6 +1419,23 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
}
}
+struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function,
+ uint32_t index)
+{
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
+
+ if (cur->function == function && cur->index == index)
+ return cur;
+ }
+
+ TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
+
+ return NULL;
+}
+
bool set_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 *ent)
{
@@ -1479,22 +1531,6 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
return cpuid;
}
-#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
-#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
-#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
-
-static inline unsigned x86_family(unsigned int eax)
-{
- unsigned int x86;
-
- x86 = (eax >> 8) & 0xf;
-
- if (x86 == 0xf)
- x86 += (eax >> 20) & 0xff;
-
- return x86;
-}
-
unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
@@ -1504,11 +1540,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
/* Avoid reserved HyperTransport region on AMD processors. */
- eax = ecx = 0;
- cpuid(&eax, &ebx, &ecx, &edx);
- if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
- ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
- edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+ if (!is_amd_cpu())
return max_gfn;
/* On parts with <40 physical address bits, the area is fully hidden */
@@ -1518,6 +1550,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
/* Before family 17h, the HyperTransport area is just below 1T. */
ht_gfn = (1 << 28) - num_ht_pages;
eax = 1;
+ ecx = 0;
cpuid(&eax, &ebx, &ecx, &edx);
if (x86_family(eax) < 0x17)
goto done;
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 523c1e99ed64..52a3ef6629e8 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -329,6 +329,8 @@ int main(int argc, char *argv[])
u32 amx_offset;
int stage, ret;
+ vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT);
+
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
index a711f83749ea..16d2465c5634 100644
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -154,6 +154,34 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
return guest_cpuids;
}
+static void set_cpuid_after_run(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid)
+{
+ struct kvm_cpuid_entry2 *ent;
+ int rc;
+ u32 eax, ebx, x;
+
+ /* Setting unmodified CPUID is allowed */
+ rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+ /* Changing CPU features is forbidden */
+ ent = get_cpuid(cpuid, 0x7, 0);
+ ebx = ent->ebx;
+ ent->ebx--;
+ rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ TEST_ASSERT(rc, "Changing CPU features should fail");
+ ent->ebx = ebx;
+
+ /* Changing MAXPHYADDR is forbidden */
+ ent = get_cpuid(cpuid, 0x80000008, 0);
+ eax = ent->eax;
+ x = eax & 0xff;
+ ent->eax = (eax & ~0xffu) | (x - 1);
+ rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+ ent->eax = eax;
+}
+
int main(void)
{
struct kvm_cpuid2 *supp_cpuid, *cpuid2;
@@ -175,5 +203,7 @@ int main(void)
for (stage = 0; stage < 3; stage++)
run_vcpu(vm, VCPU_ID, stage);
+ set_cpuid_after_run(vm, cpuid2);
+
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
new file mode 100644
index 000000000000..c715adcbd487
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * In lieu of copying perf_event.h into tools...
+ */
+#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
+
+union cpuid10_eax {
+ struct {
+ unsigned int version_id:8;
+ unsigned int num_counters:8;
+ unsigned int bit_width:8;
+ unsigned int mask_length:8;
+ } split;
+ unsigned int full;
+};
+
+union cpuid10_ebx {
+ struct {
+ unsigned int no_unhalted_core_cycles:1;
+ unsigned int no_instructions_retired:1;
+ unsigned int no_unhalted_reference_cycles:1;
+ unsigned int no_llc_reference:1;
+ unsigned int no_llc_misses:1;
+ unsigned int no_branch_instruction_retired:1;
+ unsigned int no_branch_misses_retired:1;
+ } split;
+ unsigned int full;
+};
+
+/* End of stuff taken from perf_event.h. */
+
+/* Oddly, this isn't in perf_event.h. */
+#define ARCH_PERFMON_BRANCHES_RETIRED 5
+
+#define VCPU_ID 0
+#define NUM_BRANCHES 42
+
+/*
+ * This is how the event selector and unit mask are stored in an AMD
+ * core performance event-select register. Intel's format is similar,
+ * but the event selector is only 8 bits.
+ */
+#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
+ (umask & 0xff) << 8)
+
+/*
+ * "Branch instructions retired", from the Intel SDM, volume 3,
+ * "Pre-defined Architectural Performance Events."
+ */
+
+#define INTEL_BR_RETIRED EVENT(0xc4, 0)
+
+/*
+ * "Retired branch instructions", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+
+#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
+
+/*
+ * This event list comprises Intel's eight architectural events plus
+ * AMD's "retired branch instructions" for Zen[123] (and possibly
+ * other AMD CPUs).
+ */
+static const uint64_t event_list[] = {
+ EVENT(0x3c, 0),
+ EVENT(0xc0, 0),
+ EVENT(0x3c, 1),
+ EVENT(0x2e, 0x4f),
+ EVENT(0x2e, 0x41),
+ EVENT(0xc4, 0),
+ EVENT(0xc5, 0),
+ EVENT(0xa4, 1),
+ AMD_ZEN_BR_RETIRED,
+};
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(0);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(0) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+ uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(0);
+
+ v ^= bits_to_flip;
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(0);
+}
+
+static void intel_guest_code(void)
+{
+ check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ check_msr(MSR_P6_EVNTSEL0, 0xffff);
+ check_msr(MSR_IA32_PMC0, 0xffff);
+ GUEST_SYNC(1);
+
+ for (;;) {
+ uint64_t br0, br1;
+
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ br0 = rdmsr(MSR_IA32_PMC0);
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ br1 = rdmsr(MSR_IA32_PMC0);
+ GUEST_SYNC(br1 - br0);
+ }
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+ check_msr(MSR_K7_EVNTSEL0, 0xffff);
+ check_msr(MSR_K7_PERFCTR0, 0xffff);
+ GUEST_SYNC(1);
+
+ for (;;) {
+ uint64_t br0, br1;
+
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
+ br0 = rdmsr(MSR_K7_PERFCTR0);
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ br1 = rdmsr(MSR_K7_PERFCTR0);
+ GUEST_SYNC(br1 - br0);
+ }
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vm_to_sync(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ get_ucall(vm, VCPU_ID, &uc);
+ TEST_ASSERT(uc.cmd == UCALL_SYNC,
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+ return uc.args[1];
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vm *vm)
+{
+ bool success;
+
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+ success = run_vm_to_sync(vm);
+ vm_install_exception_handler(vm, GP_VECTOR, NULL);
+
+ return success;
+}
+
+static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
+{
+ struct kvm_pmu_event_filter *f;
+ int size = sizeof(*f) + nevents * sizeof(f->events[0]);
+
+ f = malloc(size);
+ TEST_ASSERT(f, "Out of memory");
+ memset(f, 0, size);
+ f->nevents = nevents;
+ return f;
+}
+
+static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+{
+ struct kvm_pmu_event_filter *f;
+ int i;
+
+ f = make_pmu_event_filter(ARRAY_SIZE(event_list));
+ f->action = action;
+ for (i = 0; i < ARRAY_SIZE(event_list); i++)
+ f->events[i] = event_list[i];
+
+ return f;
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
+ uint64_t event)
+{
+ bool found = false;
+ int i;
+
+ for (i = 0; i < f->nevents; i++) {
+ if (found)
+ f->events[i - 1] = f->events[i];
+ else
+ found = f->events[i] == event;
+ }
+ if (found)
+ f->nevents--;
+ return f;
+}
+
+static void test_without_filter(struct kvm_vm *vm)
+{
+ uint64_t count = run_vm_to_sync(vm);
+
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static uint64_t test_with_filter(struct kvm_vm *vm,
+ struct kvm_pmu_event_filter *f)
+{
+ vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
+ return run_vm_to_sync(vm);
+}
+
+static void test_member_deny_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+ uint64_t count = test_with_filter(vm, f);
+
+ free(f);
+ if (count)
+ pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
+ __func__, count);
+ TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+}
+
+static void test_member_allow_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+ uint64_t count = test_with_filter(vm, f);
+
+ free(f);
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static void test_not_member_deny_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+ uint64_t count;
+
+ remove_event(f, INTEL_BR_RETIRED);
+ remove_event(f, AMD_ZEN_BR_RETIRED);
+ count = test_with_filter(vm, f);
+ free(f);
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static void test_not_member_allow_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+ uint64_t count;
+
+ remove_event(f, INTEL_BR_RETIRED);
+ remove_event(f, AMD_ZEN_BR_RETIRED);
+ count = test_with_filter(vm, f);
+ free(f);
+ if (count)
+ pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
+ __func__, count);
+ TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+}
+
+/*
+ * Check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, an EBX bit vector of length greater
+ * than 5, and EBX[5] clear.
+ */
+static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
+{
+ union cpuid10_eax eax = { .full = entry->eax };
+ union cpuid10_ebx ebx = { .full = entry->ebx };
+
+ return eax.split.version_id && eax.split.num_counters > 0 &&
+ eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
+ !ebx.split.no_branch_instruction_retired;
+}
+
+/*
+ * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
+ * clear on AMD hardware.
+ */
+static bool use_intel_pmu(void)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = kvm_get_supported_cpuid_index(0xa, 0);
+ return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
+}
+
+static bool is_zen1(uint32_t eax)
+{
+ return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
+}
+
+static bool is_zen2(uint32_t eax)
+{
+ return x86_family(eax) == 0x17 &&
+ x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
+}
+
+static bool is_zen3(uint32_t eax)
+{
+ return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
+}
+
+/*
+ * Determining AMD support for a PMU event requires consulting the AMD
+ * PPR for the CPU or reference material derived therefrom. The AMD
+ * test code herein has been verified to work on Zen1, Zen2, and Zen3.
+ *
+ * Feel free to add more AMD CPUs that are documented to support event
+ * select 0xc2 umask 0 as "retired branch instructions."
+ */
+static bool use_amd_pmu(void)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = kvm_get_supported_cpuid_index(1, 0);
+ return is_amd_cpu() && entry &&
+ (is_zen1(entry->eax) ||
+ is_zen2(entry->eax) ||
+ is_zen3(entry->eax));
+}
+
+int main(int argc, char *argv[])
+{
+ void (*guest_code)(void) = NULL;
+ struct kvm_vm *vm;
+ int r;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER);
+ if (!r) {
+ print_skip("KVM_CAP_PMU_EVENT_FILTER not supported");
+ exit(KSFT_SKIP);
+ }
+
+ if (use_intel_pmu())
+ guest_code = intel_guest_code;
+ else if (use_amd_pmu())
+ guest_code = amd_guest_code;
+
+ if (!guest_code) {
+ print_skip("Don't know how to test this guest PMU");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ if (!sanity_check_pmu(vm)) {
+ print_skip("Guest PMU is not functional");
+ exit(KSFT_SKIP);
+ }
+
+ test_without_filter(vm);
+ test_member_deny_list(vm);
+ test_member_allow_list(vm);
+ test_not_member_deny_list(vm);
+ test_not_member_allow_list(vm);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 2da8eb8e2d96..a626d40fdb48 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -105,7 +105,6 @@ static void guest_code(void *arg)
if (cpu_has_svm()) {
run_guest(svm->vmcb, svm->vmcb_gpa);
- svm->vmcb->save.rip += 3;
run_guest(svm->vmcb, svm->vmcb_gpa);
} else {
vmlaunch();
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index 5a6a662f2e59..a426078b16a3 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -77,8 +77,8 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
switch (get_ucall(vm, vcpuid, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
- stage + 1, (ulong)uc.args[1]);
+ uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
return;
case UCALL_DONE:
return;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index 2835a17f1b7a..edac8839e717 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -30,8 +30,8 @@ static struct kvm_vm *vm;
static void l2_guest_code(void)
{
/* Exit to L0 */
- asm volatile("inb %%dx, %%al"
- : : [port] "d" (PORT_L0_EXIT) : "rax");
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (PORT_L0_EXIT) : "rax");
}
static void l1_guest_code(struct vmx_pages *vmx_pages)
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
new file mode 100644
index 000000000000..27a850f3d7ce
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+#define VCPU_ID 0
+
+static struct kvm_vm *vm;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ /* Loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+ asm volatile("ud2");
+}
+
+static void __run_vcpu_with_invalid_state(void)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_run(vm, VCPU_ID);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Expected KVM_EXIT_INTERNAL_ERROR, got %d (%s)\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Expected emulation failure, got %d\n",
+ run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(void)
+{
+ /*
+ * Always run twice to verify KVM handles the case where _KVM_ queues
+ * an exception with invalid state and then exits to userspace, i.e.
+ * that KVM doesn't explode if userspace ignores the initial error.
+ */
+ __run_vcpu_with_invalid_state();
+ __run_vcpu_with_invalid_state();
+}
+
+static void set_timer(void)
+{
+ struct itimerval timer;
+
+ timer.it_value.tv_sec = 0;
+ timer.it_value.tv_usec = 200;
+ timer.it_interval = timer.it_value;
+ ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
+static void set_or_clear_invalid_guest_state(bool set)
+{
+ static struct kvm_sregs sregs;
+
+ if (!sregs.cr0)
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.tr.unusable = !!set;
+ vcpu_sregs_set(vm, VCPU_ID, &sregs);
+}
+
+static void set_invalid_guest_state(void)
+{
+ set_or_clear_invalid_guest_state(true);
+}
+
+static void clear_invalid_guest_state(void)
+{
+ set_or_clear_invalid_guest_state(false);
+}
+
+static void sigalrm_handler(int sig)
+{
+ struct kvm_vcpu_events events;
+
+ TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+
+ /*
+ * If an exception is pending, attempt KVM_RUN with invalid guest
+ * state, otherwise rearm the timer and keep doing so until the timer
+ * fires between KVM queueing an exception and re-entering the guest.
+ */
+ if (events.exception.pending) {
+ set_invalid_guest_state();
+ run_vcpu_with_invalid_state();
+ } else {
+ set_timer();
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ if (!is_intel_cpu() || vm_is_unrestricted_guest(NULL)) {
+ print_skip("Must be run with kvm_intel.unrestricted_guest=0");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, (void *)guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+ /*
+ * Stuff invalid guest state for L2 by making TR unusable. The next
+ * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+ * emulating invalid guest state for L2.
+ */
+ set_invalid_guest_state();
+ run_vcpu_with_invalid_state();
+
+ /*
+ * Verify KVM also handles the case where userspace gains control while
+ * an exception is pending and stuffs invalid state. Run with valid
+ * guest state and a timer firing every 200us, and attempt to enter the
+ * guest with invalid state when the handler interrupts KVM with an
+ * exception pending.
+ */
+ clear_invalid_guest_state();
+ TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+ "Failed to register SIGALRM handler, errno = %d (%s)",
+ errno, strerror(errno));
+
+ set_timer();
+ run_vcpu_with_invalid_state();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 478e0ae8b93e..865e17146815 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -46,20 +46,20 @@ static struct kvm_vm *vm;
#define MIN_STEAL_TIME 50000
struct pvclock_vcpu_time_info {
- u32 version;
- u32 pad0;
- u64 tsc_timestamp;
- u64 system_time;
- u32 tsc_to_system_mul;
- s8 tsc_shift;
- u8 flags;
- u8 pad[2];
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
} __attribute__((__packed__)); /* 32 bytes */
struct pvclock_wall_clock {
- u32 version;
- u32 sec;
- u32 nsec;
+ u32 version;
+ u32 sec;
+ u32 nsec;
} __attribute__((__packed__));
struct vcpu_runstate_info {
@@ -74,11 +74,11 @@ struct arch_vcpu_info {
};
struct vcpu_info {
- uint8_t evtchn_upcall_pending;
- uint8_t evtchn_upcall_mask;
- unsigned long evtchn_pending_sel;
- struct arch_vcpu_info arch;
- struct pvclock_vcpu_time_info time;
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ unsigned long evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */
struct shared_info {
@@ -493,7 +493,7 @@ int main(int argc, char *argv[])
vm_ts.tv_sec = wc->sec;
vm_ts.tv_nsec = wc->nsec;
- TEST_ASSERT(wc->version && !(wc->version & 1),
+ TEST_ASSERT(wc->version && !(wc->version & 1),
"Bad wallclock version %x", wc->version);
TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 412d85205546..3f4c8cfe7aca 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -4059,6 +4059,9 @@ usage: ${0##*/} OPTS
-p Pause on fail
-P Pause after each test
-v Be verbose
+
+Tests:
+ $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER
EOF
}
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
index 8f6997d35816..d9d1d4190126 100644
--- a/tools/testing/selftests/net/ioam6_parser.c
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -240,11 +240,8 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
*p += sizeof(__u32);
}
- if (ioam6h->type.bit6) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
- return 1;
+ if (ioam6h->type.bit6)
*p += sizeof(__u32);
- }
if (ioam6h->type.bit7) {
if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 27d0eb9afdca..b8bdbec0cf69 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -75,6 +75,7 @@ init()
# let $ns2 reach any $ns1 address from any interface
ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+ ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i
done
}
@@ -1476,7 +1477,7 @@ ipv6_tests()
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
chk_join_nr "single subflow IPv6" 1 1 1
@@ -1511,7 +1512,7 @@ ipv6_tests()
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
chk_join_nr "remove subflow and signal IPv6" 2 2 2
chk_add_nr 1 1
diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings
index 694d70710ff0..dfc27cdc6c05 100644
--- a/tools/testing/selftests/net/settings
+++ b/tools/testing/selftests/net/settings
@@ -1 +1 @@
-timeout=300
+timeout=1500
diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile
index 87e0ec48e2e7..95e485f12d97 100644
--- a/tools/tracing/Makefile
+++ b/tools/tracing/Makefile
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
include ../scripts/Makefile.include
-all: latency
+all: latency rtla
-clean: latency_clean
+clean: latency_clean rtla_clean
-install: latency_install
+install: latency_install rtla_install
latency:
$(call descend,latency)
@@ -16,4 +16,14 @@ latency_install:
latency_clean:
$(call descend,latency,clean)
-.PHONY: all install clean latency latency_install latency_clean
+rtla:
+ $(call descend,rtla)
+
+rtla_install:
+ $(call descend,rtla,install)
+
+rtla_clean:
+ $(call descend,rtla,clean)
+
+.PHONY: all install clean latency latency_install latency_clean \
+ rtla rtla_install rtla_clean
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
index 2d52ff0bff7d..7c39728d08de 100644
--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -59,7 +59,7 @@ endif
.PHONY: all
all: rtla
-rtla: $(OBJ) doc
+rtla: $(OBJ)
$(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS)
static: $(OBJ)
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 94403806ea56..83822c33e9e7 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -28,13 +28,13 @@ no-header-test += linux/am437x-vpfe.h
no-header-test += linux/android/binder.h
no-header-test += linux/android/binderfs.h
no-header-test += linux/coda.h
+no-header-test += linux/cyclades.h
no-header-test += linux/errqueue.h
no-header-test += linux/fsmap.h
no-header-test += linux/hdlc/ioctl.h
no-header-test += linux/ivtv.h
no-header-test += linux/kexec.h
no-header-test += linux/matroxfb.h
-no-header-test += linux/nfc.h
no-header-test += linux/omap3isp.h
no-header-test += linux/omapfb.h
no-header-test += linux/patchkey.h
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2ad013b8bde9..59b1dd4a549e 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -463,8 +463,8 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
idx = srcu_read_lock(&kvm->irq_srcu);
gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
if (gsi != -1)
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
+ hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
+ link, srcu_read_lock_held(&kvm->irq_srcu))
if (kian->gsi == gsi) {
srcu_read_unlock(&kvm->irq_srcu, idx);
return true;
@@ -480,8 +480,8 @@ void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
struct kvm_irq_ack_notifier *kian;
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
+ hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
+ link, srcu_read_lock_held(&kvm->irq_srcu))
if (kian->gsi == gsi)
kian->irq_acked(kian);
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 504158f0e131..58d31da8a2f7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -427,9 +427,6 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
#endif
kvm_async_pf_vcpu_init(vcpu);
- vcpu->pre_pcpu = -1;
- INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
-
kvm_vcpu_set_in_spin_loop(vcpu, false);
kvm_vcpu_set_dy_eligible(vcpu, false);
vcpu->preempted = false;
@@ -2251,7 +2248,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
return NULL;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
@@ -2466,9 +2462,8 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn)
}
static int hva_to_pfn_remapped(struct vm_area_struct *vma,
- unsigned long addr, bool *async,
- bool write_fault, bool *writable,
- kvm_pfn_t *p_pfn)
+ unsigned long addr, bool write_fault,
+ bool *writable, kvm_pfn_t *p_pfn)
{
kvm_pfn_t pfn;
pte_t *ptep;
@@ -2578,7 +2573,7 @@ retry:
if (vma == NULL)
pfn = KVM_PFN_ERR_FAULT;
else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
- r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn);
+ r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn);
if (r == -EAGAIN)
goto retry;
if (r < 0)
@@ -3163,8 +3158,10 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+#ifdef CONFIG_HAVE_KVM_DIRTY_RING
if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm))
return;
+#endif
if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
unsigned long rel_gfn = gfn - memslot->base_gfn;